{"created":"2023-06-19T10:26:32.623710+00:00","id":5134,"links":{},"metadata":{"_buckets":{"deposit":"df1fe6a9-40ee-422a-9600-57d68b72177e"},"_deposit":{"created_by":18,"id":"5134","owners":[18],"pid":{"revision_id":0,"type":"depid","value":"5134"},"status":"published"},"_oai":{"id":"oai:muroran-it.repo.nii.ac.jp:00005134","sets":["41:227"]},"author_link":["22765"],"item_81_date_granted_17":{"attribute_name":"学位授与年月日","attribute_value_mlt":[{"subitem_dategranted":"2015-03-23"}]},"item_81_degree_grantor_10":{"attribute_name":"学位授与機関","attribute_value_mlt":[{"subitem_degreegrantor":[{"subitem_degreegrantor_language":"ja","subitem_degreegrantor_name":"室蘭工業大学"},{"subitem_degreegrantor_language":"en","subitem_degreegrantor_name":"Muroran Institute of Technology"}],"subitem_degreegrantor_identifier":[{"subitem_degreegrantor_identifier_name":"10103","subitem_degreegrantor_identifier_scheme":"kakenhi"}]}]},"item_81_degree_name_11":{"attribute_name":"学位名","attribute_value_mlt":[{"subitem_degreename":"博士(工学)","subitem_degreename_language":"ja"}]},"item_81_description_25":{"attribute_name":"フォーマット","attribute_value_mlt":[{"subitem_description":"application/pdf","subitem_description_type":"Other"}]},"item_81_description_7":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"With the increasing use of rescue robots in disasters, such as earthquakes and tsunami, there is an urgent need to develop robotics software that can learn and adapt to any environment. Reinforcement Learning (RL) is often used in the development of robotic software. RL is a field of machine learning within the computer science domain; moreover, many RL methods have been proposed recently and applied to a variety of problems, where agents learn policies to maximize the total number of rewards determined according to specific rules. In the process whereby agents obtain rewards, data consisting of state-action pairs are generated. The agents’ policies are improved effectively by a supervised learning mechanism using a sequential expression of the stored data series and rewards. Typically, RL agents must initialize policies when they are placed in a new environment, and the learning process starts afresh each time. Effective adjustment to an unknown environment becomes possible using statistical methods, such as a Bayesian network model, mixture probability, and clustering distribution, which consist of observational data for multiple environments that the agents have learned. However, adapting to environmental change, such as unknown environments, is challenging. For example, setting appropriate experimental parameters, including the number of the input status and the output action, becomes difficult in complicated real environments, and that makes it difficult for an agent to learn a policy. Furthermore, the use of a mixture of Bayesian network models increases the system’s calculation time. In addition, due to limited processing resources, it becomes necessary to control computational complexity. The goal of this research is to create an efficient and practical RL system that is adaptive to unknown and complex environments, such as dynamic movement environments and multi-layer environments. In addition, the proposed method attempts to control computation complexity while retaining system performance. In this study, a modified profit-sharing method with new parameters, such as changing reward value, is proposed. 
A weight-update system and a change in the dimension of the episode data make it possible to work in dynamically moving multi-layer environments. A mixture probability that integrates the observational environmental data an agent has learned within an RL framework is introduced; this provides initial knowledge to the agent and enables efficient adjustment to a changing environment. A clustering method that enables the selection of fewer elements has also been implemented, which reduces computational complexity significantly while retaining system performance. Through this statistical-model approach, an RL system with a utility algorithm that can adapt to unknown multi-layer environments is realized.","subitem_description_language":"en","subitem_description_type":"Abstract"}]},"item_81_dissertation_number_13":{"attribute_name":"学位授与番号","attribute_value_mlt":[{"subitem_dissertationnumber":"甲第369号"}]},"item_81_identifier_registration":{"attribute_name":"ID登録","attribute_value_mlt":[{"subitem_identifier_reg_text":"10.15118/00005125","subitem_identifier_reg_type":"JaLC"}]},"item_81_subject_9":{"attribute_name":"日本十進分類法","attribute_value_mlt":[{"subitem_subject":"548","subitem_subject_scheme":"NDC"}]},"item_81_text_12":{"attribute_name":"学位の種別","attribute_value_mlt":[{"subitem_text_language":"ja","subitem_text_value":"課程博士"}]},"item_81_text_14":{"attribute_name":"報告番号","attribute_value_mlt":[{"subitem_text_language":"ja","subitem_text_value":"甲第369号"}]},"item_81_text_15":{"attribute_name":"学位記番号","attribute_value_mlt":[{"subitem_text_language":"ja","subitem_text_value":"博甲第369号"}]},"item_81_version_type_24":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_access_right":{"attribute_name":"アクセス権","attribute_value_mlt":[{"subitem_access_right":"open access","subitem_access_right_uri":"http://purl.org/coar/access_right/c_abf2"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorAffiliations":[{"affiliationNameIdentifiers":[],"affiliationNames":[{"affiliationName":""}]}],"creatorNames":[{"creatorName":"ポッマサク, ウタイ","creatorNameLang":"ja"},{"creatorName":"PHOMMASAK, UTHAI","creatorNameLang":"en"}],"familyNames":[{},{}],"givenNames":[{},{}],"nameIdentifiers":[{}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2016-02-15"}],"displaytype":"detail","filename":"A369.pdf","filesize":[{"value":"3.3 MB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"A369","objectType":"fulltext","url":"https://muroran-it.repo.nii.ac.jp/record/5134/files/A369.pdf"},"version_id":"b53cfcea-73f4-4605-975b-fa7a7bff5b5e"},{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2016-02-15"}],"displaytype":"detail","filename":"A369_summary.pdf","filesize":[{"value":"473.4 kB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"A369_summary","objectType":"abstract","url":"https://muroran-it.repo.nii.ac.jp/record/5134/files/A369_summary.pdf"},"version_id":"1879b563-be5a-4636-98eb-36ab3f65cc2c"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"doctoral thesis"
thesis","resourceuri":"http://purl.org/coar/resource_type/c_db06"}]},"item_title":"動的な階層環境における強化学習エージェントの確率知識を用いた方策改善に関する研究","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"動的な階層環境における強化学習エージェントの確率知識を用いた方策改善に関する研究","subitem_title_language":"ja"}]},"item_type_id":"81","owner":"18","path":["227"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2015-06-11"},"publish_date":"2015-06-11","publish_status":"0","recid":"5134","relation_version_is_last":true,"title":["動的な階層環境における強化学習エージェントの確率知識を用いた方策改善に関する研究"],"weko_creator_id":"18","weko_shared_id":-1},"updated":"2023-11-14T02:48:21.625201+00:00"}