{"data":{"id":"10.48550/arxiv.2111.02104","type":"dois","attributes":{"doi":"10.48550/arxiv.2111.02104","prefix":"10.48550","suffix":"arxiv.2111.02104","identifiers":[{"identifier":"2111.02104","identifierType":"arXiv"}],"alternateIdentifiers":[{"alternateIdentifierType":"arXiv","alternateIdentifier":"2111.02104"}],"creators":[{"name":"Le, Hung","nameType":"Personal","givenName":"Hung","familyName":"Le","affiliation":[],"nameIdentifiers":[]},{"name":"George, Thommen Karimpanal","nameType":"Personal","givenName":"Thommen Karimpanal","familyName":"George","affiliation":[],"nameIdentifiers":[]},{"name":"Abdolshah, Majid","nameType":"Personal","givenName":"Majid","familyName":"Abdolshah","affiliation":[],"nameIdentifiers":[]},{"name":"Tran, Truyen","nameType":"Personal","givenName":"Truyen","familyName":"Tran","affiliation":[],"nameIdentifiers":[]},{"name":"Venkatesh, Svetha","nameType":"Personal","givenName":"Svetha","familyName":"Venkatesh","affiliation":[],"nameIdentifiers":[]}],"titles":[{"title":"Model-Based Episodic Memory Induces Dynamic Hybrid Controls"}],"publisher":"arXiv","container":{},"publicationYear":2021,"subjects":[{"lang":"en","subject":"Machine Learning (cs.LG)","subjectScheme":"arXiv"},{"lang":"en","subject":"Artificial Intelligence (cs.AI)","subjectScheme":"arXiv"},{"subject":"FOS: Computer and information sciences","subjectScheme":"Fields of Science and Technology (FOS)"},{"subject":"FOS: Computer and information sciences","schemeUri":"http://www.oecd.org/science/inno/38235147.pdf","subjectScheme":"Fields of Science and Technology (FOS)"}],"contributors":[],"dates":[{"date":"2021-11-03T09:52:33Z","dateType":"Submitted","dateInformation":"v1"},{"date":"2021-11-04T00:14:29Z","dateType":"Updated","dateInformation":"v1"},{"date":"2021-11-06T08:04:18Z","dateType":"Submitted","dateInformation":"v2"},{"date":"2021-11-09T01:07:49Z","dateType":"Updated","dateInformation":"v2"},{"date":"2021-11","dateType":"Available","dateInformation":"v1"},{"date":"2021","dateType":"Issued"}],"language":null,"types":{"ris":"GEN","bibtex":"misc","citeproc":"article","schemaOrg":"CreativeWork","resourceType":"Article","resourceTypeGeneral":"Preprint"},"relatedIdentifiers":[],"relatedItems":[],"sizes":[],"formats":[],"version":"2","rightsList":[{"rights":"Creative Commons Attribution 4.0 International","rightsUri":"https://creativecommons.org/licenses/by/4.0/legalcode","schemeUri":"https://spdx.org/licenses/","rightsIdentifier":"cc-by-4.0","rightsIdentifierScheme":"SPDX"}],"descriptions":[{"description":"Episodic control enables sample efficiency in reinforcement learning by recalling past experiences from an episodic memory. We propose a new model-based episodic memory of trajectories addressing current limitations of episodic control. Our memory estimates trajectory values, guiding the agent towards good policies. Built upon the memory, we construct a complementary learning model via a dynamic hybrid control unifying model-based, episodic and habitual learning into a single architecture. Experiments demonstrate that our model allows significantly faster and better learning than other strong reinforcement learning agents across a variety of environments including stochastic and non-Markovian settings.","descriptionType":"Abstract"},{"description":"26 pages","descriptionType":"Other"}],"geoLocations":[],"fundingReferences":[],"xml":"PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz4KPHJlc291cmNlIHhtbG5zPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCIgeG1sbnM6eHNpPSJodHRwOi8vd3d3LnczLm9yZy8yMDAxL1hNTFNjaGVtYS1pbnN0YW5jZSIgeHNpOnNjaGVtYUxvY2F0aW9uPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCBodHRwOi8vc2NoZW1hLmRhdGFjaXRlLm9yZy9tZXRhL2tlcm5lbC00LjMvbWV0YWRhdGEueHNkIj4KICA8aWRlbnRpZmllciBpZGVudGlmaWVyVHlwZT0iRE9JIj4xMC40ODU1MC9BUlhJVi4yMTExLjAyMTA0PC9pZGVudGlmaWVyPgogIDxhbHRlcm5hdGVJZGVudGlmaWVycz4KICAgIDxhbHRlcm5hdGVJZGVudGlmaWVyIGFsdGVybmF0ZUlkZW50aWZpZXJUeXBlPSJhclhpdiI+MjExMS4wMjEwNDwvYWx0ZXJuYXRlSWRlbnRpZmllcj4KICA8L2FsdGVybmF0ZUlkZW50aWZpZXJzPgogIDxjcmVhdG9ycz4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5MZSwgSHVuZzwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+SHVuZzwvZ2l2ZW5OYW1lPgogICAgICA8ZmFtaWx5TmFtZT5MZTwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5HZW9yZ2UsIFRob21tZW4gS2FyaW1wYW5hbDwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+VGhvbW1lbiBLYXJpbXBhbmFsPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPkdlb3JnZTwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5BYmRvbHNoYWgsIE1hamlkPC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5NYWppZDwvZ2l2ZW5OYW1lPgogICAgICA8ZmFtaWx5TmFtZT5BYmRvbHNoYWg8L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJQZXJzb25hbCI+VHJhbiwgVHJ1eWVuPC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5UcnV5ZW48L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+VHJhbjwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5WZW5rYXRlc2gsIFN2ZXRoYTwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+U3ZldGhhPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPlZlbmthdGVzaDwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICA8L2NyZWF0b3JzPgogIDx0aXRsZXM+CiAgICA8dGl0bGU+TW9kZWwtQmFzZWQgRXBpc29kaWMgTWVtb3J5IEluZHVjZXMgRHluYW1pYyBIeWJyaWQgQ29udHJvbHM8L3RpdGxlPgogIDwvdGl0bGVzPgogIDxwdWJsaXNoZXI+YXJYaXY8L3B1Ymxpc2hlcj4KICA8cHVibGljYXRpb25ZZWFyPjIwMjE8L3B1YmxpY2F0aW9uWWVhcj4KICA8c3ViamVjdHM+CiAgICA8c3ViamVjdCB4bWw6bGFuZz0iZW4iIHN1YmplY3RTY2hlbWU9ImFyWGl2Ij5NYWNoaW5lIExlYXJuaW5nIChjcy5MRyk8L3N1YmplY3Q+CiAgICA8c3ViamVjdCB4bWw6bGFuZz0iZW4iIHN1YmplY3RTY2hlbWU9ImFyWGl2Ij5BcnRpZmljaWFsIEludGVsbGlnZW5jZSAoY3MuQUkpPC9zdWJqZWN0PgogICAgPHN1YmplY3Qgc3ViamVjdFNjaGVtZT0iRmllbGRzIG9mIFNjaWVuY2UgYW5kIFRlY2hub2xvZ3kgKEZPUykiPkZPUzogQ29tcHV0ZXIgYW5kIGluZm9ybWF0aW9uIHNjaWVuY2VzPC9zdWJqZWN0PgogIDwvc3ViamVjdHM+CiAgPGRhdGVzPgogICAgPGRhdGUgZGF0ZVR5cGU9IlN1Ym1pdHRlZCIgZGF0ZUluZm9ybWF0aW9uPSJ2MSI+MjAyMS0xMS0wM1QwOTo1MjozM1o8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iVXBkYXRlZCIgZGF0ZUluZm9ybWF0aW9uPSJ2MSI+MjAyMS0xMS0wNFQwMDoxNDoyOVo8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iU3VibWl0dGVkIiBkYXRlSW5mb3JtYXRpb249InYyIj4yMDIxLTExLTA2VDA4OjA0OjE4WjwvZGF0ZT4KICAgIDxkYXRlIGRhdGVUeXBlPSJVcGRhdGVkIiBkYXRlSW5mb3JtYXRpb249InYyIj4yMDIxLTExLTA5VDAxOjA3OjQ5WjwvZGF0ZT4KICAgIDxkYXRlIGRhdGVUeXBlPSJBdmFpbGFibGUiIGRhdGVJbmZvcm1hdGlvbj0idjEiPjIwMjEtMTE8L2RhdGU+CiAgPC9kYXRlcz4KICA8cmVzb3VyY2VUeXBlIHJlc291cmNlVHlwZUdlbmVyYWw9IlByZXByaW50Ij5BcnRpY2xlPC9yZXNvdXJjZVR5cGU+CiAgPHZlcnNpb24+MjwvdmVyc2lvbj4KICA8cmlnaHRzTGlzdD4KICAgIDxyaWdodHMgcmlnaHRzVVJJPSJodHRwOi8vY3JlYXRpdmVjb21tb25zLm9yZy9saWNlbnNlcy9ieS80LjAvIiByaWdodHNJZGVudGlmaWVyU2NoZW1lPSJTUERYIiByaWdodHNJZGVudGlmaWVyPSJDQy1CWS00LjAiPkNyZWF0aXZlIENvbW1vbnMgQXR0cmlidXRpb24gNC4wIEludGVybmF0aW9uYWw8L3JpZ2h0cz4KICA8L3JpZ2h0c0xpc3Q+CiAgPGRlc2NyaXB0aW9ucz4KICAgIDxkZXNjcmlwdGlvbiBkZXNjcmlwdGlvblR5cGU9IkFic3RyYWN0Ij5FcGlzb2RpYyBjb250cm9sIGVuYWJsZXMgc2FtcGxlIGVmZmljaWVuY3kgaW4gcmVpbmZvcmNlbWVudCBsZWFybmluZyBieSByZWNhbGxpbmcgcGFzdCBleHBlcmllbmNlcyBmcm9tIGFuIGVwaXNvZGljIG1lbW9yeS4gV2UgcHJvcG9zZSBhIG5ldyBtb2RlbC1iYXNlZCBlcGlzb2RpYyBtZW1vcnkgb2YgdHJhamVjdG9yaWVzIGFkZHJlc3NpbmcgY3VycmVudCBsaW1pdGF0aW9ucyBvZiBlcGlzb2RpYyBjb250cm9sLiBPdXIgbWVtb3J5IGVzdGltYXRlcyB0cmFqZWN0b3J5IHZhbHVlcywgZ3VpZGluZyB0aGUgYWdlbnQgdG93YXJkcyBnb29kIHBvbGljaWVzLiBCdWlsdCB1cG9uIHRoZSBtZW1vcnksIHdlIGNvbnN0cnVjdCBhIGNvbXBsZW1lbnRhcnkgbGVhcm5pbmcgbW9kZWwgdmlhIGEgZHluYW1pYyBoeWJyaWQgY29udHJvbCB1bmlmeWluZyBtb2RlbC1iYXNlZCwgZXBpc29kaWMgYW5kIGhhYml0dWFsIGxlYXJuaW5nIGludG8gYSBzaW5nbGUgYXJjaGl0ZWN0dXJlLiBFeHBlcmltZW50cyBkZW1vbnN0cmF0ZSB0aGF0IG91ciBtb2RlbCBhbGxvd3Mgc2lnbmlmaWNhbnRseSBmYXN0ZXIgYW5kIGJldHRlciBsZWFybmluZyB0aGFuIG90aGVyIHN0cm9uZyByZWluZm9yY2VtZW50IGxlYXJuaW5nIGFnZW50cyBhY3Jvc3MgYSB2YXJpZXR5IG9mIGVudmlyb25tZW50cyBpbmNsdWRpbmcgc3RvY2hhc3RpYyBhbmQgbm9uLU1hcmtvdmlhbiBzZXR0aW5ncy48L2Rlc2NyaXB0aW9uPgogICAgPGRlc2NyaXB0aW9uIGRlc2NyaXB0aW9uVHlwZT0iT3RoZXIiPjI2IHBhZ2VzPC9kZXNjcmlwdGlvbj4KICA8L2Rlc2NyaXB0aW9ucz4KPC9yZXNvdXJjZT4=","url":"https://arxiv.org/abs/2111.02104","contentUrl":null,"metadataVersion":0,"schemaVersion":"http://datacite.org/schema/kernel-4","source":"mds","isActive":true,"state":"findable","reason":null,"viewCount":0,"viewsOverTime":[],"downloadCount":0,"downloadsOverTime":[],"referenceCount":0,"citationCount":0,"citationsOverTime":[],"partCount":0,"partOfCount":0,"versionCount":0,"versionOfCount":0,"created":"2022-02-19T20:38:26.000Z","registered":"2022-02-19T20:38:27.000Z","published":"2021","updated":"2022-02-19T20:38:27.000Z"},"relationships":{"client":{"data":{"id":"arxiv.content","type":"clients"}},"provider":{"data":{"id":"arxiv","type":"providers"}},"media":{"data":{"id":"10.48550/arxiv.2111.02104","type":"media"}},"references":{"data":[]},"citations":{"data":[]},"parts":{"data":[]},"partOf":{"data":[]},"versions":{"data":[]},"versionOf":{"data":[]}}}}