{"data":{"id":"10.48550/arxiv.1907.05855","type":"dois","attributes":{"doi":"10.48550/arxiv.1907.05855","prefix":"10.48550","suffix":"arxiv.1907.05855","identifiers":[{"identifier":"1907.05855","identifierType":"arXiv"}],"alternateIdentifiers":[{"alternateIdentifierType":"arXiv","alternateIdentifier":"1907.05855"}],"creators":[{"name":"Traoré, René","nameType":"Personal","givenName":"René","familyName":"Traoré","affiliation":[],"nameIdentifiers":[]},{"name":"Caselles-Dupré, Hugo","nameType":"Personal","givenName":"Hugo","familyName":"Caselles-Dupré","affiliation":[],"nameIdentifiers":[]},{"name":"Lesort, Timothée","nameType":"Personal","givenName":"Timothée","familyName":"Lesort","affiliation":[],"nameIdentifiers":[]},{"name":"Sun, Te","nameType":"Personal","givenName":"Te","familyName":"Sun","affiliation":[],"nameIdentifiers":[]},{"name":"Cai, Guanghang","nameType":"Personal","givenName":"Guanghang","familyName":"Cai","affiliation":[],"nameIdentifiers":[]},{"name":"Díaz-Rodríguez, Natalia","nameType":"Personal","givenName":"Natalia","familyName":"Díaz-Rodríguez","affiliation":[],"nameIdentifiers":[]},{"name":"Filliat, David","nameType":"Personal","givenName":"David","familyName":"Filliat","affiliation":[],"nameIdentifiers":[]}],"titles":[{"title":"DisCoRL: Continual Reinforcement Learning via Policy Distillation"}],"publisher":"arXiv","container":{},"publicationYear":2019,"subjects":[{"lang":"en","subject":"Machine Learning (cs.LG)","subjectScheme":"arXiv"},{"lang":"en","subject":"Artificial Intelligence (cs.AI)","subjectScheme":"arXiv"},{"lang":"en","subject":"Machine Learning (stat.ML)","subjectScheme":"arXiv"},{"subject":"FOS: Computer and information sciences","subjectScheme":"Fields of Science and Technology (FOS)"},{"subject":"FOS: Computer and information sciences","schemeUri":"http://www.oecd.org/science/inno/38235147.pdf","subjectScheme":"Fields of Science and Technology (FOS)"}],"contributors":[],"dates":[{"date":"2019-07-11T09:12:42Z","dateType":"Submitted","dateInformation":"v1"},{"date":"2019-07-15T00:16:43Z","dateType":"Updated","dateInformation":"v1"},{"date":"2019-07","dateType":"Available","dateInformation":"v1"},{"date":"2019","dateType":"Issued"}],"language":null,"types":{"ris":"GEN","bibtex":"misc","citeproc":"article","schemaOrg":"CreativeWork","resourceType":"Article","resourceTypeGeneral":"Preprint"},"relatedIdentifiers":[],"relatedItems":[],"sizes":[],"formats":[],"version":"1","rightsList":[{"rights":"arXiv.org perpetual, non-exclusive license","rightsUri":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/"}],"descriptions":[{"description":"In multi-task reinforcement learning there are two main challenges: at training time, the ability to learn different policies with a single model; at test time, inferring which of those policies applying without an external signal. In the case of continual reinforcement learning a third challenge arises: learning tasks sequentially without forgetting the previous ones. In this paper, we tackle these challenges by proposing DisCoRL, an approach combining state representation learning and policy distillation. We experiment on a sequence of three simulated 2D navigation tasks with a 3 wheel omni-directional robot. Moreover, we tested our approach's robustness by transferring the final policy into a real life setting. The policy can solve all tasks and automatically infer which one to run.","descriptionType":"Abstract"},{"description":"arXiv admin note: text overlap with arXiv:1906.04452","descriptionType":"Other"}],"geoLocations":[],"fundingReferences":[],"xml":"PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz4KPHJlc291cmNlIHhtbG5zPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCIgeG1sbnM6eHNpPSJodHRwOi8vd3d3LnczLm9yZy8yMDAxL1hNTFNjaGVtYS1pbnN0YW5jZSIgeHNpOnNjaGVtYUxvY2F0aW9uPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCBodHRwOi8vc2NoZW1hLmRhdGFjaXRlLm9yZy9tZXRhL2tlcm5lbC00LjMvbWV0YWRhdGEueHNkIj4KICA8aWRlbnRpZmllciBpZGVudGlmaWVyVHlwZT0iRE9JIj4xMC40ODU1MC9BUlhJVi4xOTA3LjA1ODU1PC9pZGVudGlmaWVyPgogIDxhbHRlcm5hdGVJZGVudGlmaWVycz4KICAgIDxhbHRlcm5hdGVJZGVudGlmaWVyIGFsdGVybmF0ZUlkZW50aWZpZXJUeXBlPSJhclhpdiI+MTkwNy4wNTg1NTwvYWx0ZXJuYXRlSWRlbnRpZmllcj4KICA8L2FsdGVybmF0ZUlkZW50aWZpZXJzPgogIDxjcmVhdG9ycz4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5UcmFvcsOpLCBSZW7DqTwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+UmVuw6k8L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+VHJhb3LDqTwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5DYXNlbGxlcy1EdXByw6ksIEh1Z288L2NyZWF0b3JOYW1lPgogICAgICA8Z2l2ZW5OYW1lPkh1Z288L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+Q2FzZWxsZXMtRHVwcsOpPC9mYW1pbHlOYW1lPgogICAgPC9jcmVhdG9yPgogICAgPGNyZWF0b3I+CiAgICAgIDxjcmVhdG9yTmFtZSBuYW1lVHlwZT0iUGVyc29uYWwiPkxlc29ydCwgVGltb3Row6llPC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5UaW1vdGjDqWU8L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+TGVzb3J0PC9mYW1pbHlOYW1lPgogICAgPC9jcmVhdG9yPgogICAgPGNyZWF0b3I+CiAgICAgIDxjcmVhdG9yTmFtZSBuYW1lVHlwZT0iUGVyc29uYWwiPlN1biwgVGU8L2NyZWF0b3JOYW1lPgogICAgICA8Z2l2ZW5OYW1lPlRlPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPlN1bjwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5DYWksIEd1YW5naGFuZzwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+R3VhbmdoYW5nPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPkNhaTwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5Ew61hei1Sb2Ryw61ndWV6LCBOYXRhbGlhPC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5OYXRhbGlhPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPkTDrWF6LVJvZHLDrWd1ZXo8L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJQZXJzb25hbCI+RmlsbGlhdCwgRGF2aWQ8L2NyZWF0b3JOYW1lPgogICAgICA8Z2l2ZW5OYW1lPkRhdmlkPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPkZpbGxpYXQ8L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgPC9jcmVhdG9ycz4KICA8dGl0bGVzPgogICAgPHRpdGxlPkRpc0NvUkw6IENvbnRpbnVhbCBSZWluZm9yY2VtZW50IExlYXJuaW5nIHZpYSBQb2xpY3kgRGlzdGlsbGF0aW9uPC90aXRsZT4KICA8L3RpdGxlcz4KICA8cHVibGlzaGVyPmFyWGl2PC9wdWJsaXNoZXI+CiAgPHB1YmxpY2F0aW9uWWVhcj4yMDE5PC9wdWJsaWNhdGlvblllYXI+CiAgPHN1YmplY3RzPgogICAgPHN1YmplY3QgeG1sOmxhbmc9ImVuIiBzdWJqZWN0U2NoZW1lPSJhclhpdiI+TWFjaGluZSBMZWFybmluZyAoY3MuTEcpPC9zdWJqZWN0PgogICAgPHN1YmplY3QgeG1sOmxhbmc9ImVuIiBzdWJqZWN0U2NoZW1lPSJhclhpdiI+QXJ0aWZpY2lhbCBJbnRlbGxpZ2VuY2UgKGNzLkFJKTwvc3ViamVjdD4KICAgIDxzdWJqZWN0IHhtbDpsYW5nPSJlbiIgc3ViamVjdFNjaGVtZT0iYXJYaXYiPk1hY2hpbmUgTGVhcm5pbmcgKHN0YXQuTUwpPC9zdWJqZWN0PgogICAgPHN1YmplY3Qgc3ViamVjdFNjaGVtZT0iRmllbGRzIG9mIFNjaWVuY2UgYW5kIFRlY2hub2xvZ3kgKEZPUykiPkZPUzogQ29tcHV0ZXIgYW5kIGluZm9ybWF0aW9uIHNjaWVuY2VzPC9zdWJqZWN0PgogIDwvc3ViamVjdHM+CiAgPGRhdGVzPgogICAgPGRhdGUgZGF0ZVR5cGU9IlN1Ym1pdHRlZCIgZGF0ZUluZm9ybWF0aW9uPSJ2MSI+MjAxOS0wNy0xMVQwOToxMjo0Mlo8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iVXBkYXRlZCIgZGF0ZUluZm9ybWF0aW9uPSJ2MSI+MjAxOS0wNy0xNVQwMDoxNjo0M1o8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iQXZhaWxhYmxlIiBkYXRlSW5mb3JtYXRpb249InYxIj4yMDE5LTA3PC9kYXRlPgogIDwvZGF0ZXM+CiAgPHJlc291cmNlVHlwZSByZXNvdXJjZVR5cGVHZW5lcmFsPSJQcmVwcmludCI+QXJ0aWNsZTwvcmVzb3VyY2VUeXBlPgogIDx2ZXJzaW9uPjE8L3ZlcnNpb24+CiAgPHJpZ2h0c0xpc3Q+CiAgICA8cmlnaHRzIHJpZ2h0c1VSST0iaHR0cDovL2FyeGl2Lm9yZy9saWNlbnNlcy9ub25leGNsdXNpdmUtZGlzdHJpYi8xLjAvIj5hclhpdi5vcmcgcGVycGV0dWFsLCBub24tZXhjbHVzaXZlIGxpY2Vuc2U8L3JpZ2h0cz4KICA8L3JpZ2h0c0xpc3Q+CiAgPGRlc2NyaXB0aW9ucz4KICAgIDxkZXNjcmlwdGlvbiBkZXNjcmlwdGlvblR5cGU9IkFic3RyYWN0Ij5JbiBtdWx0aS10YXNrIHJlaW5mb3JjZW1lbnQgbGVhcm5pbmcgdGhlcmUgYXJlIHR3byBtYWluIGNoYWxsZW5nZXM6IGF0IHRyYWluaW5nIHRpbWUsIHRoZSBhYmlsaXR5IHRvIGxlYXJuIGRpZmZlcmVudCBwb2xpY2llcyB3aXRoIGEgc2luZ2xlIG1vZGVsOyBhdCB0ZXN0IHRpbWUsIGluZmVycmluZyB3aGljaCBvZiB0aG9zZSBwb2xpY2llcyBhcHBseWluZyB3aXRob3V0IGFuIGV4dGVybmFsIHNpZ25hbC4gSW4gdGhlIGNhc2Ugb2YgY29udGludWFsIHJlaW5mb3JjZW1lbnQgbGVhcm5pbmcgYSB0aGlyZCBjaGFsbGVuZ2UgYXJpc2VzOiBsZWFybmluZyB0YXNrcyBzZXF1ZW50aWFsbHkgd2l0aG91dCBmb3JnZXR0aW5nIHRoZSBwcmV2aW91cyBvbmVzLiBJbiB0aGlzIHBhcGVyLCB3ZSB0YWNrbGUgdGhlc2UgY2hhbGxlbmdlcyBieSBwcm9wb3NpbmcgRGlzQ29STCwgYW4gYXBwcm9hY2ggY29tYmluaW5nIHN0YXRlIHJlcHJlc2VudGF0aW9uIGxlYXJuaW5nIGFuZCBwb2xpY3kgZGlzdGlsbGF0aW9uLiBXZSBleHBlcmltZW50IG9uIGEgc2VxdWVuY2Ugb2YgdGhyZWUgc2ltdWxhdGVkIDJEIG5hdmlnYXRpb24gdGFza3Mgd2l0aCBhIDMgd2hlZWwgb21uaS1kaXJlY3Rpb25hbCByb2JvdC4gTW9yZW92ZXIsIHdlIHRlc3RlZCBvdXIgYXBwcm9hY2gncyByb2J1c3RuZXNzIGJ5IHRyYW5zZmVycmluZyB0aGUgZmluYWwgcG9saWN5IGludG8gYSByZWFsIGxpZmUgc2V0dGluZy4gVGhlIHBvbGljeSBjYW4gc29sdmUgYWxsIHRhc2tzIGFuZCBhdXRvbWF0aWNhbGx5IGluZmVyIHdoaWNoIG9uZSB0byBydW4uPC9kZXNjcmlwdGlvbj4KICAgIDxkZXNjcmlwdGlvbiBkZXNjcmlwdGlvblR5cGU9Ik90aGVyIj5hclhpdiBhZG1pbiBub3RlOiB0ZXh0IG92ZXJsYXAgd2l0aCBhclhpdjoxOTA2LjA0NDUyPC9kZXNjcmlwdGlvbj4KICA8L2Rlc2NyaXB0aW9ucz4KPC9yZXNvdXJjZT4=","url":"https://arxiv.org/abs/1907.05855","contentUrl":null,"metadataVersion":0,"schemaVersion":"http://datacite.org/schema/kernel-4","source":"mds","isActive":true,"state":"findable","reason":null,"viewCount":0,"viewsOverTime":[],"downloadCount":0,"downloadsOverTime":[],"referenceCount":0,"citationCount":1,"citationsOverTime":[{"year":"2022","total":1}],"partCount":0,"partOfCount":0,"versionCount":0,"versionOfCount":0,"created":"2022-02-27T21:41:33.000Z","registered":"2022-02-27T21:41:34.000Z","published":"2019","updated":"2022-02-27T21:41:34.000Z"},"relationships":{"client":{"data":{"id":"arxiv.content","type":"clients"}},"provider":{"data":{"id":"arxiv","type":"providers"}},"media":{"data":{"id":"10.48550/arxiv.1907.05855","type":"media"}},"references":{"data":[]},"citations":{"data":[{"id":"10.1038/s42256-022-00452-0","type":"dois"}]},"parts":{"data":[]},"partOf":{"data":[]},"versions":{"data":[]},"versionOf":{"data":[]}}}}