{"data":{"id":"10.48550/arxiv.2306.09459","type":"dois","attributes":{"doi":"10.48550/arxiv.2306.09459","prefix":"10.48550","suffix":"arxiv.2306.09459","identifiers":[{"identifier":"2306.09459","identifierType":"arXiv"}],"alternateIdentifiers":[{"alternateIdentifierType":"arXiv","alternateIdentifier":"2306.09459"}],"creators":[{"name":"Cherepanov, Egor","nameType":"Personal","givenName":"Egor","familyName":"Cherepanov","affiliation":[],"nameIdentifiers":[]},{"name":"Staroverov, Alexey","nameType":"Personal","givenName":"Alexey","familyName":"Staroverov","affiliation":[],"nameIdentifiers":[]},{"name":"Kovalev, Alexey K.","nameType":"Personal","givenName":"Alexey K.","familyName":"Kovalev","affiliation":[],"nameIdentifiers":[]},{"name":"Panov, Aleksandr I.","nameType":"Personal","givenName":"Aleksandr I.","familyName":"Panov","affiliation":[],"nameIdentifiers":[]}],"titles":[{"title":"Recurrent Action Transformer with Memory"}],"publisher":"arXiv","container":{},"publicationYear":2023,"subjects":[{"lang":"en","subject":"Machine Learning (cs.LG)","subjectScheme":"arXiv"},{"lang":"en","subject":"Artificial Intelligence (cs.AI)","subjectScheme":"arXiv"},{"subject":"FOS: Computer and information sciences","subjectScheme":"Fields of Science and Technology (FOS)"},{"subject":"FOS: Computer and information sciences","schemeUri":"http://www.oecd.org/science/inno/38235147.pdf","subjectScheme":"Fields of Science and Technology (FOS)"}],"contributors":[],"dates":[{"date":"2023-06-15T19:29:08Z","dateType":"Submitted","dateInformation":"v1"},{"date":"2023-06-19T00:02:10Z","dateType":"Updated","dateInformation":"v1"},{"date":"2023-07-05T06:20:35Z","dateType":"Submitted","dateInformation":"v2"},{"date":"2023-07-06T00:39:04Z","dateType":"Updated","dateInformation":"v2"},{"date":"2024-03-27T14:02:58Z","dateType":"Submitted","dateInformation":"v3"},{"date":"2024-03-28T01:02:10Z","dateType":"Updated","dateInformation":"v3"},{"date":"2024-07-23T16:17:36Z","dateType":"Submitted","dateInformation":"v4"},{"date":"2024-07-24T00:53:37Z","dateType":"Updated","dateInformation":"v4"},{"date":"2024-10-14T14:33:48Z","dateType":"Submitted","dateInformation":"v5"},{"date":"2024-10-15T02:00:25Z","dateType":"Updated","dateInformation":"v5"},{"date":"2026-03-04T17:00:23Z","dateType":"Submitted","dateInformation":"v6"},{"date":"2026-03-05T02:12:12Z","dateType":"Updated","dateInformation":"v6"},{"date":"2023-06","dateType":"Available","dateInformation":"v1"},{"date":"2023","dateType":"Issued"}],"language":null,"types":{"ris":"GEN","bibtex":"misc","citeproc":"article","schemaOrg":"CreativeWork","resourceType":"Article","resourceTypeGeneral":"Preprint"},"relatedIdentifiers":[],"relatedItems":[],"sizes":[],"formats":[],"version":"6","rightsList":[{"rights":"Creative Commons Attribution 4.0 International","rightsUri":"https://creativecommons.org/licenses/by/4.0/legalcode","schemeUri":"https://spdx.org/licenses/","rightsIdentifier":"cc-by-4.0","rightsIdentifierScheme":"SPDX"}],"descriptions":[{"description":"Transformers have become increasingly popular in offline reinforcement learning (RL) due to their ability to treat agent trajectories as sequences, reframing policy learning as a sequence modeling task. However, in partially observable environments (POMDPs), effective decision-making depends on retaining information about past events -- something that standard transformers struggle with due to the quadratic complexity of self-attention, which limits their context length. One solution to this problem is to extend transformers with memory mechanisms. We propose the Recurrent Action Transformer with Memory (RATE), a novel transformer-based architecture for offline RL that incorporates a recurrent memory mechanism designed to regulate information retention. We evaluate RATE across a diverse set of environments: memory-intensive tasks (ViZDoom-Two-Colors, T-Maze, Memory Maze, Minigrid-Memory, and POPGym), as well as standard Atari and MuJoCo benchmarks. Our comprehensive experiments demonstrate that RATE significantly improves performance in memory-dependent settings while remaining competitive on standard tasks across a broad range of baselines. These findings underscore the pivotal role of integrated memory mechanisms in offline RL and establish RATE as a unified, high-capacity architecture for effective decision-making over extended horizons. Code: https://sites.google.com/view/rate-model/.","descriptionType":"Abstract"},{"description":"29 pages, 22 figures, 13 tables","descriptionType":"Other"}],"geoLocations":[],"fundingReferences":[],"xml":"PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiPz4KPHJlc291cmNlIHhtbG5zPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCIgeG1sbnM6eHNpPSJodHRwOi8vd3d3LnczLm9yZy8yMDAxL1hNTFNjaGVtYS1pbnN0YW5jZSIgeHNpOnNjaGVtYUxvY2F0aW9uPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCBodHRwOi8vc2NoZW1hLmRhdGFjaXRlLm9yZy9tZXRhL2tlcm5lbC00LjMvbWV0YWRhdGEueHNkIj4KICA8aWRlbnRpZmllciBpZGVudGlmaWVyVHlwZT0iRE9JIj4xMC40ODU1MC9BUlhJVi4yMzA2LjA5NDU5PC9pZGVudGlmaWVyPgogIDxhbHRlcm5hdGVJZGVudGlmaWVycz4KICAgIDxhbHRlcm5hdGVJZGVudGlmaWVyIGFsdGVybmF0ZUlkZW50aWZpZXJUeXBlPSJhclhpdiI+MjMwNi4wOTQ1OTwvYWx0ZXJuYXRlSWRlbnRpZmllcj4KICA8L2FsdGVybmF0ZUlkZW50aWZpZXJzPgogIDxjcmVhdG9ycz4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5DaGVyZXBhbm92LCBFZ29yPC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5FZ29yPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPkNoZXJlcGFub3Y8L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJQZXJzb25hbCI+U3Rhcm92ZXJvdiwgQWxleGV5PC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5BbGV4ZXk8L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+U3Rhcm92ZXJvdjwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5Lb3ZhbGV2LCBBbGV4ZXkgSy48L2NyZWF0b3JOYW1lPgogICAgICA8Z2l2ZW5OYW1lPkFsZXhleSBLLjwvZ2l2ZW5OYW1lPgogICAgICA8ZmFtaWx5TmFtZT5Lb3ZhbGV2PC9mYW1pbHlOYW1lPgogICAgPC9jcmVhdG9yPgogICAgPGNyZWF0b3I+CiAgICAgIDxjcmVhdG9yTmFtZSBuYW1lVHlwZT0iUGVyc29uYWwiPlBhbm92LCBBbGVrc2FuZHIgSS48L2NyZWF0b3JOYW1lPgogICAgICA8Z2l2ZW5OYW1lPkFsZWtzYW5kciBJLjwvZ2l2ZW5OYW1lPgogICAgICA8ZmFtaWx5TmFtZT5QYW5vdjwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICA8L2NyZWF0b3JzPgogIDx0aXRsZXM+CiAgICA8dGl0bGU+UmVjdXJyZW50IEFjdGlvbiBUcmFuc2Zvcm1lciB3aXRoIE1lbW9yeTwvdGl0bGU+CiAgPC90aXRsZXM+CiAgPHB1Ymxpc2hlcj5hclhpdjwvcHVibGlzaGVyPgogIDxwdWJsaWNhdGlvblllYXI+MjAyMzwvcHVibGljYXRpb25ZZWFyPgogIDxzdWJqZWN0cz4KICAgIDxzdWJqZWN0IHhtbDpsYW5nPSJlbiIgc3ViamVjdFNjaGVtZT0iYXJYaXYiPk1hY2hpbmUgTGVhcm5pbmcgKGNzLkxHKTwvc3ViamVjdD4KICAgIDxzdWJqZWN0IHhtbDpsYW5nPSJlbiIgc3ViamVjdFNjaGVtZT0iYXJYaXYiPkFydGlmaWNpYWwgSW50ZWxsaWdlbmNlIChjcy5BSSk8L3N1YmplY3Q+CiAgICA8c3ViamVjdCBzdWJqZWN0U2NoZW1lPSJGaWVsZHMgb2YgU2NpZW5jZSBhbmQgVGVjaG5vbG9neSAoRk9TKSI+Rk9TOiBDb21wdXRlciBhbmQgaW5mb3JtYXRpb24gc2NpZW5jZXM8L3N1YmplY3Q+CiAgPC9zdWJqZWN0cz4KICA8ZGF0ZXM+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iU3VibWl0dGVkIiBkYXRlSW5mb3JtYXRpb249InYxIj4yMDIzLTA2LTE1VDE5OjI5OjA4WjwvZGF0ZT4KICAgIDxkYXRlIGRhdGVUeXBlPSJVcGRhdGVkIiBkYXRlSW5mb3JtYXRpb249InYxIj4yMDIzLTA2LTE5VDAwOjAyOjEwWjwvZGF0ZT4KICAgIDxkYXRlIGRhdGVUeXBlPSJTdWJtaXR0ZWQiIGRhdGVJbmZvcm1hdGlvbj0idjIiPjIwMjMtMDctMDVUMDY6MjA6MzVaPC9kYXRlPgogICAgPGRhdGUgZGF0ZVR5cGU9IlVwZGF0ZWQiIGRhdGVJbmZvcm1hdGlvbj0idjIiPjIwMjMtMDctMDZUMDA6Mzk6MDRaPC9kYXRlPgogICAgPGRhdGUgZGF0ZVR5cGU9IlN1Ym1pdHRlZCIgZGF0ZUluZm9ybWF0aW9uPSJ2MyI+MjAyNC0wMy0yN1QxNDowMjo1OFo8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iVXBkYXRlZCIgZGF0ZUluZm9ybWF0aW9uPSJ2MyI+MjAyNC0wMy0yOFQwMTowMjoxMFo8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iU3VibWl0dGVkIiBkYXRlSW5mb3JtYXRpb249InY0Ij4yMDI0LTA3LTIzVDE2OjE3OjM2WjwvZGF0ZT4KICAgIDxkYXRlIGRhdGVUeXBlPSJVcGRhdGVkIiBkYXRlSW5mb3JtYXRpb249InY0Ij4yMDI0LTA3LTI0VDAwOjUzOjM3WjwvZGF0ZT4KICAgIDxkYXRlIGRhdGVUeXBlPSJTdWJtaXR0ZWQiIGRhdGVJbmZvcm1hdGlvbj0idjUiPjIwMjQtMTAtMTRUMTQ6MzM6NDhaPC9kYXRlPgogICAgPGRhdGUgZGF0ZVR5cGU9IlVwZGF0ZWQiIGRhdGVJbmZvcm1hdGlvbj0idjUiPjIwMjQtMTAtMTVUMDI6MDA6MjVaPC9kYXRlPgogICAgPGRhdGUgZGF0ZVR5cGU9IlN1Ym1pdHRlZCIgZGF0ZUluZm9ybWF0aW9uPSJ2NiI+MjAyNi0wMy0wNFQxNzowMDoyM1o8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iVXBkYXRlZCIgZGF0ZUluZm9ybWF0aW9uPSJ2NiI+MjAyNi0wMy0wNVQwMjoxMjoxMlo8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iQXZhaWxhYmxlIiBkYXRlSW5mb3JtYXRpb249InYxIj4yMDIzLTA2PC9kYXRlPgogIDwvZGF0ZXM+CiAgPHJlc291cmNlVHlwZSByZXNvdXJjZVR5cGVHZW5lcmFsPSJQcmVwcmludCI+QXJ0aWNsZTwvcmVzb3VyY2VUeXBlPgogIDx2ZXJzaW9uPjY8L3ZlcnNpb24+CiAgPHJpZ2h0c0xpc3Q+CiAgICA8cmlnaHRzIHJpZ2h0c1VSST0iaHR0cDovL2NyZWF0aXZlY29tbW9ucy5vcmcvbGljZW5zZXMvYnkvNC4wLyIgcmlnaHRzSWRlbnRpZmllclNjaGVtZT0iU1BEWCIgcmlnaHRzSWRlbnRpZmllcj0iQ0MtQlktNC4wIj5DcmVhdGl2ZSBDb21tb25zIEF0dHJpYnV0aW9uIDQuMCBJbnRlcm5hdGlvbmFsPC9yaWdodHM+CiAgPC9yaWdodHNMaXN0PgogIDxkZXNjcmlwdGlvbnM+CiAgICA8ZGVzY3JpcHRpb24gZGVzY3JpcHRpb25UeXBlPSJBYnN0cmFjdCI+VHJhbnNmb3JtZXJzIGhhdmUgYmVjb21lIGluY3JlYXNpbmdseSBwb3B1bGFyIGluIG9mZmxpbmUgcmVpbmZvcmNlbWVudCBsZWFybmluZyAoUkwpIGR1ZSB0byB0aGVpciBhYmlsaXR5IHRvIHRyZWF0IGFnZW50IHRyYWplY3RvcmllcyBhcyBzZXF1ZW5jZXMsIHJlZnJhbWluZyBwb2xpY3kgbGVhcm5pbmcgYXMgYSBzZXF1ZW5jZSBtb2RlbGluZyB0YXNrLiBIb3dldmVyLCBpbiBwYXJ0aWFsbHkgb2JzZXJ2YWJsZSBlbnZpcm9ubWVudHMgKFBPTURQcyksIGVmZmVjdGl2ZSBkZWNpc2lvbi1tYWtpbmcgZGVwZW5kcyBvbiByZXRhaW5pbmcgaW5mb3JtYXRpb24gYWJvdXQgcGFzdCBldmVudHMgLS0gc29tZXRoaW5nIHRoYXQgc3RhbmRhcmQgdHJhbnNmb3JtZXJzIHN0cnVnZ2xlIHdpdGggZHVlIHRvIHRoZSBxdWFkcmF0aWMgY29tcGxleGl0eSBvZiBzZWxmLWF0dGVudGlvbiwgd2hpY2ggbGltaXRzIHRoZWlyIGNvbnRleHQgbGVuZ3RoLiBPbmUgc29sdXRpb24gdG8gdGhpcyBwcm9ibGVtIGlzIHRvIGV4dGVuZCB0cmFuc2Zvcm1lcnMgd2l0aCBtZW1vcnkgbWVjaGFuaXNtcy4gV2UgcHJvcG9zZSB0aGUgUmVjdXJyZW50IEFjdGlvbiBUcmFuc2Zvcm1lciB3aXRoIE1lbW9yeSAoUkFURSksIGEgbm92ZWwgdHJhbnNmb3JtZXItYmFzZWQgYXJjaGl0ZWN0dXJlIGZvciBvZmZsaW5lIFJMIHRoYXQgaW5jb3Jwb3JhdGVzIGEgcmVjdXJyZW50IG1lbW9yeSBtZWNoYW5pc20gZGVzaWduZWQgdG8gcmVndWxhdGUgaW5mb3JtYXRpb24gcmV0ZW50aW9uLiBXZSBldmFsdWF0ZSBSQVRFIGFjcm9zcyBhIGRpdmVyc2Ugc2V0IG9mIGVudmlyb25tZW50czogbWVtb3J5LWludGVuc2l2ZSB0YXNrcyAoVmlaRG9vbS1Ud28tQ29sb3JzLCBULU1hemUsIE1lbW9yeSBNYXplLCBNaW5pZ3JpZC1NZW1vcnksIGFuZCBQT1BHeW0pLCBhcyB3ZWxsIGFzIHN0YW5kYXJkIEF0YXJpIGFuZCBNdUpvQ28gYmVuY2htYXJrcy4gT3VyIGNvbXByZWhlbnNpdmUgZXhwZXJpbWVudHMgZGVtb25zdHJhdGUgdGhhdCBSQVRFIHNpZ25pZmljYW50bHkgaW1wcm92ZXMgcGVyZm9ybWFuY2UgaW4gbWVtb3J5LWRlcGVuZGVudCBzZXR0aW5ncyB3aGlsZSByZW1haW5pbmcgY29tcGV0aXRpdmUgb24gc3RhbmRhcmQgdGFza3MgYWNyb3NzIGEgYnJvYWQgcmFuZ2Ugb2YgYmFzZWxpbmVzLiBUaGVzZSBmaW5kaW5ncyB1bmRlcnNjb3JlIHRoZSBwaXZvdGFsIHJvbGUgb2YgaW50ZWdyYXRlZCBtZW1vcnkgbWVjaGFuaXNtcyBpbiBvZmZsaW5lIFJMIGFuZCBlc3RhYmxpc2ggUkFURSBhcyBhIHVuaWZpZWQsIGhpZ2gtY2FwYWNpdHkgYXJjaGl0ZWN0dXJlIGZvciBlZmZlY3RpdmUgZGVjaXNpb24tbWFraW5nIG92ZXIgZXh0ZW5kZWQgaG9yaXpvbnMuIENvZGU6IGh0dHBzOi8vc2l0ZXMuZ29vZ2xlLmNvbS92aWV3L3JhdGUtbW9kZWwvLjwvZGVzY3JpcHRpb24+CiAgICA8ZGVzY3JpcHRpb24gZGVzY3JpcHRpb25UeXBlPSJPdGhlciI+MjkgcGFnZXMsIDIyIGZpZ3VyZXMsIDEzIHRhYmxlczwvZGVzY3JpcHRpb24+CiAgPC9kZXNjcmlwdGlvbnM+CjwvcmVzb3VyY2U+","url":"https://arxiv.org/abs/2306.09459","contentUrl":null,"metadataVersion":5,"schemaVersion":"http://datacite.org/schema/kernel-4","source":"mds","isActive":true,"state":"findable","reason":null,"viewCount":0,"viewsOverTime":[],"downloadCount":0,"downloadsOverTime":[],"referenceCount":0,"citationCount":0,"citationsOverTime":[],"partCount":0,"partOfCount":0,"versionCount":0,"versionOfCount":0,"created":"2023-06-19T01:01:58.000Z","registered":"2023-06-19T01:01:59.000Z","published":"2023","updated":"2026-03-05T03:36:16.000Z"},"relationships":{"client":{"data":{"id":"arxiv.content","type":"clients"}},"provider":{"data":{"id":"arxiv","type":"providers"}},"media":{"data":{"id":"10.48550/arxiv.2306.09459","type":"media"}},"references":{"data":[]},"citations":{"data":[]},"parts":{"data":[]},"partOf":{"data":[]},"versions":{"data":[]},"versionOf":{"data":[]}}}}