{"data":{"id":"10.48550/arxiv.1602.02410","type":"dois","attributes":{"doi":"10.48550/arxiv.1602.02410","prefix":"10.48550","suffix":"arxiv.1602.02410","identifiers":[{"identifier":"1602.02410","identifierType":"arXiv"}],"alternateIdentifiers":[{"alternateIdentifierType":"arXiv","alternateIdentifier":"1602.02410"}],"creators":[{"name":"Jozefowicz, Rafal","nameType":"Personal","givenName":"Rafal","familyName":"Jozefowicz","affiliation":[],"nameIdentifiers":[]},{"name":"Vinyals, Oriol","nameType":"Personal","givenName":"Oriol","familyName":"Vinyals","affiliation":[],"nameIdentifiers":[]},{"name":"Schuster, Mike","nameType":"Personal","givenName":"Mike","familyName":"Schuster","affiliation":[],"nameIdentifiers":[]},{"name":"Shazeer, Noam","nameType":"Personal","givenName":"Noam","familyName":"Shazeer","affiliation":[],"nameIdentifiers":[]},{"name":"Wu, Yonghui","nameType":"Personal","givenName":"Yonghui","familyName":"Wu","affiliation":[],"nameIdentifiers":[]}],"titles":[{"title":"Exploring the Limits of Language Modeling"}],"publisher":"arXiv","container":{},"publicationYear":2016,"subjects":[{"lang":"en","subject":"Computation and Language (cs.CL)","subjectScheme":"arXiv"},{"subject":"FOS: Computer and information sciences","subjectScheme":"Fields of Science and Technology (FOS)"},{"subject":"FOS: Computer and information sciences","schemeUri":"http://www.oecd.org/science/inno/38235147.pdf","subjectScheme":"Fields of Science and Technology (FOS)"}],"contributors":[],"dates":[{"date":"2016-02-07T19:11:17Z","dateType":"Submitted","dateInformation":"v1"},{"date":"2016-02-09T01:09:09Z","dateType":"Updated","dateInformation":"v1"},{"date":"2016-02-11T23:01:48Z","dateType":"Submitted","dateInformation":"v2"},{"date":"2016-02-15T01:01:29Z","dateType":"Updated","dateInformation":"v2"},{"date":"2016-02","dateType":"Available","dateInformation":"v1"},{"date":"2016","dateType":"Issued"}],"language":null,"types":{"ris":"GEN","bibtex":"misc","citeproc":"article","schemaOrg":"CreativeWork","resourceType":"Article","resourceTypeGeneral":"Preprint"},"relatedIdentifiers":[],"relatedItems":[],"sizes":[],"formats":[],"version":"2","rightsList":[{"rights":"arXiv.org perpetual, non-exclusive license","rightsUri":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/"}],"descriptions":[{"description":"In this work we explore recent advances in Recurrent Neural Networks for large scale Language Modeling, a task central to language understanding. We extend current models to deal with two key challenges present in this task: corpora and vocabulary sizes, and complex, long term structure of language. We perform an exhaustive study on techniques such as character Convolutional Neural Networks or Long-Short Term Memory, on the One Billion Word Benchmark. Our best single model significantly improves state-of-the-art perplexity from 51.3 down to 30.0 (whilst reducing the number of parameters by a factor of 20), while an ensemble of models sets a new record by improving perplexity from 41.0 down to 23.7. We also release these models for the NLP and ML community to study and improve upon.","descriptionType":"Abstract"}],"geoLocations":[],"fundingReferences":[],"xml":"PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz4KPHJlc291cmNlIHhtbG5zPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCIgeG1sbnM6eHNpPSJodHRwOi8vd3d3LnczLm9yZy8yMDAxL1hNTFNjaGVtYS1pbnN0YW5jZSIgeHNpOnNjaGVtYUxvY2F0aW9uPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCBodHRwOi8vc2NoZW1hLmRhdGFjaXRlLm9yZy9tZXRhL2tlcm5lbC00LjMvbWV0YWRhdGEueHNkIj4KICA8aWRlbnRpZmllciBpZGVudGlmaWVyVHlwZT0iRE9JIj4xMC40ODU1MC9BUlhJVi4xNjAyLjAyNDEwPC9pZGVudGlmaWVyPgogIDxhbHRlcm5hdGVJZGVudGlmaWVycz4KICAgIDxhbHRlcm5hdGVJZGVudGlmaWVyIGFsdGVybmF0ZUlkZW50aWZpZXJUeXBlPSJhclhpdiI+MTYwMi4wMjQxMDwvYWx0ZXJuYXRlSWRlbnRpZmllcj4KICA8L2FsdGVybmF0ZUlkZW50aWZpZXJzPgogIDxjcmVhdG9ycz4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5Kb3plZm93aWN6LCBSYWZhbDwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+UmFmYWw8L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+Sm96ZWZvd2ljejwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5WaW55YWxzLCBPcmlvbDwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+T3Jpb2w8L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+VmlueWFsczwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5TY2h1c3RlciwgTWlrZTwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+TWlrZTwvZ2l2ZW5OYW1lPgogICAgICA8ZmFtaWx5TmFtZT5TY2h1c3RlcjwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5TaGF6ZWVyLCBOb2FtPC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5Ob2FtPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPlNoYXplZXI8L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJQZXJzb25hbCI+V3UsIFlvbmdodWk8L2NyZWF0b3JOYW1lPgogICAgICA8Z2l2ZW5OYW1lPllvbmdodWk8L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+V3U8L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgPC9jcmVhdG9ycz4KICA8dGl0bGVzPgogICAgPHRpdGxlPkV4cGxvcmluZyB0aGUgTGltaXRzIG9mIExhbmd1YWdlIE1vZGVsaW5nPC90aXRsZT4KICA8L3RpdGxlcz4KICA8cHVibGlzaGVyPmFyWGl2PC9wdWJsaXNoZXI+CiAgPHB1YmxpY2F0aW9uWWVhcj4yMDE2PC9wdWJsaWNhdGlvblllYXI+CiAgPHN1YmplY3RzPgogICAgPHN1YmplY3QgeG1sOmxhbmc9ImVuIiBzdWJqZWN0U2NoZW1lPSJhclhpdiI+Q29tcHV0YXRpb24gYW5kIExhbmd1YWdlIChjcy5DTCk8L3N1YmplY3Q+CiAgICA8c3ViamVjdCBzdWJqZWN0U2NoZW1lPSJGaWVsZHMgb2YgU2NpZW5jZSBhbmQgVGVjaG5vbG9neSAoRk9TKSI+Rk9TOiBDb21wdXRlciBhbmQgaW5mb3JtYXRpb24gc2NpZW5jZXM8L3N1YmplY3Q+CiAgPC9zdWJqZWN0cz4KICA8ZGF0ZXM+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iU3VibWl0dGVkIiBkYXRlSW5mb3JtYXRpb249InYxIj4yMDE2LTAyLTA3VDE5OjExOjE3WjwvZGF0ZT4KICAgIDxkYXRlIGRhdGVUeXBlPSJVcGRhdGVkIiBkYXRlSW5mb3JtYXRpb249InYxIj4yMDE2LTAyLTA5VDAxOjA5OjA5WjwvZGF0ZT4KICAgIDxkYXRlIGRhdGVUeXBlPSJTdWJtaXR0ZWQiIGRhdGVJbmZvcm1hdGlvbj0idjIiPjIwMTYtMDItMTFUMjM6MDE6NDhaPC9kYXRlPgogICAgPGRhdGUgZGF0ZVR5cGU9IlVwZGF0ZWQiIGRhdGVJbmZvcm1hdGlvbj0idjIiPjIwMTYtMDItMTVUMDE6MDE6MjlaPC9kYXRlPgogICAgPGRhdGUgZGF0ZVR5cGU9IkF2YWlsYWJsZSIgZGF0ZUluZm9ybWF0aW9uPSJ2MSI+MjAxNi0wMjwvZGF0ZT4KICA8L2RhdGVzPgogIDxyZXNvdXJjZVR5cGUgcmVzb3VyY2VUeXBlR2VuZXJhbD0iUHJlcHJpbnQiPkFydGljbGU8L3Jlc291cmNlVHlwZT4KICA8dmVyc2lvbj4yPC92ZXJzaW9uPgogIDxyaWdodHNMaXN0PgogICAgPHJpZ2h0cyByaWdodHNVUkk9Imh0dHA6Ly9hcnhpdi5vcmcvbGljZW5zZXMvbm9uZXhjbHVzaXZlLWRpc3RyaWIvMS4wLyI+YXJYaXYub3JnIHBlcnBldHVhbCwgbm9uLWV4Y2x1c2l2ZSBsaWNlbnNlPC9yaWdodHM+CiAgPC9yaWdodHNMaXN0PgogIDxkZXNjcmlwdGlvbnM+CiAgICA8ZGVzY3JpcHRpb24gZGVzY3JpcHRpb25UeXBlPSJBYnN0cmFjdCI+SW4gdGhpcyB3b3JrIHdlIGV4cGxvcmUgcmVjZW50IGFkdmFuY2VzIGluIFJlY3VycmVudCBOZXVyYWwgTmV0d29ya3MgZm9yIGxhcmdlIHNjYWxlIExhbmd1YWdlIE1vZGVsaW5nLCBhIHRhc2sgY2VudHJhbCB0byBsYW5ndWFnZSB1bmRlcnN0YW5kaW5nLiBXZSBleHRlbmQgY3VycmVudCBtb2RlbHMgdG8gZGVhbCB3aXRoIHR3byBrZXkgY2hhbGxlbmdlcyBwcmVzZW50IGluIHRoaXMgdGFzazogY29ycG9yYSBhbmQgdm9jYWJ1bGFyeSBzaXplcywgYW5kIGNvbXBsZXgsIGxvbmcgdGVybSBzdHJ1Y3R1cmUgb2YgbGFuZ3VhZ2UuIFdlIHBlcmZvcm0gYW4gZXhoYXVzdGl2ZSBzdHVkeSBvbiB0ZWNobmlxdWVzIHN1Y2ggYXMgY2hhcmFjdGVyIENvbnZvbHV0aW9uYWwgTmV1cmFsIE5ldHdvcmtzIG9yIExvbmctU2hvcnQgVGVybSBNZW1vcnksIG9uIHRoZSBPbmUgQmlsbGlvbiBXb3JkIEJlbmNobWFyay4gT3VyIGJlc3Qgc2luZ2xlIG1vZGVsIHNpZ25pZmljYW50bHkgaW1wcm92ZXMgc3RhdGUtb2YtdGhlLWFydCBwZXJwbGV4aXR5IGZyb20gNTEuMyBkb3duIHRvIDMwLjAgKHdoaWxzdCByZWR1Y2luZyB0aGUgbnVtYmVyIG9mIHBhcmFtZXRlcnMgYnkgYSBmYWN0b3Igb2YgMjApLCB3aGlsZSBhbiBlbnNlbWJsZSBvZiBtb2RlbHMgc2V0cyBhIG5ldyByZWNvcmQgYnkgaW1wcm92aW5nIHBlcnBsZXhpdHkgZnJvbSA0MS4wIGRvd24gdG8gMjMuNy4gV2UgYWxzbyByZWxlYXNlIHRoZXNlIG1vZGVscyBmb3IgdGhlIE5MUCBhbmQgTUwgY29tbXVuaXR5IHRvIHN0dWR5IGFuZCBpbXByb3ZlIHVwb24uPC9kZXNjcmlwdGlvbj4KICA8L2Rlc2NyaXB0aW9ucz4KPC9yZXNvdXJjZT4=","url":"https://arxiv.org/abs/1602.02410","contentUrl":null,"metadataVersion":0,"schemaVersion":"http://datacite.org/schema/kernel-4","source":"mds","isActive":true,"state":"findable","reason":null,"viewCount":0,"viewsOverTime":[],"downloadCount":0,"downloadsOverTime":[],"referenceCount":0,"citationCount":0,"citationsOverTime":[],"partCount":0,"partOfCount":0,"versionCount":0,"versionOfCount":0,"created":"2022-03-06T20:08:44.000Z","registered":"2022-03-06T20:08:45.000Z","published":"2016","updated":"2022-03-06T20:08:45.000Z"},"relationships":{"client":{"data":{"id":"arxiv.content","type":"clients"}},"provider":{"data":{"id":"arxiv","type":"providers"}},"media":{"data":{"id":"10.48550/arxiv.1602.02410","type":"media"}},"references":{"data":[]},"citations":{"data":[]},"parts":{"data":[]},"partOf":{"data":[]},"versions":{"data":[]},"versionOf":{"data":[]}}}}