{"data":{"id":"10.48550/arxiv.2304.01373","type":"dois","attributes":{"doi":"10.48550/arxiv.2304.01373","prefix":"10.48550","suffix":"arxiv.2304.01373","identifiers":[{"identifier":"2304.01373","identifierType":"arXiv"}],"alternateIdentifiers":[{"alternateIdentifierType":"arXiv","alternateIdentifier":"2304.01373"}],"creators":[{"name":"Biderman, Stella","nameType":"Personal","givenName":"Stella","familyName":"Biderman","affiliation":[],"nameIdentifiers":[]},{"name":"Schoelkopf, Hailey","nameType":"Personal","givenName":"Hailey","familyName":"Schoelkopf","affiliation":[],"nameIdentifiers":[]},{"name":"Anthony, Quentin","nameType":"Personal","givenName":"Quentin","familyName":"Anthony","affiliation":[],"nameIdentifiers":[]},{"name":"Bradley, Herbie","nameType":"Personal","givenName":"Herbie","familyName":"Bradley","affiliation":[],"nameIdentifiers":[]},{"name":"O'Brien, Kyle","nameType":"Personal","givenName":"Kyle","familyName":"O'Brien","affiliation":[],"nameIdentifiers":[]},{"name":"Hallahan, Eric","nameType":"Personal","givenName":"Eric","familyName":"Hallahan","affiliation":[],"nameIdentifiers":[]},{"name":"Khan, Mohammad Aflah","nameType":"Personal","givenName":"Mohammad Aflah","familyName":"Khan","affiliation":[],"nameIdentifiers":[]},{"name":"Purohit, Shivanshu","nameType":"Personal","givenName":"Shivanshu","familyName":"Purohit","affiliation":[],"nameIdentifiers":[]},{"name":"Prashanth, USVSN Sai","nameType":"Personal","givenName":"USVSN Sai","familyName":"Prashanth","affiliation":[],"nameIdentifiers":[]},{"name":"Raff, Edward","nameType":"Personal","givenName":"Edward","familyName":"Raff","affiliation":[],"nameIdentifiers":[]},{"name":"Skowron, Aviya","nameType":"Personal","givenName":"Aviya","familyName":"Skowron","affiliation":[],"nameIdentifiers":[]},{"name":"Sutawika, Lintang","nameType":"Personal","givenName":"Lintang","familyName":"Sutawika","affiliation":[],"nameIdentifiers":[]},{"name":"van der Wal, Oskar","nameType":"Personal","givenName":"Oskar","familyName":"van der Wal","affiliation":[],"nameIdentifiers":[]}],"titles":[{"title":"Pythia: A Suite for Analyzing Large Language Models Across Training and Scaling"}],"publisher":"arXiv","container":{},"publicationYear":2023,"subjects":[{"lang":"en","subject":"Computation and Language (cs.CL)","subjectScheme":"arXiv"},{"subject":"FOS: Computer and information sciences","subjectScheme":"Fields of Science and Technology (FOS)"},{"subject":"FOS: Computer and information sciences","schemeUri":"http://www.oecd.org/science/inno/38235147.pdf","subjectScheme":"Fields of Science and Technology (FOS)"}],"contributors":[],"dates":[{"date":"2023-04-03T20:58:15Z","dateType":"Submitted","dateInformation":"v1"},{"date":"2023-04-05T00:03:59Z","dateType":"Updated","dateInformation":"v1"},{"date":"2023-05-31T17:54:07Z","dateType":"Submitted","dateInformation":"v2"},{"date":"2023-06-01T00:28:00Z","dateType":"Updated","dateInformation":"v2"},{"date":"2023-04","dateType":"Available","dateInformation":"v1"},{"date":"2023","dateType":"Issued"}],"language":null,"types":{"ris":"GEN","bibtex":"misc","citeproc":"article","schemaOrg":"CreativeWork","resourceType":"Article","resourceTypeGeneral":"Preprint"},"relatedIdentifiers":[],"relatedItems":[],"sizes":[],"formats":[],"version":"2","rightsList":[{"rights":"Creative Commons Attribution Share Alike 4.0 International","rightsUri":"https://creativecommons.org/licenses/by-sa/4.0/legalcode","schemeUri":"https://spdx.org/licenses/","rightsIdentifier":"cc-by-sa-4.0","rightsIdentifierScheme":"SPDX"}],"descriptions":[{"description":"How do large language models (LLMs) develop and evolve over the course of training? How do these patterns change as models scale? To answer these questions, we introduce \\textit{Pythia}, a suite of 16 LLMs all trained on public data seen in the exact same order and ranging in size from 70M to 12B parameters. We provide public access to 154 checkpoints for each one of the 16 models, alongside tools to download and reconstruct their exact training dataloaders for further study. We intend \\textit{Pythia} to facilitate research in many areas, and we present several case studies including novel results in memorization, term frequency effects on few-shot performance, and reducing gender bias. We demonstrate that this highly controlled setup can be used to yield novel insights toward LLMs and their training dynamics. Trained models, analysis code, training code, and training data can be found at \\url{https://github.com/EleutherAI/pythia}.","descriptionType":"Abstract"},{"description":"Code at https://github.com/EleutherAI/pythia","descriptionType":"Other"}],"geoLocations":[],"fundingReferences":[],"xml":"PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz4KPHJlc291cmNlIHhtbG5zPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCIgeG1sbnM6eHNpPSJodHRwOi8vd3d3LnczLm9yZy8yMDAxL1hNTFNjaGVtYS1pbnN0YW5jZSIgeHNpOnNjaGVtYUxvY2F0aW9uPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCBodHRwOi8vc2NoZW1hLmRhdGFjaXRlLm9yZy9tZXRhL2tlcm5lbC00LjMvbWV0YWRhdGEueHNkIj4KICA8aWRlbnRpZmllciBpZGVudGlmaWVyVHlwZT0iRE9JIj4xMC40ODU1MC9BUlhJVi4yMzA0LjAxMzczPC9pZGVudGlmaWVyPgogIDxhbHRlcm5hdGVJZGVudGlmaWVycz4KICAgIDxhbHRlcm5hdGVJZGVudGlmaWVyIGFsdGVybmF0ZUlkZW50aWZpZXJUeXBlPSJhclhpdiI+MjMwNC4wMTM3MzwvYWx0ZXJuYXRlSWRlbnRpZmllcj4KICA8L2FsdGVybmF0ZUlkZW50aWZpZXJzPgogIDxjcmVhdG9ycz4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5CaWRlcm1hbiwgU3RlbGxhPC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5TdGVsbGE8L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+QmlkZXJtYW48L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJQZXJzb25hbCI+U2Nob2Vsa29wZiwgSGFpbGV5PC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5IYWlsZXk8L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+U2Nob2Vsa29wZjwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5BbnRob255LCBRdWVudGluPC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5RdWVudGluPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPkFudGhvbnk8L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJQZXJzb25hbCI+QnJhZGxleSwgSGVyYmllPC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5IZXJiaWU8L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+QnJhZGxleTwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5PJ0JyaWVuLCBLeWxlPC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5LeWxlPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPk8nQnJpZW48L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJQZXJzb25hbCI+SGFsbGFoYW4sIEVyaWM8L2NyZWF0b3JOYW1lPgogICAgICA8Z2l2ZW5OYW1lPkVyaWM8L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+SGFsbGFoYW48L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJQZXJzb25hbCI+S2hhbiwgTW9oYW1tYWQgQWZsYWg8L2NyZWF0b3JOYW1lPgogICAgICA8Z2l2ZW5OYW1lPk1vaGFtbWFkIEFmbGFoPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPktoYW48L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJQZXJzb25hbCI+UHVyb2hpdCwgU2hpdmFuc2h1PC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5TaGl2YW5zaHU8L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+UHVyb2hpdDwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5QcmFzaGFudGgsIFVTVlNOIFNhaTwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+VVNWU04gU2FpPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPlByYXNoYW50aDwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5SYWZmLCBFZHdhcmQ8L2NyZWF0b3JOYW1lPgogICAgICA8Z2l2ZW5OYW1lPkVkd2FyZDwvZ2l2ZW5OYW1lPgogICAgICA8ZmFtaWx5TmFtZT5SYWZmPC9mYW1pbHlOYW1lPgogICAgPC9jcmVhdG9yPgogICAgPGNyZWF0b3I+CiAgICAgIDxjcmVhdG9yTmFtZSBuYW1lVHlwZT0iUGVyc29uYWwiPlNrb3dyb24sIEF2aXlhPC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5Bdml5YTwvZ2l2ZW5OYW1lPgogICAgICA8ZmFtaWx5TmFtZT5Ta293cm9uPC9mYW1pbHlOYW1lPgogICAgPC9jcmVhdG9yPgogICAgPGNyZWF0b3I+CiAgICAgIDxjcmVhdG9yTmFtZSBuYW1lVHlwZT0iUGVyc29uYWwiPlN1dGF3aWthLCBMaW50YW5nPC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5MaW50YW5nPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPlN1dGF3aWthPC9mYW1pbHlOYW1lPgogICAgPC9jcmVhdG9yPgogICAgPGNyZWF0b3I+CiAgICAgIDxjcmVhdG9yTmFtZSBuYW1lVHlwZT0iUGVyc29uYWwiPnZhbiBkZXIgV2FsLCBPc2thcjwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+T3NrYXI8L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+dmFuIGRlciBXYWw8L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgPC9jcmVhdG9ycz4KICA8dGl0bGVzPgogICAgPHRpdGxlPlB5dGhpYTogQSBTdWl0ZSBmb3IgQW5hbHl6aW5nIExhcmdlIExhbmd1YWdlIE1vZGVscyBBY3Jvc3MgVHJhaW5pbmcgYW5kIFNjYWxpbmc8L3RpdGxlPgogIDwvdGl0bGVzPgogIDxwdWJsaXNoZXI+YXJYaXY8L3B1Ymxpc2hlcj4KICA8cHVibGljYXRpb25ZZWFyPjIwMjM8L3B1YmxpY2F0aW9uWWVhcj4KICA8c3ViamVjdHM+CiAgICA8c3ViamVjdCB4bWw6bGFuZz0iZW4iIHN1YmplY3RTY2hlbWU9ImFyWGl2Ij5Db21wdXRhdGlvbiBhbmQgTGFuZ3VhZ2UgKGNzLkNMKTwvc3ViamVjdD4KICAgIDxzdWJqZWN0IHN1YmplY3RTY2hlbWU9IkZpZWxkcyBvZiBTY2llbmNlIGFuZCBUZWNobm9sb2d5IChGT1MpIj5GT1M6IENvbXB1dGVyIGFuZCBpbmZvcm1hdGlvbiBzY2llbmNlczwvc3ViamVjdD4KICA8L3N1YmplY3RzPgogIDxkYXRlcz4KICAgIDxkYXRlIGRhdGVUeXBlPSJTdWJtaXR0ZWQiIGRhdGVJbmZvcm1hdGlvbj0idjEiPjIwMjMtMDQtMDNUMjA6NTg6MTVaPC9kYXRlPgogICAgPGRhdGUgZGF0ZVR5cGU9IlVwZGF0ZWQiIGRhdGVJbmZvcm1hdGlvbj0idjEiPjIwMjMtMDQtMDVUMDA6MDM6NTlaPC9kYXRlPgogICAgPGRhdGUgZGF0ZVR5cGU9IlN1Ym1pdHRlZCIgZGF0ZUluZm9ybWF0aW9uPSJ2MiI+MjAyMy0wNS0zMVQxNzo1NDowN1o8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iVXBkYXRlZCIgZGF0ZUluZm9ybWF0aW9uPSJ2MiI+MjAyMy0wNi0wMVQwMDoyODowMFo8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iQXZhaWxhYmxlIiBkYXRlSW5mb3JtYXRpb249InYxIj4yMDIzLTA0PC9kYXRlPgogIDwvZGF0ZXM+CiAgPHJlc291cmNlVHlwZSByZXNvdXJjZVR5cGVHZW5lcmFsPSJQcmVwcmludCI+QXJ0aWNsZTwvcmVzb3VyY2VUeXBlPgogIDx2ZXJzaW9uPjI8L3ZlcnNpb24+CiAgPHJpZ2h0c0xpc3Q+CiAgICA8cmlnaHRzIHJpZ2h0c1VSST0iaHR0cDovL2NyZWF0aXZlY29tbW9ucy5vcmcvbGljZW5zZXMvYnktc2EvNC4wLyIgcmlnaHRzSWRlbnRpZmllclNjaGVtZT0iU1BEWCIgcmlnaHRzSWRlbnRpZmllcj0iQ0MtQlktU0EtNC4wIj5DcmVhdGl2ZSBDb21tb25zIEF0dHJpYnV0aW9uIFNoYXJlIEFsaWtlIDQuMCBJbnRlcm5hdGlvbmFsPC9yaWdodHM+CiAgPC9yaWdodHNMaXN0PgogIDxkZXNjcmlwdGlvbnM+CiAgICA8ZGVzY3JpcHRpb24gZGVzY3JpcHRpb25UeXBlPSJBYnN0cmFjdCI+SG93IGRvIGxhcmdlIGxhbmd1YWdlIG1vZGVscyAoTExNcykgZGV2ZWxvcCBhbmQgZXZvbHZlIG92ZXIgdGhlIGNvdXJzZSBvZiB0cmFpbmluZz8gSG93IGRvIHRoZXNlIHBhdHRlcm5zIGNoYW5nZSBhcyBtb2RlbHMgc2NhbGU/IFRvIGFuc3dlciB0aGVzZSBxdWVzdGlvbnMsIHdlIGludHJvZHVjZSBcdGV4dGl0e1B5dGhpYX0sIGEgc3VpdGUgb2YgMTYgTExNcyBhbGwgdHJhaW5lZCBvbiBwdWJsaWMgZGF0YSBzZWVuIGluIHRoZSBleGFjdCBzYW1lIG9yZGVyIGFuZCByYW5naW5nIGluIHNpemUgZnJvbSA3ME0gdG8gMTJCIHBhcmFtZXRlcnMuIFdlIHByb3ZpZGUgcHVibGljIGFjY2VzcyB0byAxNTQgY2hlY2twb2ludHMgZm9yIGVhY2ggb25lIG9mIHRoZSAxNiBtb2RlbHMsIGFsb25nc2lkZSB0b29scyB0byBkb3dubG9hZCBhbmQgcmVjb25zdHJ1Y3QgdGhlaXIgZXhhY3QgdHJhaW5pbmcgZGF0YWxvYWRlcnMgZm9yIGZ1cnRoZXIgc3R1ZHkuIFdlIGludGVuZCBcdGV4dGl0e1B5dGhpYX0gdG8gZmFjaWxpdGF0ZSByZXNlYXJjaCBpbiBtYW55IGFyZWFzLCBhbmQgd2UgcHJlc2VudCBzZXZlcmFsIGNhc2Ugc3R1ZGllcyBpbmNsdWRpbmcgbm92ZWwgcmVzdWx0cyBpbiBtZW1vcml6YXRpb24sIHRlcm0gZnJlcXVlbmN5IGVmZmVjdHMgb24gZmV3LXNob3QgcGVyZm9ybWFuY2UsIGFuZCByZWR1Y2luZyBnZW5kZXIgYmlhcy4gV2UgZGVtb25zdHJhdGUgdGhhdCB0aGlzIGhpZ2hseSBjb250cm9sbGVkIHNldHVwIGNhbiBiZSB1c2VkIHRvIHlpZWxkIG5vdmVsIGluc2lnaHRzIHRvd2FyZCBMTE1zIGFuZCB0aGVpciB0cmFpbmluZyBkeW5hbWljcy4gVHJhaW5lZCBtb2RlbHMsIGFuYWx5c2lzIGNvZGUsIHRyYWluaW5nIGNvZGUsIGFuZCB0cmFpbmluZyBkYXRhIGNhbiBiZSBmb3VuZCBhdCBcdXJse2h0dHBzOi8vZ2l0aHViLmNvbS9FbGV1dGhlckFJL3B5dGhpYX0uPC9kZXNjcmlwdGlvbj4KICAgIDxkZXNjcmlwdGlvbiBkZXNjcmlwdGlvblR5cGU9Ik90aGVyIj5Db2RlIGF0IGh0dHBzOi8vZ2l0aHViLmNvbS9FbGV1dGhlckFJL3B5dGhpYTwvZGVzY3JpcHRpb24+CiAgPC9kZXNjcmlwdGlvbnM+CjwvcmVzb3VyY2U+","url":"https://arxiv.org/abs/2304.01373","contentUrl":null,"metadataVersion":1,"schemaVersion":"http://datacite.org/schema/kernel-4","source":"mds","isActive":true,"state":"findable","reason":null,"viewCount":0,"viewsOverTime":[],"downloadCount":0,"downloadsOverTime":[],"referenceCount":0,"citationCount":2,"citationsOverTime":[{"year":"2026","total":2}],"partCount":0,"partOfCount":0,"versionCount":0,"versionOfCount":0,"created":"2023-04-05T01:39:23.000Z","registered":"2023-04-05T01:39:23.000Z","published":"2023","updated":"2023-06-01T01:35:24.000Z"},"relationships":{"client":{"data":{"id":"arxiv.content","type":"clients"}},"provider":{"data":{"id":"arxiv","type":"providers"}},"media":{"data":{"id":"10.48550/arxiv.2304.01373","type":"media"}},"references":{"data":[]},"citations":{"data":[{"id":"10.5281/zenodo.19435674","type":"dois"},{"id":"10.5281/zenodo.19704227","type":"dois"}]},"parts":{"data":[]},"partOf":{"data":[]},"versions":{"data":[]},"versionOf":{"data":[]}}}}