{"data":{"id":"10.48550/arxiv.2102.12092","type":"dois","attributes":{"doi":"10.48550/arxiv.2102.12092","prefix":"10.48550","suffix":"arxiv.2102.12092","identifiers":[{"identifier":"2102.12092","identifierType":"arXiv"}],"alternateIdentifiers":[{"alternateIdentifierType":"arXiv","alternateIdentifier":"2102.12092"}],"creators":[{"name":"Ramesh, Aditya","nameType":"Personal","givenName":"Aditya","familyName":"Ramesh","affiliation":[],"nameIdentifiers":[]},{"name":"Pavlov, Mikhail","nameType":"Personal","givenName":"Mikhail","familyName":"Pavlov","affiliation":[],"nameIdentifiers":[]},{"name":"Goh, Gabriel","nameType":"Personal","givenName":"Gabriel","familyName":"Goh","affiliation":[],"nameIdentifiers":[]},{"name":"Gray, Scott","nameType":"Personal","givenName":"Scott","familyName":"Gray","affiliation":[],"nameIdentifiers":[]},{"name":"Voss, Chelsea","nameType":"Personal","givenName":"Chelsea","familyName":"Voss","affiliation":[],"nameIdentifiers":[]},{"name":"Radford, Alec","nameType":"Personal","givenName":"Alec","familyName":"Radford","affiliation":[],"nameIdentifiers":[]},{"name":"Chen, Mark","nameType":"Personal","givenName":"Mark","familyName":"Chen","affiliation":[],"nameIdentifiers":[]},{"name":"Sutskever, Ilya","nameType":"Personal","givenName":"Ilya","familyName":"Sutskever","affiliation":[],"nameIdentifiers":[]}],"titles":[{"title":"Zero-Shot Text-to-Image Generation"}],"publisher":"arXiv","container":{},"publicationYear":2021,"subjects":[{"lang":"en","subject":"Computer Vision and Pattern Recognition (cs.CV)","subjectScheme":"arXiv"},{"lang":"en","subject":"Machine Learning (cs.LG)","subjectScheme":"arXiv"},{"subject":"FOS: Computer and information sciences","subjectScheme":"Fields of Science and Technology (FOS)"},{"subject":"FOS: Computer and information sciences","schemeUri":"http://www.oecd.org/science/inno/38235147.pdf","subjectScheme":"Fields of Science and Technology (FOS)"}],"contributors":[],"dates":[{"date":"2021-02-24T06:42:31Z","dateType":"Submitted","dateInformation":"v1"},{"date":"2021-02-25T01:12:26Z","dateType":"Updated","dateInformation":"v1"},{"date":"2021-02-26T23:26:05Z","dateType":"Submitted","dateInformation":"v2"},{"date":"2021-03-02T01:06:13Z","dateType":"Updated","dateInformation":"v2"},{"date":"2021-02","dateType":"Available","dateInformation":"v1"},{"date":"2021","dateType":"Issued"}],"language":null,"types":{"ris":"GEN","bibtex":"misc","citeproc":"article","schemaOrg":"CreativeWork","resourceType":"Article","resourceTypeGeneral":"Preprint"},"relatedIdentifiers":[],"relatedItems":[],"sizes":[],"formats":[],"version":"2","rightsList":[{"rights":"Creative Commons Attribution 4.0 International","rightsUri":"https://creativecommons.org/licenses/by/4.0/legalcode","schemeUri":"https://spdx.org/licenses/","rightsIdentifier":"cc-by-4.0","rightsIdentifierScheme":"SPDX"}],"descriptions":[{"description":"Text-to-image generation has traditionally focused on finding better modeling assumptions for training on a fixed dataset. These assumptions might involve complex architectures, auxiliary losses, or side information such as object part labels or segmentation masks supplied during training. We describe a simple approach for this task based on a transformer that autoregressively models the text and image tokens as a single stream of data. With sufficient data and scale, our approach is competitive with previous domain-specific models when evaluated in a zero-shot fashion.","descriptionType":"Abstract"}],"geoLocations":[],"fundingReferences":[],"xml":"PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz4KPHJlc291cmNlIHhtbG5zPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCIgeG1sbnM6eHNpPSJodHRwOi8vd3d3LnczLm9yZy8yMDAxL1hNTFNjaGVtYS1pbnN0YW5jZSIgeHNpOnNjaGVtYUxvY2F0aW9uPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCBodHRwOi8vc2NoZW1hLmRhdGFjaXRlLm9yZy9tZXRhL2tlcm5lbC00LjMvbWV0YWRhdGEueHNkIj4KICA8aWRlbnRpZmllciBpZGVudGlmaWVyVHlwZT0iRE9JIj4xMC40ODU1MC9BUlhJVi4yMTAyLjEyMDkyPC9pZGVudGlmaWVyPgogIDxhbHRlcm5hdGVJZGVudGlmaWVycz4KICAgIDxhbHRlcm5hdGVJZGVudGlmaWVyIGFsdGVybmF0ZUlkZW50aWZpZXJUeXBlPSJhclhpdiI+MjEwMi4xMjA5MjwvYWx0ZXJuYXRlSWRlbnRpZmllcj4KICA8L2FsdGVybmF0ZUlkZW50aWZpZXJzPgogIDxjcmVhdG9ycz4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5SYW1lc2gsIEFkaXR5YTwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+QWRpdHlhPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPlJhbWVzaDwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5QYXZsb3YsIE1pa2hhaWw8L2NyZWF0b3JOYW1lPgogICAgICA8Z2l2ZW5OYW1lPk1pa2hhaWw8L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+UGF2bG92PC9mYW1pbHlOYW1lPgogICAgPC9jcmVhdG9yPgogICAgPGNyZWF0b3I+CiAgICAgIDxjcmVhdG9yTmFtZSBuYW1lVHlwZT0iUGVyc29uYWwiPkdvaCwgR2FicmllbDwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+R2FicmllbDwvZ2l2ZW5OYW1lPgogICAgICA8ZmFtaWx5TmFtZT5Hb2g8L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJQZXJzb25hbCI+R3JheSwgU2NvdHQ8L2NyZWF0b3JOYW1lPgogICAgICA8Z2l2ZW5OYW1lPlNjb3R0PC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPkdyYXk8L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJQZXJzb25hbCI+Vm9zcywgQ2hlbHNlYTwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+Q2hlbHNlYTwvZ2l2ZW5OYW1lPgogICAgICA8ZmFtaWx5TmFtZT5Wb3NzPC9mYW1pbHlOYW1lPgogICAgPC9jcmVhdG9yPgogICAgPGNyZWF0b3I+CiAgICAgIDxjcmVhdG9yTmFtZSBuYW1lVHlwZT0iUGVyc29uYWwiPlJhZGZvcmQsIEFsZWM8L2NyZWF0b3JOYW1lPgogICAgICA8Z2l2ZW5OYW1lPkFsZWM8L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+UmFkZm9yZDwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5DaGVuLCBNYXJrPC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5NYXJrPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPkNoZW48L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJQZXJzb25hbCI+U3V0c2tldmVyLCBJbHlhPC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5JbHlhPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPlN1dHNrZXZlcjwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICA8L2NyZWF0b3JzPgogIDx0aXRsZXM+CiAgICA8dGl0bGU+WmVyby1TaG90IFRleHQtdG8tSW1hZ2UgR2VuZXJhdGlvbjwvdGl0bGU+CiAgPC90aXRsZXM+CiAgPHB1Ymxpc2hlcj5hclhpdjwvcHVibGlzaGVyPgogIDxwdWJsaWNhdGlvblllYXI+MjAyMTwvcHVibGljYXRpb25ZZWFyPgogIDxzdWJqZWN0cz4KICAgIDxzdWJqZWN0IHhtbDpsYW5nPSJlbiIgc3ViamVjdFNjaGVtZT0iYXJYaXYiPkNvbXB1dGVyIFZpc2lvbiBhbmQgUGF0dGVybiBSZWNvZ25pdGlvbiAoY3MuQ1YpPC9zdWJqZWN0PgogICAgPHN1YmplY3QgeG1sOmxhbmc9ImVuIiBzdWJqZWN0U2NoZW1lPSJhclhpdiI+TWFjaGluZSBMZWFybmluZyAoY3MuTEcpPC9zdWJqZWN0PgogICAgPHN1YmplY3Qgc3ViamVjdFNjaGVtZT0iRmllbGRzIG9mIFNjaWVuY2UgYW5kIFRlY2hub2xvZ3kgKEZPUykiPkZPUzogQ29tcHV0ZXIgYW5kIGluZm9ybWF0aW9uIHNjaWVuY2VzPC9zdWJqZWN0PgogIDwvc3ViamVjdHM+CiAgPGRhdGVzPgogICAgPGRhdGUgZGF0ZVR5cGU9IlN1Ym1pdHRlZCIgZGF0ZUluZm9ybWF0aW9uPSJ2MSI+MjAyMS0wMi0yNFQwNjo0MjozMVo8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iVXBkYXRlZCIgZGF0ZUluZm9ybWF0aW9uPSJ2MSI+MjAyMS0wMi0yNVQwMToxMjoyNlo8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iU3VibWl0dGVkIiBkYXRlSW5mb3JtYXRpb249InYyIj4yMDIxLTAyLTI2VDIzOjI2OjA1WjwvZGF0ZT4KICAgIDxkYXRlIGRhdGVUeXBlPSJVcGRhdGVkIiBkYXRlSW5mb3JtYXRpb249InYyIj4yMDIxLTAzLTAyVDAxOjA2OjEzWjwvZGF0ZT4KICAgIDxkYXRlIGRhdGVUeXBlPSJBdmFpbGFibGUiIGRhdGVJbmZvcm1hdGlvbj0idjEiPjIwMjEtMDI8L2RhdGU+CiAgPC9kYXRlcz4KICA8cmVzb3VyY2VUeXBlIHJlc291cmNlVHlwZUdlbmVyYWw9IlByZXByaW50Ij5BcnRpY2xlPC9yZXNvdXJjZVR5cGU+CiAgPHZlcnNpb24+MjwvdmVyc2lvbj4KICA8cmlnaHRzTGlzdD4KICAgIDxyaWdodHMgcmlnaHRzVVJJPSJodHRwOi8vY3JlYXRpdmVjb21tb25zLm9yZy9saWNlbnNlcy9ieS80LjAvIiByaWdodHNJZGVudGlmaWVyU2NoZW1lPSJTUERYIiByaWdodHNJZGVudGlmaWVyPSJDQy1CWS00LjAiPkNyZWF0aXZlIENvbW1vbnMgQXR0cmlidXRpb24gNC4wIEludGVybmF0aW9uYWw8L3JpZ2h0cz4KICA8L3JpZ2h0c0xpc3Q+CiAgPGRlc2NyaXB0aW9ucz4KICAgIDxkZXNjcmlwdGlvbiBkZXNjcmlwdGlvblR5cGU9IkFic3RyYWN0Ij5UZXh0LXRvLWltYWdlIGdlbmVyYXRpb24gaGFzIHRyYWRpdGlvbmFsbHkgZm9jdXNlZCBvbiBmaW5kaW5nIGJldHRlciBtb2RlbGluZyBhc3N1bXB0aW9ucyBmb3IgdHJhaW5pbmcgb24gYSBmaXhlZCBkYXRhc2V0LiBUaGVzZSBhc3N1bXB0aW9ucyBtaWdodCBpbnZvbHZlIGNvbXBsZXggYXJjaGl0ZWN0dXJlcywgYXV4aWxpYXJ5IGxvc3Nlcywgb3Igc2lkZSBpbmZvcm1hdGlvbiBzdWNoIGFzIG9iamVjdCBwYXJ0IGxhYmVscyBvciBzZWdtZW50YXRpb24gbWFza3Mgc3VwcGxpZWQgZHVyaW5nIHRyYWluaW5nLiBXZSBkZXNjcmliZSBhIHNpbXBsZSBhcHByb2FjaCBmb3IgdGhpcyB0YXNrIGJhc2VkIG9uIGEgdHJhbnNmb3JtZXIgdGhhdCBhdXRvcmVncmVzc2l2ZWx5IG1vZGVscyB0aGUgdGV4dCBhbmQgaW1hZ2UgdG9rZW5zIGFzIGEgc2luZ2xlIHN0cmVhbSBvZiBkYXRhLiBXaXRoIHN1ZmZpY2llbnQgZGF0YSBhbmQgc2NhbGUsIG91ciBhcHByb2FjaCBpcyBjb21wZXRpdGl2ZSB3aXRoIHByZXZpb3VzIGRvbWFpbi1zcGVjaWZpYyBtb2RlbHMgd2hlbiBldmFsdWF0ZWQgaW4gYSB6ZXJvLXNob3QgZmFzaGlvbi48L2Rlc2NyaXB0aW9uPgogIDwvZGVzY3JpcHRpb25zPgo8L3Jlc291cmNlPg==","url":"https://arxiv.org/abs/2102.12092","contentUrl":null,"metadataVersion":0,"schemaVersion":"http://datacite.org/schema/kernel-4","source":"mds","isActive":true,"state":"findable","reason":null,"viewCount":0,"viewsOverTime":[],"downloadCount":0,"downloadsOverTime":[],"referenceCount":0,"citationCount":1,"citationsOverTime":[{"year":"2022","total":1}],"partCount":0,"partOfCount":0,"versionCount":0,"versionOfCount":0,"created":"2022-02-23T12:39:30.000Z","registered":"2022-02-23T12:39:31.000Z","published":"2021","updated":"2022-02-23T12:39:31.000Z"},"relationships":{"client":{"data":{"id":"arxiv.content","type":"clients"}},"provider":{"data":{"id":"arxiv","type":"providers"}},"media":{"data":{"id":"10.48550/arxiv.2102.12092","type":"media"}},"references":{"data":[]},"citations":{"data":[{"id":"10.1007/978-981-16-9669-5_24","type":"dois"}]},"parts":{"data":[]},"partOf":{"data":[]},"versions":{"data":[]},"versionOf":{"data":[]}}}}