{"data":{"id":"10.48550/arxiv.2310.02446","type":"dois","attributes":{"doi":"10.48550/arxiv.2310.02446","prefix":"10.48550","suffix":"arxiv.2310.02446","identifiers":[{"identifier":"2310.02446","identifierType":"arXiv"}],"alternateIdentifiers":[{"alternateIdentifierType":"arXiv","alternateIdentifier":"2310.02446"}],"creators":[{"name":"Yong, Zheng-Xin","nameType":"Personal","givenName":"Zheng-Xin","familyName":"Yong","affiliation":[],"nameIdentifiers":[]},{"name":"Menghini, Cristina","nameType":"Personal","givenName":"Cristina","familyName":"Menghini","affiliation":[],"nameIdentifiers":[]},{"name":"Bach, Stephen H.","nameType":"Personal","givenName":"Stephen H.","familyName":"Bach","affiliation":[],"nameIdentifiers":[]}],"titles":[{"title":"Low-Resource Languages Jailbreak GPT-4"}],"publisher":"arXiv","container":{},"publicationYear":2023,"subjects":[{"lang":"en","subject":"Computation and Language (cs.CL)","subjectScheme":"arXiv"},{"lang":"en","subject":"Artificial Intelligence (cs.AI)","subjectScheme":"arXiv"},{"lang":"en","subject":"Cryptography and Security (cs.CR)","subjectScheme":"arXiv"},{"lang":"en","subject":"Machine Learning (cs.LG)","subjectScheme":"arXiv"},{"subject":"FOS: Computer and information sciences","subjectScheme":"Fields of Science and Technology (FOS)"},{"subject":"FOS: Computer and information sciences","schemeUri":"http://www.oecd.org/science/inno/38235147.pdf","subjectScheme":"Fields of Science and Technology (FOS)"}],"contributors":[],"dates":[{"date":"2023-10-03T21:30:56Z","dateType":"Submitted","dateInformation":"v1"},{"date":"2023-10-05T00:07:24Z","dateType":"Updated","dateInformation":"v1"},{"date":"2024-01-27T22:54:52Z","dateType":"Submitted","dateInformation":"v2"},{"date":"2024-01-30T01:14:23Z","dateType":"Updated","dateInformation":"v2"},{"date":"2023-10","dateType":"Available","dateInformation":"v1"},{"date":"2023","dateType":"Issued"}],"language":null,"types":{"ris":"GEN","bibtex":"misc","citeproc":"article","schemaOrg":"CreativeWork","resourceType":"Article","resourceTypeGeneral":"Preprint"},"relatedIdentifiers":[],"relatedItems":[],"sizes":[],"formats":[],"version":"2","rightsList":[{"rights":"arXiv.org perpetual, non-exclusive license","rightsUri":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/"}],"descriptions":[{"description":"AI safety training and red-teaming of large language models (LLMs) are measures to mitigate the generation of unsafe content. Our work exposes the inherent cross-lingual vulnerability of these safety mechanisms, resulting from the linguistic inequality of safety training data, by successfully circumventing GPT-4's safeguard through translating unsafe English inputs into low-resource languages. On the AdvBenchmark, GPT-4 engages with the unsafe translated inputs and provides actionable items that can get the users towards their harmful goals 79% of the time, which is on par with or even surpassing state-of-the-art jailbreaking attacks. Other high-/mid-resource languages have significantly lower attack success rate, which suggests that the cross-lingual vulnerability mainly applies to low-resource languages. Previously, limited training on low-resource languages primarily affects speakers of those languages, causing technological disparities. However, our work highlights a crucial shift: this deficiency now poses a risk to all LLMs users. Publicly available translation APIs enable anyone to exploit LLMs' safety vulnerabilities. Therefore, our work calls for a more holistic red-teaming efforts to develop robust multilingual safeguards with wide language coverage.","descriptionType":"Abstract"},{"description":"NeurIPS Workshop on Socially Responsible Language Modelling Research (SoLaR) 2023. Best Paper Award","descriptionType":"Other"}],"geoLocations":[],"fundingReferences":[],"xml":"PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz4KPHJlc291cmNlIHhtbG5zPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCIgeG1sbnM6eHNpPSJodHRwOi8vd3d3LnczLm9yZy8yMDAxL1hNTFNjaGVtYS1pbnN0YW5jZSIgeHNpOnNjaGVtYUxvY2F0aW9uPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCBodHRwOi8vc2NoZW1hLmRhdGFjaXRlLm9yZy9tZXRhL2tlcm5lbC00LjMvbWV0YWRhdGEueHNkIj4KICA8aWRlbnRpZmllciBpZGVudGlmaWVyVHlwZT0iRE9JIj4xMC40ODU1MC9BUlhJVi4yMzEwLjAyNDQ2PC9pZGVudGlmaWVyPgogIDxhbHRlcm5hdGVJZGVudGlmaWVycz4KICAgIDxhbHRlcm5hdGVJZGVudGlmaWVyIGFsdGVybmF0ZUlkZW50aWZpZXJUeXBlPSJhclhpdiI+MjMxMC4wMjQ0NjwvYWx0ZXJuYXRlSWRlbnRpZmllcj4KICA8L2FsdGVybmF0ZUlkZW50aWZpZXJzPgogIDxjcmVhdG9ycz4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5Zb25nLCBaaGVuZy1YaW48L2NyZWF0b3JOYW1lPgogICAgICA8Z2l2ZW5OYW1lPlpoZW5nLVhpbjwvZ2l2ZW5OYW1lPgogICAgICA8ZmFtaWx5TmFtZT5Zb25nPC9mYW1pbHlOYW1lPgogICAgPC9jcmVhdG9yPgogICAgPGNyZWF0b3I+CiAgICAgIDxjcmVhdG9yTmFtZSBuYW1lVHlwZT0iUGVyc29uYWwiPk1lbmdoaW5pLCBDcmlzdGluYTwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+Q3Jpc3RpbmE8L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+TWVuZ2hpbmk8L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJQZXJzb25hbCI+QmFjaCwgU3RlcGhlbiBILjwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+U3RlcGhlbiBILjwvZ2l2ZW5OYW1lPgogICAgICA8ZmFtaWx5TmFtZT5CYWNoPC9mYW1pbHlOYW1lPgogICAgPC9jcmVhdG9yPgogIDwvY3JlYXRvcnM+CiAgPHRpdGxlcz4KICAgIDx0aXRsZT5Mb3ctUmVzb3VyY2UgTGFuZ3VhZ2VzIEphaWxicmVhayBHUFQtNDwvdGl0bGU+CiAgPC90aXRsZXM+CiAgPHB1Ymxpc2hlcj5hclhpdjwvcHVibGlzaGVyPgogIDxwdWJsaWNhdGlvblllYXI+MjAyMzwvcHVibGljYXRpb25ZZWFyPgogIDxzdWJqZWN0cz4KICAgIDxzdWJqZWN0IHhtbDpsYW5nPSJlbiIgc3ViamVjdFNjaGVtZT0iYXJYaXYiPkNvbXB1dGF0aW9uIGFuZCBMYW5ndWFnZSAoY3MuQ0wpPC9zdWJqZWN0PgogICAgPHN1YmplY3QgeG1sOmxhbmc9ImVuIiBzdWJqZWN0U2NoZW1lPSJhclhpdiI+QXJ0aWZpY2lhbCBJbnRlbGxpZ2VuY2UgKGNzLkFJKTwvc3ViamVjdD4KICAgIDxzdWJqZWN0IHhtbDpsYW5nPSJlbiIgc3ViamVjdFNjaGVtZT0iYXJYaXYiPkNyeXB0b2dyYXBoeSBhbmQgU2VjdXJpdHkgKGNzLkNSKTwvc3ViamVjdD4KICAgIDxzdWJqZWN0IHhtbDpsYW5nPSJlbiIgc3ViamVjdFNjaGVtZT0iYXJYaXYiPk1hY2hpbmUgTGVhcm5pbmcgKGNzLkxHKTwvc3ViamVjdD4KICAgIDxzdWJqZWN0IHN1YmplY3RTY2hlbWU9IkZpZWxkcyBvZiBTY2llbmNlIGFuZCBUZWNobm9sb2d5IChGT1MpIj5GT1M6IENvbXB1dGVyIGFuZCBpbmZvcm1hdGlvbiBzY2llbmNlczwvc3ViamVjdD4KICA8L3N1YmplY3RzPgogIDxkYXRlcz4KICAgIDxkYXRlIGRhdGVUeXBlPSJTdWJtaXR0ZWQiIGRhdGVJbmZvcm1hdGlvbj0idjEiPjIwMjMtMTAtMDNUMjE6MzA6NTZaPC9kYXRlPgogICAgPGRhdGUgZGF0ZVR5cGU9IlVwZGF0ZWQiIGRhdGVJbmZvcm1hdGlvbj0idjEiPjIwMjMtMTAtMDVUMDA6MDc6MjRaPC9kYXRlPgogICAgPGRhdGUgZGF0ZVR5cGU9IlN1Ym1pdHRlZCIgZGF0ZUluZm9ybWF0aW9uPSJ2MiI+MjAyNC0wMS0yN1QyMjo1NDo1Mlo8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iVXBkYXRlZCIgZGF0ZUluZm9ybWF0aW9uPSJ2MiI+MjAyNC0wMS0zMFQwMToxNDoyM1o8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iQXZhaWxhYmxlIiBkYXRlSW5mb3JtYXRpb249InYxIj4yMDIzLTEwPC9kYXRlPgogIDwvZGF0ZXM+CiAgPHJlc291cmNlVHlwZSByZXNvdXJjZVR5cGVHZW5lcmFsPSJQcmVwcmludCI+QXJ0aWNsZTwvcmVzb3VyY2VUeXBlPgogIDx2ZXJzaW9uPjI8L3ZlcnNpb24+CiAgPHJpZ2h0c0xpc3Q+CiAgICA8cmlnaHRzIHJpZ2h0c1VSST0iaHR0cDovL2FyeGl2Lm9yZy9saWNlbnNlcy9ub25leGNsdXNpdmUtZGlzdHJpYi8xLjAvIj5hclhpdi5vcmcgcGVycGV0dWFsLCBub24tZXhjbHVzaXZlIGxpY2Vuc2U8L3JpZ2h0cz4KICA8L3JpZ2h0c0xpc3Q+CiAgPGRlc2NyaXB0aW9ucz4KICAgIDxkZXNjcmlwdGlvbiBkZXNjcmlwdGlvblR5cGU9IkFic3RyYWN0Ij5BSSBzYWZldHkgdHJhaW5pbmcgYW5kIHJlZC10ZWFtaW5nIG9mIGxhcmdlIGxhbmd1YWdlIG1vZGVscyAoTExNcykgYXJlIG1lYXN1cmVzIHRvIG1pdGlnYXRlIHRoZSBnZW5lcmF0aW9uIG9mIHVuc2FmZSBjb250ZW50LiBPdXIgd29yayBleHBvc2VzIHRoZSBpbmhlcmVudCBjcm9zcy1saW5ndWFsIHZ1bG5lcmFiaWxpdHkgb2YgdGhlc2Ugc2FmZXR5IG1lY2hhbmlzbXMsIHJlc3VsdGluZyBmcm9tIHRoZSBsaW5ndWlzdGljIGluZXF1YWxpdHkgb2Ygc2FmZXR5IHRyYWluaW5nIGRhdGEsIGJ5IHN1Y2Nlc3NmdWxseSBjaXJjdW12ZW50aW5nIEdQVC00J3Mgc2FmZWd1YXJkIHRocm91Z2ggdHJhbnNsYXRpbmcgdW5zYWZlIEVuZ2xpc2ggaW5wdXRzIGludG8gbG93LXJlc291cmNlIGxhbmd1YWdlcy4gT24gdGhlIEFkdkJlbmNobWFyaywgR1BULTQgZW5nYWdlcyB3aXRoIHRoZSB1bnNhZmUgdHJhbnNsYXRlZCBpbnB1dHMgYW5kIHByb3ZpZGVzIGFjdGlvbmFibGUgaXRlbXMgdGhhdCBjYW4gZ2V0IHRoZSB1c2VycyB0b3dhcmRzIHRoZWlyIGhhcm1mdWwgZ29hbHMgNzklIG9mIHRoZSB0aW1lLCB3aGljaCBpcyBvbiBwYXIgd2l0aCBvciBldmVuIHN1cnBhc3Npbmcgc3RhdGUtb2YtdGhlLWFydCBqYWlsYnJlYWtpbmcgYXR0YWNrcy4gT3RoZXIgaGlnaC0vbWlkLXJlc291cmNlIGxhbmd1YWdlcyBoYXZlIHNpZ25pZmljYW50bHkgbG93ZXIgYXR0YWNrIHN1Y2Nlc3MgcmF0ZSwgd2hpY2ggc3VnZ2VzdHMgdGhhdCB0aGUgY3Jvc3MtbGluZ3VhbCB2dWxuZXJhYmlsaXR5IG1haW5seSBhcHBsaWVzIHRvIGxvdy1yZXNvdXJjZSBsYW5ndWFnZXMuIFByZXZpb3VzbHksIGxpbWl0ZWQgdHJhaW5pbmcgb24gbG93LXJlc291cmNlIGxhbmd1YWdlcyBwcmltYXJpbHkgYWZmZWN0cyBzcGVha2VycyBvZiB0aG9zZSBsYW5ndWFnZXMsIGNhdXNpbmcgdGVjaG5vbG9naWNhbCBkaXNwYXJpdGllcy4gSG93ZXZlciwgb3VyIHdvcmsgaGlnaGxpZ2h0cyBhIGNydWNpYWwgc2hpZnQ6IHRoaXMgZGVmaWNpZW5jeSBub3cgcG9zZXMgYSByaXNrIHRvIGFsbCBMTE1zIHVzZXJzLiBQdWJsaWNseSBhdmFpbGFibGUgdHJhbnNsYXRpb24gQVBJcyBlbmFibGUgYW55b25lIHRvIGV4cGxvaXQgTExNcycgc2FmZXR5IHZ1bG5lcmFiaWxpdGllcy4gVGhlcmVmb3JlLCBvdXIgd29yayBjYWxscyBmb3IgYSBtb3JlIGhvbGlzdGljIHJlZC10ZWFtaW5nIGVmZm9ydHMgdG8gZGV2ZWxvcCByb2J1c3QgbXVsdGlsaW5ndWFsIHNhZmVndWFyZHMgd2l0aCB3aWRlIGxhbmd1YWdlIGNvdmVyYWdlLjwvZGVzY3JpcHRpb24+CiAgICA8ZGVzY3JpcHRpb24gZGVzY3JpcHRpb25UeXBlPSJPdGhlciI+TmV1cklQUyBXb3Jrc2hvcCBvbiBTb2NpYWxseSBSZXNwb25zaWJsZSBMYW5ndWFnZSBNb2RlbGxpbmcgUmVzZWFyY2ggKFNvTGFSKSAyMDIzLiBCZXN0IFBhcGVyIEF3YXJkPC9kZXNjcmlwdGlvbj4KICA8L2Rlc2NyaXB0aW9ucz4KPC9yZXNvdXJjZT4=","url":"https://arxiv.org/abs/2310.02446","contentUrl":null,"metadataVersion":1,"schemaVersion":"http://datacite.org/schema/kernel-4","source":"mds","isActive":true,"state":"findable","reason":null,"viewCount":0,"viewsOverTime":[],"downloadCount":0,"downloadsOverTime":[],"referenceCount":0,"citationCount":0,"citationsOverTime":[],"partCount":0,"partOfCount":0,"versionCount":0,"versionOfCount":0,"created":"2023-10-05T01:27:07.000Z","registered":"2023-10-05T01:27:08.000Z","published":"2023","updated":"2024-01-30T03:17:50.000Z"},"relationships":{"client":{"data":{"id":"arxiv.content","type":"clients"}},"provider":{"data":{"id":"arxiv","type":"providers"}},"media":{"data":{"id":"10.48550/arxiv.2310.02446","type":"media"}},"references":{"data":[]},"citations":{"data":[]},"parts":{"data":[]},"partOf":{"data":[]},"versions":{"data":[]},"versionOf":{"data":[]}}}}