{"data":{"id":"10.48550/arxiv.1711.00331","type":"dois","attributes":{"doi":"10.48550/arxiv.1711.00331","prefix":"10.48550","suffix":"arxiv.1711.00331","identifiers":[{"identifier":"1711.00331","identifierType":"arXiv"}],"alternateIdentifiers":[{"alternateIdentifierType":"arXiv","alternateIdentifier":"1711.00331"}],"creators":[{"name":"Senel, Lutfi Kerem","nameType":"Personal","givenName":"Lutfi Kerem","familyName":"Senel","affiliation":[],"nameIdentifiers":[]},{"name":"Utlu, Ihsan","nameType":"Personal","givenName":"Ihsan","familyName":"Utlu","affiliation":[],"nameIdentifiers":[]},{"name":"Yucesoy, Veysel","nameType":"Personal","givenName":"Veysel","familyName":"Yucesoy","affiliation":[],"nameIdentifiers":[]},{"name":"Koc, Aykut","nameType":"Personal","givenName":"Aykut","familyName":"Koc","affiliation":[],"nameIdentifiers":[]},{"name":"Cukur, Tolga","nameType":"Personal","givenName":"Tolga","familyName":"Cukur","affiliation":[],"nameIdentifiers":[]}],"titles":[{"title":"Semantic Structure and Interpretability of Word Embeddings"}],"publisher":"arXiv","container":{},"publicationYear":2017,"subjects":[{"lang":"en","subject":"Computation and Language (cs.CL)","subjectScheme":"arXiv"},{"subject":"FOS: Computer and information sciences","subjectScheme":"Fields of Science and Technology (FOS)"},{"subject":"FOS: Computer and information sciences","schemeUri":"http://www.oecd.org/science/inno/38235147.pdf","subjectScheme":"Fields of Science and Technology (FOS)"}],"contributors":[],"dates":[{"date":"2017-11-01T13:22:02Z","dateType":"Submitted","dateInformation":"v1"},{"date":"2017-11-02T00:09:02Z","dateType":"Updated","dateInformation":"v1"},{"date":"2017-11-23T14:15:50Z","dateType":"Submitted","dateInformation":"v2"},{"date":"2017-11-27T01:07:18Z","dateType":"Updated","dateInformation":"v2"},{"date":"2018-05-16T07:10:55Z","dateType":"Submitted","dateInformation":"v3"},{"date":"2018-07-20T00:04:46Z","dateType":"Updated","dateInformation":"v3"},{"date":"2017-11","dateType":"Available","dateInformation":"v1"},{"date":"2017","dateType":"Issued"}],"language":null,"types":{"ris":"RPRT","bibtex":"article","citeproc":"article-journal","schemaOrg":"ScholarlyArticle","resourceType":"Article","resourceTypeGeneral":"Text"},"relatedIdentifiers":[{"relationType":"IsVersionOf","relatedIdentifier":"10.1109/taslp.2018.2837384","relatedIdentifierType":"DOI"}],"relatedItems":[],"sizes":[],"formats":[],"version":"3","rightsList":[{"rights":"arXiv.org perpetual, non-exclusive license","rightsUri":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/"}],"descriptions":[{"description":"Dense word embeddings, which encode semantic meanings of words to low dimensional vector spaces have become very popular in natural language processing (NLP) research due to their state-of-the-art performances in many NLP tasks. Word embeddings are substantially successful in capturing semantic relations among words, so a meaningful semantic structure must be present in the respective vector spaces. However, in many cases, this semantic structure is broadly and heterogeneously distributed across the embedding dimensions, which makes interpretation a big challenge. In this study, we propose a statistical method to uncover the latent semantic structure in the dense word embeddings. To perform our analysis we introduce a new dataset (SEMCAT) that contains more than 6500 words semantically grouped under 110 categories. We further propose a method to quantify the interpretability of the word embeddings; the proposed method is a practical alternative to the classical word intrusion test that requires human intervention.","descriptionType":"Abstract"},{"description":"11 Pages, 8 Figures, accepted by IEEE/ACM Transactions on Audio, Speech, and Language Processing","descriptionType":"Other"}],"geoLocations":[],"fundingReferences":[],"xml":"PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz4KPHJlc291cmNlIHhtbG5zPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCIgeG1sbnM6eHNpPSJodHRwOi8vd3d3LnczLm9yZy8yMDAxL1hNTFNjaGVtYS1pbnN0YW5jZSIgeHNpOnNjaGVtYUxvY2F0aW9uPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCBodHRwOi8vc2NoZW1hLmRhdGFjaXRlLm9yZy9tZXRhL2tlcm5lbC00LjMvbWV0YWRhdGEueHNkIj4KICA8aWRlbnRpZmllciBpZGVudGlmaWVyVHlwZT0iRE9JIj4xMC40ODU1MC9BUlhJVi4xNzExLjAwMzMxPC9pZGVudGlmaWVyPgogIDxhbHRlcm5hdGVJZGVudGlmaWVycz4KICAgIDxhbHRlcm5hdGVJZGVudGlmaWVyIGFsdGVybmF0ZUlkZW50aWZpZXJUeXBlPSJhclhpdiI+MTcxMS4wMDMzMTwvYWx0ZXJuYXRlSWRlbnRpZmllcj4KICA8L2FsdGVybmF0ZUlkZW50aWZpZXJzPgogIDxjcmVhdG9ycz4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5TZW5lbCwgTHV0ZmkgS2VyZW08L2NyZWF0b3JOYW1lPgogICAgICA8Z2l2ZW5OYW1lPkx1dGZpIEtlcmVtPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPlNlbmVsPC9mYW1pbHlOYW1lPgogICAgPC9jcmVhdG9yPgogICAgPGNyZWF0b3I+CiAgICAgIDxjcmVhdG9yTmFtZSBuYW1lVHlwZT0iUGVyc29uYWwiPlV0bHUsIEloc2FuPC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5JaHNhbjwvZ2l2ZW5OYW1lPgogICAgICA8ZmFtaWx5TmFtZT5VdGx1PC9mYW1pbHlOYW1lPgogICAgPC9jcmVhdG9yPgogICAgPGNyZWF0b3I+CiAgICAgIDxjcmVhdG9yTmFtZSBuYW1lVHlwZT0iUGVyc29uYWwiPll1Y2Vzb3ksIFZleXNlbDwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+VmV5c2VsPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPll1Y2Vzb3k8L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJQZXJzb25hbCI+S29jLCBBeWt1dDwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+QXlrdXQ8L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+S29jPC9mYW1pbHlOYW1lPgogICAgPC9jcmVhdG9yPgogICAgPGNyZWF0b3I+CiAgICAgIDxjcmVhdG9yTmFtZSBuYW1lVHlwZT0iUGVyc29uYWwiPkN1a3VyLCBUb2xnYTwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+VG9sZ2E8L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+Q3VrdXI8L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgPC9jcmVhdG9ycz4KICA8dGl0bGVzPgogICAgPHRpdGxlPlNlbWFudGljIFN0cnVjdHVyZSBhbmQgSW50ZXJwcmV0YWJpbGl0eSBvZiBXb3JkIEVtYmVkZGluZ3M8L3RpdGxlPgogIDwvdGl0bGVzPgogIDxwdWJsaXNoZXI+YXJYaXY8L3B1Ymxpc2hlcj4KICA8cHVibGljYXRpb25ZZWFyPjIwMTc8L3B1YmxpY2F0aW9uWWVhcj4KICA8c3ViamVjdHM+CiAgICA8c3ViamVjdCB4bWw6bGFuZz0iZW4iIHN1YmplY3RTY2hlbWU9ImFyWGl2Ij5Db21wdXRhdGlvbiBhbmQgTGFuZ3VhZ2UgKGNzLkNMKTwvc3ViamVjdD4KICAgIDxzdWJqZWN0IHN1YmplY3RTY2hlbWU9IkZpZWxkcyBvZiBTY2llbmNlIGFuZCBUZWNobm9sb2d5IChGT1MpIj5GT1M6IENvbXB1dGVyIGFuZCBpbmZvcm1hdGlvbiBzY2llbmNlczwvc3ViamVjdD4KICA8L3N1YmplY3RzPgogIDxkYXRlcz4KICAgIDxkYXRlIGRhdGVUeXBlPSJTdWJtaXR0ZWQiIGRhdGVJbmZvcm1hdGlvbj0idjEiPjIwMTctMTEtMDFUMTM6MjI6MDJaPC9kYXRlPgogICAgPGRhdGUgZGF0ZVR5cGU9IlVwZGF0ZWQiIGRhdGVJbmZvcm1hdGlvbj0idjEiPjIwMTctMTEtMDJUMDA6MDk6MDJaPC9kYXRlPgogICAgPGRhdGUgZGF0ZVR5cGU9IlN1Ym1pdHRlZCIgZGF0ZUluZm9ybWF0aW9uPSJ2MiI+MjAxNy0xMS0yM1QxNDoxNTo1MFo8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iVXBkYXRlZCIgZGF0ZUluZm9ybWF0aW9uPSJ2MiI+MjAxNy0xMS0yN1QwMTowNzoxOFo8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iU3VibWl0dGVkIiBkYXRlSW5mb3JtYXRpb249InYzIj4yMDE4LTA1LTE2VDA3OjEwOjU1WjwvZGF0ZT4KICAgIDxkYXRlIGRhdGVUeXBlPSJVcGRhdGVkIiBkYXRlSW5mb3JtYXRpb249InYzIj4yMDE4LTA3LTIwVDAwOjA0OjQ2WjwvZGF0ZT4KICAgIDxkYXRlIGRhdGVUeXBlPSJBdmFpbGFibGUiIGRhdGVJbmZvcm1hdGlvbj0idjEiPjIwMTctMTE8L2RhdGU+CiAgPC9kYXRlcz4KICA8cmVzb3VyY2VUeXBlIHJlc291cmNlVHlwZUdlbmVyYWw9IlRleHQiPkFydGljbGU8L3Jlc291cmNlVHlwZT4KICA8cmVsYXRlZElkZW50aWZpZXJzPgogICAgPHJlbGF0ZWRJZGVudGlmaWVyIHJlbGF0ZWRJZGVudGlmaWVyVHlwZT0iRE9JIiByZWxhdGlvblR5cGU9IklzVmVyc2lvbk9mIj4xMC4xMTA5L1RBU0xQLjIwMTguMjgzNzM4NDwvcmVsYXRlZElkZW50aWZpZXI+CiAgPC9yZWxhdGVkSWRlbnRpZmllcnM+CiAgPHZlcnNpb24+MzwvdmVyc2lvbj4KICA8cmlnaHRzTGlzdD4KICAgIDxyaWdodHMgcmlnaHRzVVJJPSJodHRwOi8vYXJ4aXYub3JnL2xpY2Vuc2VzL25vbmV4Y2x1c2l2ZS1kaXN0cmliLzEuMC8iPmFyWGl2Lm9yZyBwZXJwZXR1YWwsIG5vbi1leGNsdXNpdmUgbGljZW5zZTwvcmlnaHRzPgogIDwvcmlnaHRzTGlzdD4KICA8ZGVzY3JpcHRpb25zPgogICAgPGRlc2NyaXB0aW9uIGRlc2NyaXB0aW9uVHlwZT0iQWJzdHJhY3QiPkRlbnNlIHdvcmQgZW1iZWRkaW5ncywgd2hpY2ggZW5jb2RlIHNlbWFudGljIG1lYW5pbmdzIG9mIHdvcmRzIHRvIGxvdyBkaW1lbnNpb25hbCB2ZWN0b3Igc3BhY2VzIGhhdmUgYmVjb21lIHZlcnkgcG9wdWxhciBpbiBuYXR1cmFsIGxhbmd1YWdlIHByb2Nlc3NpbmcgKE5MUCkgcmVzZWFyY2ggZHVlIHRvIHRoZWlyIHN0YXRlLW9mLXRoZS1hcnQgcGVyZm9ybWFuY2VzIGluIG1hbnkgTkxQIHRhc2tzLiBXb3JkIGVtYmVkZGluZ3MgYXJlIHN1YnN0YW50aWFsbHkgc3VjY2Vzc2Z1bCBpbiBjYXB0dXJpbmcgc2VtYW50aWMgcmVsYXRpb25zIGFtb25nIHdvcmRzLCBzbyBhIG1lYW5pbmdmdWwgc2VtYW50aWMgc3RydWN0dXJlIG11c3QgYmUgcHJlc2VudCBpbiB0aGUgcmVzcGVjdGl2ZSB2ZWN0b3Igc3BhY2VzLiBIb3dldmVyLCBpbiBtYW55IGNhc2VzLCB0aGlzIHNlbWFudGljIHN0cnVjdHVyZSBpcyBicm9hZGx5IGFuZCBoZXRlcm9nZW5lb3VzbHkgZGlzdHJpYnV0ZWQgYWNyb3NzIHRoZSBlbWJlZGRpbmcgZGltZW5zaW9ucywgd2hpY2ggbWFrZXMgaW50ZXJwcmV0YXRpb24gYSBiaWcgY2hhbGxlbmdlLiBJbiB0aGlzIHN0dWR5LCB3ZSBwcm9wb3NlIGEgc3RhdGlzdGljYWwgbWV0aG9kIHRvIHVuY292ZXIgdGhlIGxhdGVudCBzZW1hbnRpYyBzdHJ1Y3R1cmUgaW4gdGhlIGRlbnNlIHdvcmQgZW1iZWRkaW5ncy4gVG8gcGVyZm9ybSBvdXIgYW5hbHlzaXMgd2UgaW50cm9kdWNlIGEgbmV3IGRhdGFzZXQgKFNFTUNBVCkgdGhhdCBjb250YWlucyBtb3JlIHRoYW4gNjUwMCB3b3JkcyBzZW1hbnRpY2FsbHkgZ3JvdXBlZCB1bmRlciAxMTAgY2F0ZWdvcmllcy4gV2UgZnVydGhlciBwcm9wb3NlIGEgbWV0aG9kIHRvIHF1YW50aWZ5IHRoZSBpbnRlcnByZXRhYmlsaXR5IG9mIHRoZSB3b3JkIGVtYmVkZGluZ3M7IHRoZSBwcm9wb3NlZCBtZXRob2QgaXMgYSBwcmFjdGljYWwgYWx0ZXJuYXRpdmUgdG8gdGhlIGNsYXNzaWNhbCB3b3JkIGludHJ1c2lvbiB0ZXN0IHRoYXQgcmVxdWlyZXMgaHVtYW4gaW50ZXJ2ZW50aW9uLjwvZGVzY3JpcHRpb24+CiAgICA8ZGVzY3JpcHRpb24gZGVzY3JpcHRpb25UeXBlPSJPdGhlciI+MTEgUGFnZXMsIDggRmlndXJlcywgYWNjZXB0ZWQgYnkgSUVFRS9BQ00gVHJhbnNhY3Rpb25zIG9uIEF1ZGlvLCBTcGVlY2gsIGFuZCBMYW5ndWFnZSBQcm9jZXNzaW5nPC9kZXNjcmlwdGlvbj4KICA8L2Rlc2NyaXB0aW9ucz4KPC9yZXNvdXJjZT4=","url":"https://arxiv.org/abs/1711.00331","contentUrl":null,"metadataVersion":0,"schemaVersion":"http://datacite.org/schema/kernel-4","source":"mds","isActive":true,"state":"findable","reason":null,"viewCount":0,"viewsOverTime":[],"downloadCount":0,"downloadsOverTime":[],"referenceCount":0,"citationCount":0,"citationsOverTime":[],"partCount":0,"partOfCount":0,"versionCount":0,"versionOfCount":0,"created":"2022-03-04T01:33:02.000Z","registered":"2022-03-04T01:33:03.000Z","published":"2017","updated":"2022-03-04T01:33:03.000Z"},"relationships":{"client":{"data":{"id":"arxiv.content","type":"clients"}},"provider":{"data":{"id":"arxiv","type":"providers"}},"media":{"data":{"id":"10.48550/arxiv.1711.00331","type":"media"}},"references":{"data":[]},"citations":{"data":[]},"parts":{"data":[]},"partOf":{"data":[]},"versions":{"data":[]},"versionOf":{"data":[]}}}}