{"data":{"id":"10.48550/arxiv.cmp-lg/9709008","type":"dois","attributes":{"doi":"10.48550/arxiv.cmp-lg/9709008","prefix":"10.48550","suffix":"arxiv.cmp-lg/9709008","identifiers":[{"identifier":"cmp-lg/9709008","identifierType":"arXiv"}],"alternateIdentifiers":[{"alternateIdentifierType":"arXiv","alternateIdentifier":"cmp-lg/9709008"}],"creators":[{"name":"Jiang, Jay J.","nameType":"Personal","givenName":"Jay J.","familyName":"Jiang","affiliation":["University of Waterloo"],"nameIdentifiers":[]},{"name":"Conrath, David W.","nameType":"Personal","givenName":"David W.","familyName":"Conrath","affiliation":["McMaster University"],"nameIdentifiers":[]}],"titles":[{"title":"Semantic Similarity Based on Corpus Statistics and Lexical Taxonomy"}],"publisher":"arXiv","container":{},"publicationYear":1997,"subjects":[{"lang":"en","subject":"Computation and Language (cs.CL)","subjectScheme":"arXiv"},{"subject":"FOS: Computer and information sciences","subjectScheme":"Fields of Science and Technology (FOS)"},{"subject":"FOS: Computer and information sciences","schemeUri":"http://www.oecd.org/science/inno/38235147.pdf","subjectScheme":"Fields of Science and Technology (FOS)"}],"contributors":[],"dates":[{"date":"1997-09-20T15:16:26Z","dateType":"Submitted","dateInformation":"v1"},{"date":"2009-11-30T17:10:47Z","dateType":"Updated","dateInformation":"v1"},{"date":"1997-09","dateType":"Available","dateInformation":"v1"},{"date":"1997","dateType":"Issued"}],"language":null,"types":{"ris":"RPRT","bibtex":"article","citeproc":"article-journal","schemaOrg":"ScholarlyArticle","resourceType":"Article","resourceTypeGeneral":"Text"},"relatedIdentifiers":[],"relatedItems":[],"sizes":[],"formats":[],"version":"1","rightsList":[{"rights":"Assumed arXiv.org perpetual, non-exclusive license to distribute this article for submissions made before January 2004","rightsUri":"http://arxiv.org/licenses/assumed-1991-2003/"}],"descriptions":[{"description":"This paper presents a new approach for measuring semantic similarity/distance between words and concepts. It combines a lexical taxonomy structure with corpus statistical information so that the semantic distance between nodes in the semantic space constructed by the taxonomy can be better quantified with the computational evidence derived from a distributional analysis of corpus data. Specifically, the proposed measure is a combined approach that inherits the edge-based approach of the edge counting scheme, which is then enhanced by the node-based approach of the information content calculation. When tested on a common data set of word pair similarity ratings, the proposed approach outperforms other computational models. It gives the highest correlation value (r = 0.828) with a benchmark based on human similarity judgements, whereas an upper bound (r = 0.885) is observed when human subjects replicate the same task.","descriptionType":"Abstract"},{"description":"15 pages, Postscript only","descriptionType":"Other"}],"geoLocations":[],"fundingReferences":[],"xml":"PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz4KPHJlc291cmNlIHhtbG5zPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCIgeG1sbnM6eHNpPSJodHRwOi8vd3d3LnczLm9yZy8yMDAxL1hNTFNjaGVtYS1pbnN0YW5jZSIgeHNpOnNjaGVtYUxvY2F0aW9uPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCBodHRwOi8vc2NoZW1hLmRhdGFjaXRlLm9yZy9tZXRhL2tlcm5lbC00LjMvbWV0YWRhdGEueHNkIj4KICA8aWRlbnRpZmllciBpZGVudGlmaWVyVHlwZT0iRE9JIj4xMC40ODU1MC9BUlhJVi5DTVAtTEcvOTcwOTAwODwvaWRlbnRpZmllcj4KICA8YWx0ZXJuYXRlSWRlbnRpZmllcnM+CiAgICA8YWx0ZXJuYXRlSWRlbnRpZmllciBhbHRlcm5hdGVJZGVudGlmaWVyVHlwZT0iYXJYaXYiPmNtcC1sZy85NzA5MDA4PC9hbHRlcm5hdGVJZGVudGlmaWVyPgogIDwvYWx0ZXJuYXRlSWRlbnRpZmllcnM+CiAgPGNyZWF0b3JzPgogICAgPGNyZWF0b3I+CiAgICAgIDxjcmVhdG9yTmFtZSBuYW1lVHlwZT0iUGVyc29uYWwiPkppYW5nLCBKYXkgSi48L2NyZWF0b3JOYW1lPgogICAgICA8Z2l2ZW5OYW1lPkpheSBKLjwvZ2l2ZW5OYW1lPgogICAgICA8ZmFtaWx5TmFtZT5KaWFuZzwvZmFtaWx5TmFtZT4KICAgICAgPGFmZmlsaWF0aW9uPlVuaXZlcnNpdHkgb2YgV2F0ZXJsb288L2FmZmlsaWF0aW9uPgogICAgPC9jcmVhdG9yPgogICAgPGNyZWF0b3I+CiAgICAgIDxjcmVhdG9yTmFtZSBuYW1lVHlwZT0iUGVyc29uYWwiPkNvbnJhdGgsIERhdmlkIFcuPC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5EYXZpZCBXLjwvZ2l2ZW5OYW1lPgogICAgICA8ZmFtaWx5TmFtZT5Db25yYXRoPC9mYW1pbHlOYW1lPgogICAgICA8YWZmaWxpYXRpb24+TWNNYXN0ZXIgVW5pdmVyc2l0eTwvYWZmaWxpYXRpb24+CiAgICA8L2NyZWF0b3I+CiAgPC9jcmVhdG9ycz4KICA8dGl0bGVzPgogICAgPHRpdGxlPlNlbWFudGljIFNpbWlsYXJpdHkgQmFzZWQgb24gQ29ycHVzIFN0YXRpc3RpY3MgYW5kIExleGljYWwgVGF4b25vbXk8L3RpdGxlPgogIDwvdGl0bGVzPgogIDxwdWJsaXNoZXI+YXJYaXY8L3B1Ymxpc2hlcj4KICA8cHVibGljYXRpb25ZZWFyPjE5OTc8L3B1YmxpY2F0aW9uWWVhcj4KICA8c3ViamVjdHM+CiAgICA8c3ViamVjdCB4bWw6bGFuZz0iZW4iIHN1YmplY3RTY2hlbWU9ImFyWGl2Ij5Db21wdXRhdGlvbiBhbmQgTGFuZ3VhZ2UgKGNzLkNMKTwvc3ViamVjdD4KICAgIDxzdWJqZWN0IHN1YmplY3RTY2hlbWU9IkZpZWxkcyBvZiBTY2llbmNlIGFuZCBUZWNobm9sb2d5IChGT1MpIj5GT1M6IENvbXB1dGVyIGFuZCBpbmZvcm1hdGlvbiBzY2llbmNlczwvc3ViamVjdD4KICA8L3N1YmplY3RzPgogIDxkYXRlcz4KICAgIDxkYXRlIGRhdGVUeXBlPSJTdWJtaXR0ZWQiIGRhdGVJbmZvcm1hdGlvbj0idjEiPjE5OTctMDktMjBUMTU6MTY6MjZaPC9kYXRlPgogICAgPGRhdGUgZGF0ZVR5cGU9IlVwZGF0ZWQiIGRhdGVJbmZvcm1hdGlvbj0idjEiPjIwMDktMTEtMzBUMTc6MTA6NDdaPC9kYXRlPgogICAgPGRhdGUgZGF0ZVR5cGU9IkF2YWlsYWJsZSIgZGF0ZUluZm9ybWF0aW9uPSJ2MSI+MTk5Ny0wOTwvZGF0ZT4KICA8L2RhdGVzPgogIDxyZXNvdXJjZVR5cGUgcmVzb3VyY2VUeXBlR2VuZXJhbD0iVGV4dCI+QXJ0aWNsZTwvcmVzb3VyY2VUeXBlPgogIDx2ZXJzaW9uPjE8L3ZlcnNpb24+CiAgPHJpZ2h0c0xpc3Q+CiAgICA8cmlnaHRzIHJpZ2h0c1VSST0iaHR0cDovL2FyeGl2Lm9yZy9saWNlbnNlcy9hc3N1bWVkLTE5OTEtMjAwMy8iPkFzc3VtZWQgYXJYaXYub3JnIHBlcnBldHVhbCwgbm9uLWV4Y2x1c2l2ZSBsaWNlbnNlIHRvIGRpc3RyaWJ1dGUgdGhpcyBhcnRpY2xlIGZvciBzdWJtaXNzaW9ucyBtYWRlIGJlZm9yZSBKYW51YXJ5IDIwMDQ8L3JpZ2h0cz4KICA8L3JpZ2h0c0xpc3Q+CiAgPGRlc2NyaXB0aW9ucz4KICAgIDxkZXNjcmlwdGlvbiBkZXNjcmlwdGlvblR5cGU9IkFic3RyYWN0Ij5UaGlzIHBhcGVyIHByZXNlbnRzIGEgbmV3IGFwcHJvYWNoIGZvciBtZWFzdXJpbmcgc2VtYW50aWMgc2ltaWxhcml0eS9kaXN0YW5jZSBiZXR3ZWVuIHdvcmRzIGFuZCBjb25jZXB0cy4gSXQgY29tYmluZXMgYSBsZXhpY2FsIHRheG9ub215IHN0cnVjdHVyZSB3aXRoIGNvcnB1cyBzdGF0aXN0aWNhbCBpbmZvcm1hdGlvbiBzbyB0aGF0IHRoZSBzZW1hbnRpYyBkaXN0YW5jZSBiZXR3ZWVuIG5vZGVzIGluIHRoZSBzZW1hbnRpYyBzcGFjZSBjb25zdHJ1Y3RlZCBieSB0aGUgdGF4b25vbXkgY2FuIGJlIGJldHRlciBxdWFudGlmaWVkIHdpdGggdGhlIGNvbXB1dGF0aW9uYWwgZXZpZGVuY2UgZGVyaXZlZCBmcm9tIGEgZGlzdHJpYnV0aW9uYWwgYW5hbHlzaXMgb2YgY29ycHVzIGRhdGEuIFNwZWNpZmljYWxseSwgdGhlIHByb3Bvc2VkIG1lYXN1cmUgaXMgYSBjb21iaW5lZCBhcHByb2FjaCB0aGF0IGluaGVyaXRzIHRoZSBlZGdlLWJhc2VkIGFwcHJvYWNoIG9mIHRoZSBlZGdlIGNvdW50aW5nIHNjaGVtZSwgd2hpY2ggaXMgdGhlbiBlbmhhbmNlZCBieSB0aGUgbm9kZS1iYXNlZCBhcHByb2FjaCBvZiB0aGUgaW5mb3JtYXRpb24gY29udGVudCBjYWxjdWxhdGlvbi4gV2hlbiB0ZXN0ZWQgb24gYSBjb21tb24gZGF0YSBzZXQgb2Ygd29yZCBwYWlyIHNpbWlsYXJpdHkgcmF0aW5ncywgdGhlIHByb3Bvc2VkIGFwcHJvYWNoIG91dHBlcmZvcm1zIG90aGVyIGNvbXB1dGF0aW9uYWwgbW9kZWxzLiBJdCBnaXZlcyB0aGUgaGlnaGVzdCBjb3JyZWxhdGlvbiB2YWx1ZSAociA9IDAuODI4KSB3aXRoIGEgYmVuY2htYXJrIGJhc2VkIG9uIGh1bWFuIHNpbWlsYXJpdHkganVkZ2VtZW50cywgd2hlcmVhcyBhbiB1cHBlciBib3VuZCAociA9IDAuODg1KSBpcyBvYnNlcnZlZCB3aGVuIGh1bWFuIHN1YmplY3RzIHJlcGxpY2F0ZSB0aGUgc2FtZSB0YXNrLjwvZGVzY3JpcHRpb24+CiAgICA8ZGVzY3JpcHRpb24gZGVzY3JpcHRpb25UeXBlPSJPdGhlciI+MTUgcGFnZXMsIFBvc3RzY3JpcHQgb25seTwvZGVzY3JpcHRpb24+CiAgPC9kZXNjcmlwdGlvbnM+CjwvcmVzb3VyY2U+","url":"https://arxiv.org/abs/cmp-lg/9709008","contentUrl":null,"metadataVersion":0,"schemaVersion":"http://datacite.org/schema/kernel-4","source":"mds","isActive":true,"state":"findable","reason":null,"viewCount":0,"viewsOverTime":[],"downloadCount":0,"downloadsOverTime":[],"referenceCount":0,"citationCount":0,"citationsOverTime":[],"partCount":0,"partOfCount":0,"versionCount":0,"versionOfCount":0,"created":"2022-03-19T16:42:42.000Z","registered":"2022-03-19T16:42:43.000Z","published":"1997","updated":"2022-03-19T16:42:43.000Z"},"relationships":{"client":{"data":{"id":"arxiv.content","type":"clients"}},"provider":{"data":{"id":"arxiv","type":"providers"}},"media":{"data":{"id":"10.48550/arxiv.cmp-lg/9709008","type":"media"}},"references":{"data":[]},"citations":{"data":[]},"parts":{"data":[]},"partOf":{"data":[]},"versions":{"data":[]},"versionOf":{"data":[]}}}}