{"data":{"id":"10.48550/arxiv.1906.03361","type":"dois","attributes":{"doi":"10.48550/arxiv.1906.03361","prefix":"10.48550","suffix":"arxiv.1906.03361","identifiers":[{"identifier":"1906.03361","identifierType":"arXiv"}],"alternateIdentifiers":[{"alternateIdentifierType":"arXiv","alternateIdentifier":"1906.03361"}],"creators":[{"name":"Amid, Ehsan","nameType":"Personal","givenName":"Ehsan","familyName":"Amid","affiliation":[],"nameIdentifiers":[]},{"name":"Warmuth, Manfred K.","nameType":"Personal","givenName":"Manfred K.","familyName":"Warmuth","affiliation":[],"nameIdentifiers":[]},{"name":"Anil, Rohan","nameType":"Personal","givenName":"Rohan","familyName":"Anil","affiliation":[],"nameIdentifiers":[]},{"name":"Koren, Tomer","nameType":"Personal","givenName":"Tomer","familyName":"Koren","affiliation":[],"nameIdentifiers":[]}],"titles":[{"title":"Robust Bi-Tempered Logistic Loss Based on Bregman Divergences"}],"publisher":"arXiv","container":{},"publicationYear":2019,"subjects":[{"lang":"en","subject":"Machine Learning (cs.LG)","subjectScheme":"arXiv"},{"lang":"en","subject":"Machine Learning (stat.ML)","subjectScheme":"arXiv"},{"subject":"FOS: Computer and information sciences","subjectScheme":"Fields of Science and Technology (FOS)"},{"subject":"FOS: Computer and information sciences","schemeUri":"http://www.oecd.org/science/inno/38235147.pdf","subjectScheme":"Fields of Science and Technology (FOS)"}],"contributors":[],"dates":[{"date":"2019-06-08T00:08:38Z","dateType":"Submitted","dateInformation":"v1"},{"date":"2019-06-11T00:04:27Z","dateType":"Updated","dateInformation":"v1"},{"date":"2019-08-26T21:13:27Z","dateType":"Submitted","dateInformation":"v2"},{"date":"2019-08-28T00:03:36Z","dateType":"Updated","dateInformation":"v2"},{"date":"2019-09-23T16:08:54Z","dateType":"Submitted","dateInformation":"v3"},{"date":"2019-09-24T00:31:18Z","dateType":"Updated","dateInformation":"v3"},{"date":"2019-06","dateType":"Available","dateInformation":"v1"},{"date":"2019","dateType":"Issued"}],"language":null,"types":{"ris":"RPRT","bibtex":"article","citeproc":"article-journal","schemaOrg":"ScholarlyArticle","resourceType":"Article","resourceTypeGeneral":"Text"},"relatedIdentifiers":[],"relatedItems":[],"sizes":[],"formats":[],"version":"3","rightsList":[{"rights":"arXiv.org perpetual, non-exclusive license","rightsUri":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/"}],"descriptions":[{"description":"We introduce a temperature into the exponential function and replace the softmax output layer of neural nets by a high temperature generalization. Similarly, the logarithm in the log loss we use for training is replaced by a low temperature logarithm. By tuning the two temperatures we create loss functions that are non-convex already in the single layer case. When replacing the last layer of the neural nets by our bi-temperature generalization of logistic loss, the training becomes more robust to noise. We visualize the effect of tuning the two temperatures in a simple setting and show the efficacy of our method on large data sets. Our methodology is based on Bregman divergences and is superior to a related two-temperature method using the Tsallis divergence.","descriptionType":"Abstract"}],"geoLocations":[],"fundingReferences":[],"xml":"PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz4KPHJlc291cmNlIHhtbG5zPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCIgeG1sbnM6eHNpPSJodHRwOi8vd3d3LnczLm9yZy8yMDAxL1hNTFNjaGVtYS1pbnN0YW5jZSIgeHNpOnNjaGVtYUxvY2F0aW9uPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCBodHRwOi8vc2NoZW1hLmRhdGFjaXRlLm9yZy9tZXRhL2tlcm5lbC00LjMvbWV0YWRhdGEueHNkIj4KICA8aWRlbnRpZmllciBpZGVudGlmaWVyVHlwZT0iRE9JIj4xMC40ODU1MC9BUlhJVi4xOTA2LjAzMzYxPC9pZGVudGlmaWVyPgogIDxhbHRlcm5hdGVJZGVudGlmaWVycz4KICAgIDxhbHRlcm5hdGVJZGVudGlmaWVyIGFsdGVybmF0ZUlkZW50aWZpZXJUeXBlPSJhclhpdiI+MTkwNi4wMzM2MTwvYWx0ZXJuYXRlSWRlbnRpZmllcj4KICA8L2FsdGVybmF0ZUlkZW50aWZpZXJzPgogIDxjcmVhdG9ycz4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5BbWlkLCBFaHNhbjwvY3JlYXRvck5hbWU+CiAgICAgIDxnaXZlbk5hbWU+RWhzYW48L2dpdmVuTmFtZT4KICAgICAgPGZhbWlseU5hbWU+QW1pZDwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5XYXJtdXRoLCBNYW5mcmVkIEsuPC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5NYW5mcmVkIEsuPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPldhcm11dGg8L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJQZXJzb25hbCI+QW5pbCwgUm9oYW48L2NyZWF0b3JOYW1lPgogICAgICA8Z2l2ZW5OYW1lPlJvaGFuPC9naXZlbk5hbWU+CiAgICAgIDxmYW1pbHlOYW1lPkFuaWw8L2ZhbWlseU5hbWU+CiAgICA8L2NyZWF0b3I+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJQZXJzb25hbCI+S29yZW4sIFRvbWVyPC9jcmVhdG9yTmFtZT4KICAgICAgPGdpdmVuTmFtZT5Ub21lcjwvZ2l2ZW5OYW1lPgogICAgICA8ZmFtaWx5TmFtZT5Lb3JlbjwvZmFtaWx5TmFtZT4KICAgIDwvY3JlYXRvcj4KICA8L2NyZWF0b3JzPgogIDx0aXRsZXM+CiAgICA8dGl0bGU+Um9idXN0IEJpLVRlbXBlcmVkIExvZ2lzdGljIExvc3MgQmFzZWQgb24gQnJlZ21hbiBEaXZlcmdlbmNlczwvdGl0bGU+CiAgPC90aXRsZXM+CiAgPHB1Ymxpc2hlcj5hclhpdjwvcHVibGlzaGVyPgogIDxwdWJsaWNhdGlvblllYXI+MjAxOTwvcHVibGljYXRpb25ZZWFyPgogIDxzdWJqZWN0cz4KICAgIDxzdWJqZWN0IHhtbDpsYW5nPSJlbiIgc3ViamVjdFNjaGVtZT0iYXJYaXYiPk1hY2hpbmUgTGVhcm5pbmcgKGNzLkxHKTwvc3ViamVjdD4KICAgIDxzdWJqZWN0IHhtbDpsYW5nPSJlbiIgc3ViamVjdFNjaGVtZT0iYXJYaXYiPk1hY2hpbmUgTGVhcm5pbmcgKHN0YXQuTUwpPC9zdWJqZWN0PgogICAgPHN1YmplY3Qgc3ViamVjdFNjaGVtZT0iRmllbGRzIG9mIFNjaWVuY2UgYW5kIFRlY2hub2xvZ3kgKEZPUykiPkZPUzogQ29tcHV0ZXIgYW5kIGluZm9ybWF0aW9uIHNjaWVuY2VzPC9zdWJqZWN0PgogIDwvc3ViamVjdHM+CiAgPGRhdGVzPgogICAgPGRhdGUgZGF0ZVR5cGU9IlN1Ym1pdHRlZCIgZGF0ZUluZm9ybWF0aW9uPSJ2MSI+MjAxOS0wNi0wOFQwMDowODozOFo8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iVXBkYXRlZCIgZGF0ZUluZm9ybWF0aW9uPSJ2MSI+MjAxOS0wNi0xMVQwMDowNDoyN1o8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iU3VibWl0dGVkIiBkYXRlSW5mb3JtYXRpb249InYyIj4yMDE5LTA4LTI2VDIxOjEzOjI3WjwvZGF0ZT4KICAgIDxkYXRlIGRhdGVUeXBlPSJVcGRhdGVkIiBkYXRlSW5mb3JtYXRpb249InYyIj4yMDE5LTA4LTI4VDAwOjAzOjM2WjwvZGF0ZT4KICAgIDxkYXRlIGRhdGVUeXBlPSJTdWJtaXR0ZWQiIGRhdGVJbmZvcm1hdGlvbj0idjMiPjIwMTktMDktMjNUMTY6MDg6NTRaPC9kYXRlPgogICAgPGRhdGUgZGF0ZVR5cGU9IlVwZGF0ZWQiIGRhdGVJbmZvcm1hdGlvbj0idjMiPjIwMTktMDktMjRUMDA6MzE6MThaPC9kYXRlPgogICAgPGRhdGUgZGF0ZVR5cGU9IkF2YWlsYWJsZSIgZGF0ZUluZm9ybWF0aW9uPSJ2MSI+MjAxOS0wNjwvZGF0ZT4KICA8L2RhdGVzPgogIDxyZXNvdXJjZVR5cGUgcmVzb3VyY2VUeXBlR2VuZXJhbD0iVGV4dCI+QXJ0aWNsZTwvcmVzb3VyY2VUeXBlPgogIDx2ZXJzaW9uPjM8L3ZlcnNpb24+CiAgPHJpZ2h0c0xpc3Q+CiAgICA8cmlnaHRzIHJpZ2h0c1VSST0iaHR0cDovL2FyeGl2Lm9yZy9saWNlbnNlcy9ub25leGNsdXNpdmUtZGlzdHJpYi8xLjAvIj5hclhpdi5vcmcgcGVycGV0dWFsLCBub24tZXhjbHVzaXZlIGxpY2Vuc2U8L3JpZ2h0cz4KICA8L3JpZ2h0c0xpc3Q+CiAgPGRlc2NyaXB0aW9ucz4KICAgIDxkZXNjcmlwdGlvbiBkZXNjcmlwdGlvblR5cGU9IkFic3RyYWN0Ij5XZSBpbnRyb2R1Y2UgYSB0ZW1wZXJhdHVyZSBpbnRvIHRoZSBleHBvbmVudGlhbCBmdW5jdGlvbiBhbmQgcmVwbGFjZSB0aGUgc29mdG1heCBvdXRwdXQgbGF5ZXIgb2YgbmV1cmFsIG5ldHMgYnkgYSBoaWdoIHRlbXBlcmF0dXJlIGdlbmVyYWxpemF0aW9uLiBTaW1pbGFybHksIHRoZSBsb2dhcml0aG0gaW4gdGhlIGxvZyBsb3NzIHdlIHVzZSBmb3IgdHJhaW5pbmcgaXMgcmVwbGFjZWQgYnkgYSBsb3cgdGVtcGVyYXR1cmUgbG9nYXJpdGhtLiBCeSB0dW5pbmcgdGhlIHR3byB0ZW1wZXJhdHVyZXMgd2UgY3JlYXRlIGxvc3MgZnVuY3Rpb25zIHRoYXQgYXJlIG5vbi1jb252ZXggYWxyZWFkeSBpbiB0aGUgc2luZ2xlIGxheWVyIGNhc2UuIFdoZW4gcmVwbGFjaW5nIHRoZSBsYXN0IGxheWVyIG9mIHRoZSBuZXVyYWwgbmV0cyBieSBvdXIgYmktdGVtcGVyYXR1cmUgZ2VuZXJhbGl6YXRpb24gb2YgbG9naXN0aWMgbG9zcywgdGhlIHRyYWluaW5nIGJlY29tZXMgbW9yZSByb2J1c3QgdG8gbm9pc2UuIFdlIHZpc3VhbGl6ZSB0aGUgZWZmZWN0IG9mIHR1bmluZyB0aGUgdHdvIHRlbXBlcmF0dXJlcyBpbiBhIHNpbXBsZSBzZXR0aW5nIGFuZCBzaG93IHRoZSBlZmZpY2FjeSBvZiBvdXIgbWV0aG9kIG9uIGxhcmdlIGRhdGEgc2V0cy4gT3VyIG1ldGhvZG9sb2d5IGlzIGJhc2VkIG9uIEJyZWdtYW4gZGl2ZXJnZW5jZXMgYW5kIGlzIHN1cGVyaW9yIHRvIGEgcmVsYXRlZCB0d28tdGVtcGVyYXR1cmUgbWV0aG9kIHVzaW5nIHRoZSBUc2FsbGlzIGRpdmVyZ2VuY2UuPC9kZXNjcmlwdGlvbj4KICA8L2Rlc2NyaXB0aW9ucz4KPC9yZXNvdXJjZT4=","url":"https://arxiv.org/abs/1906.03361","contentUrl":null,"metadataVersion":0,"schemaVersion":"http://datacite.org/schema/kernel-4","source":"mds","isActive":true,"state":"findable","reason":null,"viewCount":0,"viewsOverTime":[],"downloadCount":0,"downloadsOverTime":[],"referenceCount":0,"citationCount":0,"citationsOverTime":[],"partCount":0,"partOfCount":0,"versionCount":0,"versionOfCount":0,"created":"2022-02-28T06:12:58.000Z","registered":"2022-02-28T06:12:59.000Z","published":"2019","updated":"2022-02-28T06:12:59.000Z"},"relationships":{"client":{"data":{"id":"arxiv.content","type":"clients"}},"provider":{"data":{"id":"arxiv","type":"providers"}},"media":{"data":{"id":"10.48550/arxiv.1906.03361","type":"media"}},"references":{"data":[]},"citations":{"data":[]},"parts":{"data":[]},"partOf":{"data":[]},"versions":{"data":[]},"versionOf":{"data":[]}}}}