{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T06:50:41Z","timestamp":1780555841147,"version":"3.54.1"},"reference-count":37,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,12,16]],"date-time":"2023-12-16T00:00:00Z","timestamp":1702684800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,16]],"date-time":"2023-12-16T00:00:00Z","timestamp":1702684800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,12,16]]},"DOI":"10.1109\/asru57964.2023.10389790","type":"proceedings-article","created":{"date-parts":[[2024,1,19]],"date-time":"2024-01-19T13:38:40Z","timestamp":1705671520000},"page":"1-7","source":"Crossref","is-referenced-by-count":17,"title":["Speech Emotion Diarization: Which Emotion Appears When?"],"prefix":"10.1109","author":[{"given":"Yingzhi","family":"Wang","sequence":"first","affiliation":[{"name":"Zaion Lab,Zaion,France"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mirco","family":"Ravanelli","sequence":"additional","affiliation":[{"name":"Mila - Quebec AI Institute,Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Alya","family":"Yacoubi","sequence":"additional","affiliation":[{"name":"Zaion Lab,Zaion,France"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2011.5771357"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2936124"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3068045"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2019.12.001"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683163"},{"key":"ref6","article-title":"A fine-tuned wav2vec 2.0\/hubert benchmark for speech emotion recognition, speaker verification and spoken language understanding","volume-title":"arXiv preprint arXiv:2111.02735","author":"Wang"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747460"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.concog.2008.03.019"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1037\/h0077714"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/BF02686918"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2010.2057200"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2008-192"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2944808"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/fg.2013.6553805"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/T-AFFC.2011.20"},{"key":"ref16","article-title":"Unmasking the face: A guide to recognizing emotions from facial clues","author":"Ekman"},{"key":"ref17","article-title":"Emotion recognition from speech with recurrent neural networks","volume-title":"arXiv preprint arXiv:1701.08071","author":"Chernykh"},{"key":"ref18","article-title":"On fine-grained temporal emotion recognition in video: How to trade off recognition accuracy with annotation complexity?","author":"Zhang"},{"key":"ref19","article-title":"Speechbrain: A general-purpose speech toolkit","volume-title":"arXiv preprint arXiv:2106.04624","author":"Ravanelli"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-74889-2_13"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2014-57"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICSPCS.2015.7391796"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952552"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1242"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2017.02.013"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-008-9076-6"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0196391"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2005-446"},{"key":"ref29","article-title":"Attention is all you need","volume-title":"Advances in neural information processing systems","volume":"30","author":"Vaswani"},{"key":"ref30","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume-title":"NeurIPS","author":"Baevski"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10002"},{"key":"ref34","doi-asserted-by":"crossref","DOI":"10.1037\/0022-3514.53.4.712","article-title":"Universals and cultural differences in facial expressions of emotion","volume-title":"Nebraska symposium on motivation. University of Nebraska Press","author":"Ekman"},{"key":"ref35","article-title":"The emotional voices database: Towards controlling the emotion dimension in voice generation systems","volume-title":"arXiv preprint arXiv:1806.09514","author":"Adigwe"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413391"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1349"}],"event":{"name":"2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"Taipei, Taiwan","start":{"date-parts":[[2023,12,16]]},"end":{"date-parts":[[2023,12,20]]}},"container-title":["2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10388490\/10389614\/10389790.pdf?arnumber=10389790","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,23]],"date-time":"2024-01-23T11:36:43Z","timestamp":1706009803000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10389790\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,16]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/asru57964.2023.10389790","relation":{},"subject":[],"published":{"date-parts":[[2023,12,16]]}}}