2014 |
Theodora Chaspari, Dimitrios Dimitriadis, Petros Maragos Emotion classification of speech using modulation features Conference European Signal Processing Conference, 2014, ISSN: 2219-5491. Abstract | BibTeX | Links: [PDF]
@inproceedings{170,
  title     = {Emotion classification of speech using modulation features},
  author    = {Chaspari, Theodora and Dimitriadis, Dimitrios and Maragos, Petros},
  url       = {http://robotics.ntua.gr/wp-content/uploads/publications/ChaspariDimitriadisMaragos_EmotionRecognitionSpeech_EUSIPCO2014_cr.pdf},
  issn      = {2219-5491},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {European Signal Processing Conference},
  pages     = {1552--1556},
  abstract  = {Automatic classification of a speaker's affective state is one of the major challenges in signal processing community, since it can improve Human-Computer interaction and give insights into the nature of emotions from psychology perspective. The amplitude and frequency control of sound production influences strongly the affective voice content. In this paper, we take advantage of the inherent speech modulations and propose the use of instant amplitude- and frequency-derived features for efficient emotion recognition. Our results indicate that these features can further increase the performance of the widely-used spectral-prosodic information, achieving improvements on two emotional databases, the Berlin Database of Emotional Speech and the recently collected Athens Emotional States Inventory.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Automatic classification of a speaker's affective state is one of the major challenges in signal processing community, since it can improve Human-Computer interaction and give insights into the nature of emotions from psychology perspective. The amplitude and frequency control of sound production influences strongly the affective voice content.
In this paper, we take advantage of the inherent speech modulations and propose the use of instant amplitude- and frequency-derived features for efficient emotion recognition. Our results indicate that these features can further increase the performance of the widely-used spectral-prosodic information, achieving improvements on two emotional databases, the Berlin Database of Emotional Speech and the recently collected Athens Emotional States Inventory. |
2006 |
Georgios Evangelopoulos, Petros Maragos Multiband modulation energy tracking for noisy speech detection Journal Article IEEE Transactions on Audio, Speech and Language Processing, 14 (6), pp. 2024–2038, 2006, ISSN: 1558-7916. Abstract | BibTeX | Links: [PDF]
@article{122,
  title    = {Multiband modulation energy tracking for noisy speech detection},
  author   = {Evangelopoulos, Georgios and Maragos, Petros},
  url      = {http://robotics.ntua.gr/wp-content/uploads/publications/EvangelopoulosMaragos_MultibandModEnergyTrackNoisSpeechDetect_ieeetASLP2006.pdf},
  doi      = {10.1109/TASL.2006.872625},
  issn     = {1558-7916},
  year     = {2006},
  date     = {2006-01-01},
  journal  = {IEEE Transactions on Audio, Speech and Language Processing},
  volume   = {14},
  number   = {6},
  pages    = {2024--2038},
  abstract = {The ability to accurately locate the boundaries of speech activity is an important attribute of any modern speech recognition, processing, or transmission system. The effort in this paper is the development of efficient, sophisticated features for speech detection in noisy environments, using ideas and techniques from recent advances in speech modeling and analysis, like presence of modulations in speech formants, energy separation and multiband filtering. First we present a method, conceptually based on a classic speech-silence discrimination procedure, that uses some newly developed, short-time signal analysis tools and provide for it a detection theoretic motivation. The new energy and spectral content representations are derived through filtering the signal in various frequency bands, estimating the Teager-Kaiser energy for each and demodulating the most active one in order to derive the signal's dominant AM-FM components. This modulation approach demonstrated an improved robustness in noise over the classic algorithm, reaching an average error reduction of 33.5% under 5-30-dB noise. Second, by incorporating alternative modulation energy features in voice activity detection, improvement in overall misclassification error of a high hit rate detector reached 7.5% and 9.5% on different benchmarks},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
The ability to accurately locate the boundaries of speech activity is an important attribute of any modern speech recognition, processing, or transmission system. The effort in this paper is the development of efficient, sophisticated features for speech detection in noisy environments, using ideas and techniques from recent advances in speech modeling and analysis, like presence of modulations in speech formants, energy separation and multiband filtering. First we present a method, conceptually based on a classic speech-silence discrimination procedure, that uses some newly developed, short-time signal analysis tools and provide for it a detection theoretic motivation. The new energy and spectral content representations are derived through filtering the signal in various frequency bands, estimating the Teager-Kaiser energy for each and demodulating the most active one in order to derive the signal's dominant AM-FM components. This modulation approach demonstrated an improved robustness in noise over the classic algorithm, reaching an average error reduction of 33.5% under 5-30-dB noise. Second, by incorporating alternative modulation energy features in voice activity detection, improvement in overall misclassification error of a high hit rate detector reached 7.5% and 9.5% on different benchmarks |
2002 |
D Dimitriadis, P Maragos, A Potamianos Modulation features for speech recognition Conference International Conference on Acoustics, 1 , pp. I–377–I–380, 2002.
@inproceedings{76c,
  title     = {Modulation features for speech recognition},
  author    = {Dimitriadis, D. and Maragos, P. and Potamianos, A.},
  url       = {http://robotics.ntua.gr/wp-content/uploads/publications/DimitriadisMaragosPotamianos_RobustAMFM_Features_SpeechRecognition_ieeeSPL2005.pdf},
  year      = {2002},
  date      = {2002-01-01},
  booktitle = {International Conference on Acoustics},
  volume    = {1},
  pages     = {I--377--I--380},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference},
  internal-note = {Apparent duplicate of entry 253; linked PDF filename refers to the 2005 IEEE SPL paper, not this 2002 ICASSP paper -- verify URL}
}
|
D Dimitriadis, P Maragos, A Potamianos Modulation features for speech recognition Conference International Conference on Acoustics, 1 , 2002.
@inproceedings{253,
  title     = {Modulation features for speech recognition},
  author    = {Dimitriadis, D. and Maragos, P. and Potamianos, A.},
  year      = {2002},
  date      = {2002-01-01},
  booktitle = {International Conference on Acoustics},
  volume    = {1},
  pages     = {I--377--I--380},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
|
V Pitsikalis, P Maragos Speech analysis and feature extraction using chaotic models Conference International Conference on Acoustics, 1 , 2002.
@inproceedings{252,
  title     = {Speech analysis and feature extraction using chaotic models},
  author    = {Pitsikalis, V. and Maragos, P.},
  year      = {2002},
  date      = {2002-01-01},
  booktitle = {International Conference on Acoustics},
  volume    = {1},
  pages     = {I--533--I--536},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
|
Copyright Notice:
Some material presented is available for download to ensure timely dissemination of scholarly and technical work. Copyright and all rights therein are retained by authors or by other copyright holders. All persons copying this information are expected to adhere to the terms and constraints invoked by each author’s copyright. In most cases, these works may not be reposted without the explicit permission of the copyright holder.
The work already published by the IEEE is under its copyright. Personal use of such material is permitted. However, permission to reprint/republish the material for advertising or promotional purposes, or for creating new collective works for resale or redistribution to servers or lists, or to reuse any copyrighted component of the work in other works must be obtained from the IEEE.