2014
Epameinondas Antonakos, Vassilis Pitsikalis, Petros Maragos: "Classification of extreme facial events in sign language videos." Journal Article. EURASIP Journal on Image and Video Processing, vol. 2014, 2014, ISSN: 1687-5281, doi: 10.1186/1687-5281-2014-14.

Abstract: We propose a new approach for Extreme States Classification (ESC) on feature spaces of facial cues in sign language (SL) videos. The method is built upon Active Appearance Model (AAM) face tracking and feature extraction with global and local AAMs. ESC is applied to various facial cues (for instance, pose rotations, head movements, and eye blinking), leading to the detection of extreme states such as left/right, up/down, and open/closed. Given the importance of such facial events in SL analysis, we apply ESC to detect visual events in SL videos from both American (ASL) and Greek (GSL) corpora, yielding promising qualitative and quantitative results. Further, we show the potential of ESC for assistive annotation tools and demonstrate a link between the detections and indicative higher-level linguistic events. Given the lack of facial annotated data and the fact that manual annotation is highly time-consuming, the ESC results indicate that the framework can have a significant impact on SL processing and analysis. © 2014 Antonakos et al.
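As a rough, illustrative sketch of the ESC idea above (not the authors' actual implementation): once a scalar facial cue has been extracted per frame from the AAM fit, e.g., a yaw angle, frames falling in the tails of the cue's empirical distribution can be labeled as the two opposing extreme states. The quantile thresholds and function names below are assumptions for illustration only.

import numpy as np

def classify_extreme_states(cue, low_q=0.1, high_q=0.9):
    """Label each frame of a 1-D facial-cue trajectory as an extreme
    state ('low'/'high', e.g. left/right for yaw) or 'neutral'.
    Thresholds are empirical quantiles -- an illustrative choice,
    not the paper's actual decision rule."""
    cue = np.asarray(cue, dtype=float)
    lo, hi = np.quantile(cue, [low_q, high_q])
    labels = np.full(cue.shape, "neutral", dtype=object)
    labels[cue <= lo] = "low"    # e.g. head turned left / eyes closed
    labels[cue >= hi] = "high"   # e.g. head turned right / eyes open
    return labels

# Toy usage: a synthetic yaw trajectory oscillating left/right.
t = np.linspace(0, 4 * np.pi, 200)
yaw = np.sin(t) + 0.1 * np.random.randn(200)
print(classify_extreme_states(yaw)[:10])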
2010
Stavros Theodorakis, Vassilis Pitsikalis, Petros Maragos: "Model-level data-driven sub-units for signs in videos of continuous sign language." Conference. Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 2262-2265, 2010, ISSN: 1520-6149, doi: 10.1109/ICASSP.2010.5495875. Links: [PDF] http://robotics.ntua.gr/wp-content/uploads/publications/TheodorakisPitsikalisMaragos_ModelDatadrivenSubunitsSignsVideoContSignLang_ICASSP2010.pdf

Abstract: We investigate automatic phonetic sub-unit modeling for sign language that is completely data-driven and uses no prior phonetic information. A first step of visual processing leads to simple and effective region-based visual features. Prior to sub-unit modeling, we propose a pronunciation clustering step with respect to each sign. Afterwards, for each sign and pronunciation group, we find the time segmentation at the hidden Markov model (HMM) level. The models employed represent movements as sequences of dominant-hand positions. The constructed segments are exploited explicitly at the model level via hierarchical clustering of HMMs, leading to data-driven movement sub-unit construction. The constructed movement sub-units are evaluated in qualitative analysis experiments on data from the Boston University (BU)-400 American Sign Language corpus, showing promising results. © 2010 IEEE.
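A minimal sketch of the model-level clustering step described above, assuming the hmmlearn and scipy packages: each movement segment gets its own Gaussian HMM, pairwise model dissimilarities are computed with a symmetrized cross-likelihood measure (one common choice; the paper's exact measure may differ), and hierarchical clustering over those dissimilarities yields candidate movement sub-units. The toy 2-D "hand position" data and all names are illustrative.

import numpy as np
from hmmlearn.hmm import GaussianHMM
from scipy.cluster.hierarchy import linkage, fcluster
from scipy.spatial.distance import squareform

def fit_segment_hmm(X, n_states=3):
    # One small Gaussian HMM per movement segment (illustrative topology).
    model = GaussianHMM(n_components=n_states, covariance_type="diag", n_iter=50)
    model.fit(X)
    return model

def hmm_dissimilarity(m_a, X_a, m_b, X_b):
    # Symmetrized per-frame cross-likelihood gap: how much worse each
    # model explains the other's data than its own. Clamped at zero so
    # it behaves as a dissimilarity for the clustering step below.
    d_ab = (m_a.score(X_a) - m_b.score(X_a)) / len(X_a)
    d_ba = (m_b.score(X_b) - m_a.score(X_b)) / len(X_b)
    return max(0.0, 0.5 * (d_ab + d_ba))

# Toy usage: cluster four 2-D "hand position" segments; the first two and
# the last two are generated around different centers.
rng = np.random.default_rng(0)
segments = [rng.normal(loc=c, size=(40, 2)) for c in (0.0, 0.1, 3.0, 3.1)]
models = [fit_segment_hmm(X) for X in segments]

n = len(segments)
D = np.zeros((n, n))
for i in range(n):
    for j in range(i + 1, n):
        D[i, j] = D[j, i] = hmm_dissimilarity(models[i], segments[i],
                                              models[j], segments[j])

# Hierarchical clustering of the segment HMMs; each resulting cluster
# plays the role of one data-driven movement sub-unit.
Z = linkage(squareform(D), method="average")
print(fcluster(Z, t=2, criterion="maxclust"))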
2009
S. E. Fotinea, Eleni Efthimiou, George Caridakis, Olga Diamanti, N. Mitsou, K. Karpouzis, C. Tzafestas, P. Maragos: "DIANOEMA: Visual analysis and sign recognition for GSL modelling and robot teleoperation." Conference. Language and Speech, pp. 2-4, 2009. Links: [PDF] http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.149.1666&rep=rep1&type=pdf
Copyright Notice:
Some material presented is available for download to ensure timely dissemination of scholarly and technical work. Copyright and all rights therein are retained by authors or by other copyright holders. All persons copying this information are expected to adhere to the terms and constraints invoked by each author’s copyright. In most cases, these works may not be reposted without the explicit permission of the copyright holder.
Work already published by the IEEE is covered by IEEE copyright. Personal use of such material is permitted. However, permission to reprint or republish the material for advertising or promotional purposes, to create new collective works for resale or redistribution to servers or lists, or to reuse any copyrighted component of the work in other works must be obtained from the IEEE.