2024 |
L Liapi, E Manoudi, M Revelou, K Christodoulou, P Koutras, P Maragos, Argiro Vatakis Time perception in film viewing: A modulation of scene's duration estimates as a function of film editing Journal Article Acta Psychologica, 244 , pp. 104206, 2024. @article{liapi2024time, title = {Time perception in film viewing: A modulation of scene's duration estimates as a function of film editing}, author = {L Liapi and E Manoudi and M Revelou and K Christodoulou and P Koutras and P Maragos and Argiro Vatakis}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2024_LiapiVataki_TimePerceptionInFilmViewing-ModulationOfSceneDuration_ActaPsychol.pdf}, doi = {10.1016/j.actpsy.2024.104206}, year = {2024}, date = {2024-01-01}, journal = {Acta Psychologica}, volume = {244}, pages = {104206}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
A Zlatintsi, P P Filntisis, N Efthymiou, C Garoufis, G Retsinas, T Sounapoglou, I Maglogiannis, P Tsanakas, N Smyrnis, P Maragos Person Identification and Relapse Detection from Continuous Recordings of Biosignals Challenge: Overview and Results Journal Article IEEE Open Journal of Signal Processing, 2024. @article{zlatintsi2024person, title = {Person Identification and Relapse Detection from Continuous Recordings of Biosignals Challenge: Overview and Results}, author = {A Zlatintsi and P P Filntisis and N Efthymiou and C Garoufis and G Retsinas and T Sounapoglou and I Maglogiannis and P Tsanakas and N Smyrnis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Zlatintsi_e-PreventionChallengeOverview_OJSP-2024_preprint.pdf}, doi = {10.1109/OJSP.2024.3376300}, year = {2024}, date = {2024-01-01}, journal = {IEEE Open Journal of Signal Processing}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
George Retsinas, Panagiotis P Filntisis, Radek Danecek, Victoria F Abrevaya, Anastasios Roussos, Timo Bolkart, Petros Maragos 3D Facial Expressions through Analysis-by-Neural-Synthesis Inproceedings Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2490-2501, 2024. @inproceedings{Retsinas2024, title = {3D Facial Expressions through Analysis-by-Neural-Synthesis}, author = {George Retsinas and Panagiotis P Filntisis and Radek Danecek and Victoria F Abrevaya and Anastasios Roussos and Timo Bolkart and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Retsinas_SMIRK-3D_Facial_Expressions_through_Analysis-by-Neural-Synthesis_CVPR2024.pdf}, year = {2024}, date = {2024-06-17}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, pages = {2490--2501}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
I Kordonis, E Theodosis, G Retsinas, P Maragos Matrix Factorization in Tropical and Mixed Tropical-Linear Algebras Inproceedings Proc. IEEE Int'l Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024), Seoul, Korea, 2024. @inproceedings{Kordonis2024, title = {Matrix Factorization in Tropical and Mixed Tropical-Linear Algebras}, author = {I Kordonis and E Theodosis and G Retsinas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Kordonis_MatrixFactorizationInMixedTropicalLinearAlegbras_ICASSP2024.pdf}, year = {2024}, date = {2024-04-01}, booktitle = {Proc. IEEE Int'l Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024)}, address = {Seoul, Korea}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
N Efthymiou, G Retsinas, P P Filntisis, P Maragos Augmenting Transformer Autoencoders with Phenotype Classification for Robust Detection of Psychotic Relapses Inproceedings Proc. IEEE Int'l Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024), Seoul, Korea, 2024. @inproceedings{Efthymiou2024, title = {Augmenting Transformer Autoencoders with Phenotype Classification for Robust Detection of Psychotic Relapses}, author = {N Efthymiou and G Retsinas and P P Filntisis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Efthymiou_TransformerPhenotypeClassif-DetectPsychoticRelapses_ICASSP2024.pdf}, year = {2024}, date = {2024-04-01}, booktitle = {Proc. IEEE Int'l Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024)}, address = {Seoul, Korea}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
2023 |
K Papadimitriou, G Potamianos, G Sapountzaki, T Goulas, E Efthimiou, S -E Fotinea, P Maragos Greek sign language recognition for an education platform Journal Article Universal Access in the Information Society, 2023. @article{papadimitriou2023greek, title = {Greek sign language recognition for an education platform}, author = {K Papadimitriou and G Potamianos and G Sapountzaki and T Goulas and E Efthimiou and S -E Fotinea and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2023_Papadimitriou_GreekSignLangRecognForEducation_UAIS.pdf}, doi = {10.1007/s10209-023-01017-7}, year = {2023}, date = {2023-01-01}, journal = {Universal Access in the Information Society}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
G Retsinas, N Efthymiou, D Anagnostopoulou, P Maragos Mushroom Detection and Three Dimensional Pose Estimation from Multi-View Point Clouds Journal Article Sensors, 23 (7), pp. 3576, 2023. @article{retsinas2023mushroom, title = {Mushroom Detection and Three Dimensional Pose Estimation from Multi-View Point Clouds}, author = {G Retsinas and N Efthymiou and D Anagnostopoulou and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2023_Retsinas_MushromDetection-and-3D-PoseEstimation_Sensors.pdf}, doi = {10.3390/s23073576}, year = {2023}, date = {2023-01-01}, journal = {Sensors}, volume = {23}, number = {7}, pages = {3576}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
E Kalisperakis, T Karantinos, M Lazaridi, V Garyfalli, P P Filntisis, A Zlatintsi, N Efthymiou, A Mantas, L Mantonakis, T Mougiakos, I Maglogiannis, P Tsanakas, P Maragos, N Smyrnis Smartwatch digital phenotypes predict positive and negative symptom variation in a longitudinal monitoring study of patients with psychotic disorders Journal Article Frontiers in Psychiatry, 14 , 2023. @article{kalisperakis2023smartwatch, title = {Smartwatch digital phenotypes predict positive and negative symptom variation in a longitudinal monitoring study of patients with psychotic disorders}, author = {E Kalisperakis and T Karantinos and M Lazaridi and V Garyfalli and P P Filntisis and A Zlatintsi and N Efthymiou and A Mantas and L Mantonakis and T Mougiakos and I Maglogiannis and P Tsanakas and P Maragos and N Smyrnis}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2023_KalisperakisEtAl_ePrevention_FrontiersPsychiatry.pdf}, doi = {10.3389/fpsyt.2023.1024965}, year = {2023}, date = {2023-01-01}, journal = {Frontiers in Psychiatry}, volume = {14}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
C Garoufis, A Zlatintsi, P Maragos Multi-Source Contrastive Learning from Musical Audio Conference Proc. 20th Sound and Music Computing Conference (SMC 2023), Stockholm, Sweden, 2023. BibTeX | Links: [PDF] [Poster] [Slides] @conference{garoufis2023multi, title = {Multi-Source Contrastive Learning from Musical Audio}, author = {C Garoufis and A Zlatintsi and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Garoufis_SMC2023_paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Garoufis_SMC2023_poster.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Garoufis_SMC2023_slides.pdf}, year = {2023}, date = {2023-06-01}, booktitle = {Proc. 20th Sound and Music Computing Conference (SMC 2023)}, address = {Stockholm, Sweden}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
D Charitou, C Garoufis, A Zlatintsi, P Maragos Exploring Polyphonic Accompaniment Generation using Generative Adversarial Networks Conference Proc. 20th Sound and Music Computing Conference (SMC 2023), Stockholm, Sweden, 2023. BibTeX | Links: [PDF] [Slides] [Poster] @conference{charitou2023exploring, title = {Exploring Polyphonic Accompaniment Generation using Generative Adversarial Networks}, author = {D Charitou and C Garoufis and A Zlatintsi and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Charitou_SMC2023_paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Charitou_SMC2023_slides.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Charitou_SMC2023_poster.pdf}, year = {2023}, date = {2023-06-01}, booktitle = {Proc. 20th Sound and Music Computing Conference (SMC 2023)}, address = {Stockholm, Sweden}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
N Efthymiou, G Retsinas, P P Filntisis, A Zlatintsi, E Kalisperakis, V Garyfalli, T Karantinos, M Lazaridi, N Smyrnis, P Maragos From Digital Phenotype Identification To Detection Of Psychotic Relapses Conference Proc. IEEE International Conference on Healthcare Informatics, Houston, TX, USA, 2023. BibTeX | Links: [PDF] [Slides] @conference{efthymiou2023digital, title = {From Digital Phenotype Identification To Detection Of Psychotic Relapses}, author = {N Efthymiou and G Retsinas and P P Filntisis and A Zlatintsi and E Kalisperakis and V Garyfalli and T Karantinos and M Lazaridi and N Smyrnis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Efthymiou_ICHI2023_paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Efthymiou_ICHI2023_slides.pdf}, year = {2023}, date = {2023-06-01}, booktitle = {Proc. IEEE International Conference on Healthcare Informatics}, address = {Houston, TX, USA}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
D Anagnostopoulou, G Retsinas, N Efthymiou, P P Filntisis, P Maragos A Realistic Synthetic Mushroom Scenes Dataset Conference Proc. 2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 4th Agriculture Vision Workshop, Vancouver, Canada, 2023. BibTeX | Links: [PDF] [Poster] @conference{anagnostopoulou2023realistic, title = {A Realistic Synthetic Mushroom Scenes Dataset}, author = {D Anagnostopoulou and G Retsinas and N Efthymiou and P P Filntisis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Anagnostopoulou_CVPRW2023_paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Anagnostopoulou_CVPRW2023_poster.pdf}, year = {2023}, date = {2023-06-01}, booktitle = {Proc. 2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 4th Agriculture Vision Workshop}, address = {Vancouver, Canada}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
G Retsinas, N Efthymiou, P Maragos Mushroom Segmentation and 3D Pose Estimation From Point Clouds Using Fully Convolutional Geometric Features and Implicit Pose Encoding Conference Proc. 2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 4th Agriculture Vision Workshop, Vancouver, Canada, 2023. @conference{retsinas2023mushroomb, title = {Mushroom Segmentation and 3D Pose Estimation From Point Clouds Using Fully Convolutional Geometric Features and Implicit Pose Encoding}, author = {G Retsinas and N Efthymiou and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Retsinas_CVPRW2023_Mushroom_Segmentation_and_3D_Pose_Estimation_From_Point_Clouds_Using_paper.pdf}, year = {2023}, date = {2023-06-01}, booktitle = {Proc. 2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 4th Agriculture Vision Workshop}, address = {Vancouver, Canada}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P P Filntisis, G Retsinas, F Paraperas-Papantoniou, A Katsamanis, A Roussos, P Maragos SPECTRE: Visual Speech-Informed Perceptual 3D Facial Expression Reconstruction from Videos Conference Proc. 2023 IEEE/CVF Conf. on Computer Vision and Pattern Recognition (CVPR), 5th Workshop and Competition on Affective Behavior Analysis in-the-wild (ABAW), Vancouver, Canada, 2023. @conference{filntisis2023spectre, title = {SPECTRE: Visual Speech-Informed Perceptual 3D Facial Expression Reconstruction from Videos}, author = {P P Filntisis and G Retsinas and F Paraperas-Papantoniou and A Katsamanis and A Roussos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Filntisis_CVPRW2023_SPECTRE_Visual_Speech-Informed_Perceptual_3D_Facial_Expression_Reconstruction_From_Videos_paper.pdf}, year = {2023}, date = {2023-06-01}, booktitle = {Proc. 2023 IEEE/CVF Conf. on Computer Vision and Pattern Recognition (CVPR), 5th Workshop and Competition on Affective Behavior Analysis in-the-wild (ABAW)}, address = {Vancouver, Canada}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C O Tze, P P Filntisis, A -L Dimou, A Roussos, P Maragos Neural Sign Reenactor: Deep Photorealistic Sign Language Retargeting Conference Proc. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), AI for Content Creation Workshop (AI4CC), Vancouver, Canada, 2023. BibTeX | Links: [PDF] [Poster] @conference{tze2023neural, title = {Neural Sign Reenactor: Deep Photorealistic Sign Language Retargeting}, author = {C O Tze and P P Filntisis and A -L Dimou and A Roussos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Tze_CVPRW2023_Neural_Sign_Reenactor_Paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Tze_CVPRW2023_Neural_Sign_Reenactor_Poster.pdf}, year = {2023}, date = {2023-06-01}, booktitle = {Proc. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), AI for Content Creation Workshop (AI4CC)}, address = {Vancouver, Canada}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
G Retsinas, G Sfikas, P P Filntisis, P Maragos Newton-based Trainable Learning Rate Conference Proc. 48th IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2023), Rhodes, Greece, 2023. @conference{retsinas2023newton, title = {Newton-based Trainable Learning Rate}, author = {G Retsinas and G Sfikas and P P Filntisis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Retsinas_ICASSP2023_Newton-Based-Trainable-Learning-Rate.pdf}, year = {2023}, date = {2023-06-01}, booktitle = {Proc. 48th IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2023)}, address = {Rhodes, Greece}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
E Fekas, A Zlatintsi, P P Filntisis, C Garoufis, N Efthymiou, P Maragos Relapse Prediction from Long-Term Wearable Data using Self-Supervised Learning and Survival Analysis Conference Proc. 48th Int'l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-2023), Rhodes Island, 2023. BibTeX | Links: [PDF] [Slides] @conference{fekas2023relapse, title = {Relapse Prediction from Long-Term Wearable Data using Self-Supervised Learning and Survival Analysis}, author = {E Fekas and A Zlatintsi and P P Filntisis and C Garoufis and N Efthymiou and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Fekas_ICASSP2023_paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Fekas_ICASSP2023_slides.pdf}, year = {2023}, date = {2023-06-01}, booktitle = {Proc. 48th Int'l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-2023)}, address = {Rhodes Island}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
D N Makropoulos, A Tsiami, A Prospathopoulos, D Kassis, A Frantzis, E Skarsoulis, G Piperakis, P Maragos Convolutional Recurrent Neural Networks for the Classification of Cetacean Bioacoustic Patterns Conference Proc. 48th Int'l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-2023), Rhodes Island, 2023. BibTeX | Links: [PDF] [Poster] @conference{makropoulos2023convolutional, title = {Convolutional Recurrent Neural Networks for the Classification of Cetacean Bioacoustic Patterns}, author = {D N Makropoulos and A Tsiami and A Prospathopoulos and D Kassis and A Frantzis and E Skarsoulis and G Piperakis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Makropoulos_ICASSP2023_paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Makropoulos_ICASSP2023_poster.pdf}, year = {2023}, date = {2023-06-01}, booktitle = {Proc. 48th Int'l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-2023)}, address = {Rhodes Island}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Tzathas, P Maragos, A Roussos 3D Neural Sculpting (3DNS): Editing Neural Signed Distance Functions Conference Proc. IEEE/CVF Winter Conference on Applications of Computer Vision (WACV), 2023. @conference{tzathas2023neural, title = {3D Neural Sculpting (3DNS): Editing Neural Signed Distance Functions}, author = {P Tzathas and P Maragos and A Roussos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Tzathas_3D_Neural_Sculpting_3DNS_Editing_Neural_Signed_Distance_Functions_WACV2023.pdf}, year = {2023}, date = {2023-01-01}, booktitle = {Proc. IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
E Tsaprazlis, G Smyrnis, A G Dimakis, P Maragos Enhancing CLIP with a Third Modality Inproceedings Proc. 37th Conference on Neural Information Processing Systems (NeurIPS 2023): Workshop on Self-Supervised Learning - Theory and Practice, New Orleans, 2023. @inproceedings{Tsaprazlis2023, title = {Enhancing CLIP with a Third Modality}, author = {E Tsaprazlis and G Smyrnis and A G Dimakis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/TsaprazlisEtAl_Enhance-CLIP-with-ThirdModality_NIPSW2023.pdf}, year = {2023}, date = {2023-12-01}, booktitle = {Proc. 37th Conference on Neural Information Processing Systems (NeurIPS 2023): Workshop on Self-Supervised Learning - Theory and Practice}, address = {New Orleans}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
A Glentis-Georgoulakis, G Retsinas, P Maragos Feather: An Elegant Solution to Effective DNN Sparsification Inproceedings Proc. 34th British Machine Vision Conference (BMVC 2023), Aberdeen, UK, 2023. @inproceedings{Glentis-Georgoulakis2023, title = {Feather: An Elegant Solution to Effective DNN Sparsification}, author = {A Glentis-Georgoulakis and G Retsinas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/GlentisEtAl_Feather-EffectiveSolution-to-DNN-Sparsification_BMVC2023.pdf}, year = {2023}, date = {2023-11-01}, booktitle = {Proc. 34th British Machine Vision Conference (BMVC 2023)}, address = {Aberdeen, UK}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
M Konstantinou, G Retsinas, P Maragos Enhancing Action Recognition in Vehicle Environments With Human Pose Information Inproceedings Proc. Int'l Conf. on Pervasive Technologies Related to Assistive Environments (PETRA 2023), 2023. @inproceedings{Konstantinou2023, title = {Enhancing Action Recognition in Vehicle Environments With Human Pose Information}, author = {M Konstantinou and G Retsinas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Konstantinou_ActionRecogn-in-VehicleEnvironment_PETRA2023.pdf}, year = {2023}, date = {2023-07-01}, booktitle = {Proc. Int'l Conf. on Pervasive Technologies Related to Assistive Environments (PETRA 2023)}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
N Kegkeroglou, P P Filntisis, P Maragos Medical Face Masks and Emotion Recognition from the Body: Insights from a Deep Learning Perspective Inproceedings Proc. Int'l Conf. on Pervasive Technologies Related to Assistive Environments (PETRA 2023), 2023. @inproceedings{Kegkeroglou2023, title = {Medical Face Masks and Emotion Recognition from the Body: Insights from a Deep Learning Perspective}, author = {N Kegkeroglou and P P Filntisis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Kegkeroglou_MedicalFaceMasks-EmotionRecognFromBody_PETRA2023.pdf}, year = {2023}, date = {2023-07-01}, booktitle = {Proc. Int'l Conf. on Pervasive Technologies Related to Assistive Environments (PETRA 2023)}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
2022 |
R Dromnelle, E Renaudo, M Chetouani, P Maragos, R Chatila, B Girard, M Khamassi Reducing Computational Cost During Robot Navigation and Human-Robot Interaction with a Human-Inspired Reinforcement Learning Architectures Journal Article International Journal of Social Robotics, 2022. @article{dromnelle2022reducing, title = {Reducing Computational Cost During Robot Navigation and Human-Robot Interaction with a Human-Inspired Reinforcement Learning Architectures}, author = {R Dromnelle and E Renaudo and M Chetouani and P Maragos and R Chatila and B Girard and M Khamassi}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2022_DromnelleEtAl_RL-ReduceComputationRobotNavigation-HRI_IJSR.pdf}, doi = {10.1007/s12369-022-00942-6}, year = {2022}, date = {2022-01-01}, journal = {International Journal of Social Robotics}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
A Zlatintsi, P P Filntisis, C Garoufis, N Efthymiou, P Maragos, A Manychtas, I Maglogiannis, P Tsanakas, T Sounapoglou, E Kalisperakis, T Karantinos, M Lazaridi, V Garyfali, A Mantas, L Mantonakis, N Smyrnis e-Prevention: Advanced Support System for Monitoring and Relapse Prevention in Patients with Psychotic Disorders Analysing Long-Term Multimodal Data from Wearables and Video Captures Journal Article Sensors, 22 (19), pp. 7544, 2022. @article{zlatintsi2022e-prevention, title = {e-Prevention: Advanced Support System for Monitoring and Relapse Prevention in Patients with Psychotic Disorders Analysing Long-Term Multimodal Data from Wearables and Video Captures}, author = {A Zlatintsi and P P Filntisis and C Garoufis and N Efthymiou and P Maragos and A Manychtas and I Maglogiannis and P Tsanakas and T Sounapoglou and E Kalisperakis and T Karantinos and M Lazaridi and V Garyfali and A Mantas and L Mantonakis and N Smyrnis}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2022_ZlatintsiEtAl_EPrevention_SENSORS2022.pdf}, doi = {10.3390/s22197544}, year = {2022}, date = {2022-01-01}, journal = {Sensors}, volume = {22}, number = {19}, pages = {7544}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
N Tsilivis, A Tsiamis, P Maragos Toward a Sparsity Theory on Weighted Lattices Journal Article Journal of Mathematical Imaging and Vision, 2022. @article{tsilivis2022toward, title = {Toward a Sparsity Theory on Weighted Lattices}, author = {N Tsilivis and A Tsiamis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2022_TsilivisTsiamisMaragos_SparsityTheoryOnWeightedLattices_JMIV.pdf}, doi = {10.1007/s10851-022-01075-1}, year = {2022}, date = {2022-01-01}, journal = {Journal of Mathematical Imaging and Vision}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
N Efthymiou, P P Filntisis, P Koutras, A Tsiami, J Hadfield, G Potamianos, P Maragos ChildBot: Multi-robot perception and interaction with children Journal Article Robotics and Autonomous Systems, 150 , pp. 103975, 2022. @article{efthymiou2022childbot, title = {ChildBot: Multi-robot perception and interaction with children}, author = {N Efthymiou and P P Filntisis and P Koutras and A Tsiami and J Hadfield and G Potamianos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2022_EfthymiouEtAl_ChildBot-MultiRobotPerception-InteractionChildren_RAS.pdf}, doi = {10.1016/j.robot.2021.103975}, year = {2022}, date = {2022-01-01}, journal = {Robotics and Autonomous Systems}, volume = {150}, pages = {103975}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
D Anagnostopoulou, N Efthymiou, C Papailiou, P Maragos Child Engagement Estimation in Heterogeneous Child-Robot Interactions Using Spatiotemporal Visual Cues Conference Proc. 2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2022), Kyoto, Japan, 2022. @conference{anagnostopoulou2022child, title = {Child Engagement Estimation in Heterogeneous Child-Robot Interactions Using Spatiotemporal Visual Cues}, author = {D Anagnostopoulou and N Efthymiou and C Papailiou and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Anagnostopoulou_IROS2022_paper.pdf}, year = {2022}, date = {2022-10-01}, booktitle = {Proc. 2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2022)}, address = {Kyoto, Japan}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C Garoufis, A Zlatintsi, P P Filntisis, N Efthymiou, E Kalisperakis, T Karantinos, V Garyfalli, M Lazaridi, N Smyrnis, P Maragos Towards Unsupervised Subject-Independent Speech-Based Relapse Detection in Patients with Psychosis using Variational Autoencoders Conference Proc. 30th European Signal Processing Conference (EUSIPCO), Belgrade, Serbia, 2022. BibTeX | Links: [PDF] [Slides] @conference{garoufis2022towards, title = {Towards Unsupervised Subject-Independent Speech-Based Relapse Detection in Patients with Psychosis using Variational Autoencoders}, author = {C Garoufis and A Zlatintsi and P P Filntisis and N Efthymiou and E Kalisperakis and T Karantinos and V Garyfalli and M Lazaridi and N Smyrnis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2022_GaroufisEtAl_UnsupervisedSpeechBasedRelapseDetectionVAES_EUSIPCO2022.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Garoufis_SubjectIndependentRelapseDetectionAudioVAEs_EUSIPCO22_slides.pdf}, year = {2022}, date = {2022-09-01}, booktitle = {Proc. 30th European Signal Processing Conference (EUSIPCO)}, address = {Belgrade, Serbia}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
M Panagiotou, A Zlatintsi, P P Filntisis, A J Roumeliotis, N Efthymiou, P Maragos A Comparative Study of Autoencoder Architectures for Mental Health Analysis using Wearable Sensors Data Conference Proc. 30th European Signal Processing Conference (EUSIPCO), Belgrade, Serbia, 2022. BibTeX | Links: [PDF] [Slides] @conference{panagiotou2022comparative, title = {A Comparative Study of Autoencoder Architectures for Mental Health Analysis using Wearable Sensors Data}, author = {M Panagiotou and A Zlatintsi and P P Filntisis and A J Roumeliotis and N Efthymiou and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2022_PanagiotouEtAl_ComStudyAutoencodersMentalHealthWearables_EUSIPCO2022.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Panagiotou_EUSIPCO2022_Presentation_slides.pdf}, year = {2022}, date = {2022-09-01}, booktitle = {Proc. 30th European Signal Processing Conference (EUSIPCO)}, address = {Belgrade, Serbia}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Papantonakis, C Garoufis, P Maragos Multi-band Masking for Waveform-based Singing Voice Separation Conference Proc. 30th European Signal Processing Conference (EUSIPCO), Belgrade, Serbia, 2022. BibTeX | Links: [PDF] [Poster] @conference{papantonakis2022multi, title = {Multi-band Masking for Waveform-based Singing Voice Separation}, author = {P Papantonakis and C Garoufis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Papantonakis_MultibandMaskingSVS_EUSIPCO22_Paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Papantonakis_MultibandMaskingSVS_EUSIPCO22_Poster.pdf}, year = {2022}, date = {2022-08-01}, booktitle = {Proc. 30th European Signal Processing Conference (EUSIPCO)}, address = {Belgrade, Serbia}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
I Asmanis, P Mermigkas, G Chalvatzaki, J Peters, P Maragos A Semantic Enhancement of Unified Geometric Representations for Improving Indoor Visual SLAM Conference Proc. 19th Int'l Conf. on Ubiquitous Robots (UR 2022), Jeju, Korea, 2022. @conference{asmanis2022semantic, title = {A Semantic Enhancement of Unified Geometric Representations for Improving Indoor Visual SLAM}, author = {I Asmanis and P Mermigkas and G Chalvatzaki and J Peters and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2022_AsmanisMermigkas_SemanticEnhanceGeomRepres-IndoorVisualSLAM_UR.pdf}, year = {2022}, date = {2022-07-01}, booktitle = {Proc. 19th Int'l Conf. on Ubiquitous Robots (UR 2022)}, address = {Jeju, Korea}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
G Retsinas, P Filntisis, N Kardaris, P Maragos Attribute-based Gesture Recognition: Generalization to Unseen Classes Conference Proc. 14th Image, Video, and Multidimensional Signal Processing Workshop (IVMSP 2022), Nafplio, Greece, 2022. @conference{retsinas2022attribute, title = {Attribute-based Gesture Recognition: Generalization to Unseen Classes}, author = {G Retsinas and P Filntisis and N Kardaris and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Retsinas_IVMSP2022_paper.pdf}, year = {2022}, date = {2022-06-01}, booktitle = {Proc. 14th Image, Video, and Multidimensional Signal Processing Workshop (IVMSP 2022)}, address = {Nafplio, Greece}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C O Tze, P Filntisis, A Roussos, P Maragos Cartoonized Anonymization of Sign Language Videos Conference Proc. 14th IEEE Image, Video, and Multidimensional Signal Processing Workshop (IVMSP 2022), Nafplio, Greece, 2022. @conference{tze2022cartoonized, title = {Cartoonized Anonymization of Sign Language Videos}, author = {C O Tze and P Filntisis and A Roussos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Tze_IVMSP2022_Cartoonized-Anonymization-Sign-Videos_paper.pdf}, year = {2022}, date = {2022-06-01}, booktitle = {Proc. 14th IEEE Image, Video, and Multidimensional Signal Processing Workshop (IVMSP 2022)}, address = {Nafplio, Greece}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
F Paraperas-Papantoniou, P P Filntisis, P Maragos, A Roussos Neural Emotion Director: Speech-preserving semantic control of facial expressions in “in-the-wild” videos Conference Proc. 2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), New Orleans, USA, 2022, (CVPR-2022 Best Paper Finalist). BibTeX | Links: [PDF] [Poster] [Supp] @conference{paraperas2022neural, title = {Neural Emotion Director: Speech-preserving semantic control of facial expressions in “in-the-wild” videos}, author = {F Paraperas-Papantoniou and P P Filntisis and P Maragos and A Roussos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Paraperas_NED-SpeechPreservingSemanticControlFacialExpressions_CVPR2022_paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Paraperas_cvpr2022_NED_poster.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Paraperas_NED_CVPR2022_supplemental-material.pdf}, year = {2022}, date = {2022-06-01}, booktitle = {Proc. 2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, address = {New Orleans, USA}, note = {CVPR-2022 Best Paper Finalist}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
K Avramidis, C Garoufis, A Zlatintsi, P Maragos Enhancing Affective Representations of Music-Induced EEG through Multimodal Supervision and Latent Domain Adaptation Conference Proc. 47th IEEE Int’l Conf. on Acoustics, Speech and Signal Processing (ICASSP-2022), 2022. BibTeX | Links: [PDF] [Poster] @conference{avramidis2022enhancing, title = {Enhancing Affective Representations of Music-Induced EEG through Multimodal Supervision and Latent Domain Adaptation}, author = {K Avramidis and C Garoufis and A Zlatintsi and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Avramidis_MusicEEGCrossModal_ICASSP22_Paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Avramidis_ICASSP2022-poster.pdf}, year = {2022}, date = {2022-05-01}, booktitle = {Proc. 47th IEEE Int’l Conf. on Acoustics, Speech and Signal Processing (ICASSP-2022)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
G Bastas, S Koutoupis, M K.-Papakostas, V Katsouros, P Maragos A Few-sample Strategy for Guitar Tablature Transcription Based on Inharmonicity Analysis and Playability Constraints Conference Proc. 47th IEEE Int’l Conf. on Acoustics, Speech and Signal Processing (ICASSP-2022), 2022. @conference{bastas2022few, title = {A Few-sample Strategy for Guitar Tablature Transcription Based on Inharmonicity Analysis and Playability Constraints}, author = {G Bastas and S Koutoupis and M K.-Papakostas and V Katsouros and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/BastasKoutoupis_TablatureTranscription_ICASSP22_Paper.pdf}, year = {2022}, date = {2022-05-01}, booktitle = {Proc. 47th IEEE Int’l Conf. on Acoustics, Speech and Signal Processing (ICASSP-2022)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
M Parelli, K Papadimitriou, G Potamianos, G Pavlakos, P Maragos Spatio-Temporal Graph Convolutional Networks for Continuous Sign Language Recognition Conference Proc. 47th IEEE Int’l Conf. on Acoustics, Speech and Signal Processing (ICASSP-2022), 2022. @conference{parelli2022spatio, title = {Spatio-Temporal Graph Convolutional Networks for Continuous Sign Language Recognition}, author = {M Parelli and K Papadimitriou and G Potamianos and G Pavlakos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/ParelliPapadimitriou_SignLanguageRecognitionGCNNs_ICASSP22_Paper.pdf}, year = {2022}, date = {2022-05-01}, booktitle = {Proc. 47th IEEE Int’l Conf. on Acoustics, Speech and Signal Processing (ICASSP-2022)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Misiakos, G Smyrnis, G Retsinas, P Maragos Neural Network Approximation based on Hausdorff Distance of Tropical Zonotopes Conference Proc. Int’l Conf. on Learning Representations (ICLR 2022), 2022. BibTeX | Links: [PDF] [Poster] [Slides] @conference{misiakos2022neural, title = {Neural Network Approximation based on Hausdorff Distance of Tropical Zonotopes}, author = {P Misiakos and G Smyrnis and G Retsinas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Misiakos_ICLR2022_TropicalGeometry_paper.pdf https://iclr.cc/virtual/2022/poster/5971 http://robotics.ntua.gr/wp-content/uploads/sites/2/Misiakos_ICLR2022_TropicalGeometry_slides.pdf}, year = {2022}, date = {2022-01-01}, booktitle = {Proc. Int’l Conf. on Learning Representations (ICLR 2022)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
2021 |
Petros Maragos, Vasileios Charisopoulos, Emmanouil Theodosis Tropical Geometry and Machine Learning Journal Article Proceedings of the IEEE, 109 (5), pp. 728-755, 2021. @article{MCT21, title = {Tropical Geometry and Machine Learning}, author = {Petros Maragos and Vasileios Charisopoulos and Emmanouil Theodosis}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/MaragosCharisopoulosTheodosis_TGML_PIEEE20211.pdf}, doi = {10.1109/JPROC.2021.3065238}, year = {2021}, date = {2021-12-31}, journal = {Proceedings of the IEEE}, volume = {109}, number = {5}, pages = {728-755}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
George Moustris, Nikolaos Kardaris, Antigoni Tsiami, Georgia Chalvatzaki, Petros Koutras, Athanasios Dometios, Paris Oikonomou, Costas Tzafestas, Petros Maragos, Eleni Efthimiou, Xanthi Papageorgiou, Stavroula-Evita Fotinea, Yiannis Koumpouros, Anna Vacalopoulou, Effie Papageorgiou, Alexandra Karavasili, Foteini Koureta, Dimitris Dimou, Alexandros Nikolakakis, Konstantinos Karaiskos, Panagiotis Mavridis The i-Walk Lightweight Assistive Rollator: First Evaluation Study Journal Article Frontiers in Robotics and AI, 8 , pp. 272, 2021, ISSN: 2296-9144. Abstract | BibTeX | Links: [PDF] @article{10.3389/frobt.2021.677542, title = {The i-Walk Lightweight Assistive Rollator: First Evaluation Study}, author = {George Moustris and Nikolaos Kardaris and Antigoni Tsiami and Georgia Chalvatzaki and Petros Koutras and Athanasios Dometios and Paris Oikonomou and Costas Tzafestas and Petros Maragos and Eleni Efthimiou and Xanthi Papageorgiou and Stavroula-Evita Fotinea and Yiannis Koumpouros and Anna Vacalopoulou and Effie Papageorgiou and Alexandra Karavasili and Foteini Koureta and Dimitris Dimou and Alexandros Nikolakakis and Konstantinos Karaiskos and Panagiotis Mavridis}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/frobt-08-677542.pdf}, doi = {10.3389/frobt.2021.677542}, issn = {2296-9144}, year = {2021}, date = {2021-12-30}, journal = {Frontiers in Robotics and AI}, volume = {8}, pages = {272}, abstract = {Robots can play a significant role as assistive devices for people with movement impairment and mild cognitive deficit. In this paper we present an overview of the lightweight i-Walk intelligent robotic rollator, which offers cognitive and mobility assistance to the elderly and to people with light to moderate mobility impairment. 
The utility, usability, safety and technical performance of the device is investigated through a clinical study, which took place at a rehabilitation center in Greece involving real patients with mild to moderate cognitive and mobility impairment. This first evaluation study comprised a set of scenarios in a number of pre-defined use cases, including physical rehabilitation exercises, as well as mobility and ambulation involved in typical daily living activities of the patients. The design and implementation of this study is discussed in detail, along with the obtained results, which include both an objective and a subjective evaluation of the system operation, based on a set of technical performance measures and a validated questionnaire for the analysis of qualitative data, respectively. The study shows that the technical modules performed satisfactory under real conditions, and that the users generally hold very positive views of the platform, considering it safe and reliable.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Robots can play a significant role as assistive devices for people with movement impairment and mild cognitive deficit. In this paper we present an overview of the lightweight i-Walk intelligent robotic rollator, which offers cognitive and mobility assistance to the elderly and to people with light to moderate mobility impairment. The utility, usability, safety and technical performance of the device is investigated through a clinical study, which took place at a rehabilitation center in Greece involving real patients with mild to moderate cognitive and mobility impairment. This first evaluation study comprised a set of scenarios in a number of pre-defined use cases, including physical rehabilitation exercises, as well as mobility and ambulation involved in typical daily living activities of the patients. 
The design and implementation of this study is discussed in detail, along with the obtained results, which include both an objective and a subjective evaluation of the system operation, based on a set of technical performance measures and a validated questionnaire for the analysis of qualitative data, respectively. The study shows that the technical modules performed satisfactory under real conditions, and that the users generally hold very positive views of the platform, considering it safe and reliable. |
Nikos Melanitis, Petros Maragos A linear method for camera pair self-calibration Journal Article Computer Vision and Image Understanding, 210 , pp. 103223, 2021. @article{MeMa21, title = {A linear method for camera pair self-calibration}, author = {Nikos Melanitis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_MelanitisMaragos_LinearCameraSelfPairCalibration_CVIU.pdf}, doi = {10.1016/j.cviu.2021.103223}, year = {2021}, date = {2021-09-01}, journal = {Computer Vision and Image Understanding}, volume = {210}, pages = {103223}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
N Efthymiou, P P Filntisis, G Potamianos, P Maragos Visual Robotic Perception System with Incremental Learning for Child–Robot Interaction Scenarios Journal Article Technologies, 9 (4), pp. 86, 2021. @article{efthymiou2021visual, title = {Visual Robotic Perception System with Incremental Learning for Child–Robot Interaction Scenarios}, author = {N Efthymiou and P P Filntisis and G Potamianos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_EfthymiouEtAl_VisualRobotPerceptionSystem-ChildRobotInteract_Technologies-1.pdf}, doi = {10.3390/technologies9040086}, year = {2021}, date = {2021-01-01}, journal = {Technologies}, volume = {9}, number = {4}, pages = {86}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
M. Diomataris, N. Gkanatsios, V. Pitsikalis, P. Maragos Grounding Consistency: Distilling Spatial Common Sense for Precise Visual Relationship Detection Conference Proceedings of International Conference on Computer Vision (ICCV-2021), 2021. @conference{Diomataris2021, title = {Grounding Consistency: Distilling Spatial Common Sense for Precise Visual Relationship Detection}, author = {M. Diomataris and N. Gkanatsios and V. Pitsikalis and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/DiomatarisEtAl_GroundingConsistency-VisualRelationsDetection_ICCV2021.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/DiomatarisEtAl_GroundingConsistency-VisualRelationsDetection_ICCV2021_supp.pdf}, year = {2021}, date = {2021-12-31}, booktitle = {Proceedings of International Conference on Computer Vision (ICCV-2021)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Antoniadis, P P Filntisis, P Maragos Exploiting Emotional Dependencies with Graph Convolutional Networks for Facial Expression Recognition Conference Proc. 16th IEEE Int’l Conf. on Automatic Face and Gesture Recognition (FG-2021), 2021. @conference{Antoniadis2021, title = {Exploiting Emotional Dependencies with Graph Convolutional Networks for Facial Expression Recognition}, author = {P Antoniadis and P P Filntisis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_AntoniadisEtAl_Emotion-GCN-FacialExpressionRecogn_FG-1.pdf}, year = {2021}, date = {2021-12-01}, booktitle = {Proc. 16th IEEE Int’l Conf. on Automatic Face and Gesture Recognition (FG-2021)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
I Pikoulis, P P Filntisis, P Maragos Leveraging Semantic Scene Characteristics and Multi-Stream Convolutional Architectures in a Contextual Approach for Video-Based Visual Emotion Recognition in the Wild Conference Proc. 16th IEEE Int’l Conf. on Automatic Face and Gesture Recognition (FG-2021), 2021. @conference{Pikoulis2021, title = {Leveraging Semantic Scene Characteristics and Multi-Stream Convolutional Architectures in a Contextual Approach for Video-Based Visual Emotion Recognition in the Wild}, author = {I Pikoulis and P P Filntisis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_PikoulisEtAl_VideoEmotionRecognInTheWild-SemanticMultiStreamContext_FG-1.pdf}, year = {2021}, date = {2021-12-01}, booktitle = {Proc. 16th IEEE Int’l Conf. on Automatic Face and Gesture Recognition (FG-2021)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
V. Vasileiou, N. Kardaris, P. Maragos Exploring Temporal Context and Human Movement Dynamics for Online Action Detection in Videos Conference Proc. 29th European Signal Processing Conference (EUSIPCO 2021), Dublin, Ireland, 2021. BibTeX | Links: [PDF] [Slides] @conference{Vasileiou2021, title = {Exploring Temporal Context and Human Movement Dynamics for Online Action Detection in Videos}, author = {V. Vasileiou and N. Kardaris and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Vasileiou_EUSIPCO21_Enhancing_temporal_context_for_online_action_detection_in_videos_Paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Vasileiou_EUSIPCO21_presentation_slides.pdf}, year = {2021}, date = {2021-08-31}, booktitle = {Proc. 29th European Signal Processing Conference (EUSIPCO 2021)}, address = {Dublin, Ireland}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P. P. Filntisis, N. Efthymiou, G. Potamianos, P. Maragos An Audiovisual Child Emotion Recognition System for Child-Robot Interaction Applications Conference Proc. 29th European Signal Processing Conference (EUSIPCO 2021), Dublin, Ireland, 2021. BibTeX | Links: [Slides] [PDF] @conference{Filntisis2021, title = {An Audiovisual Child Emotion Recognition System for Child-Robot Interaction Applications}, author = {P. P. Filntisis and N. Efthymiou and G. Potamianos and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Filntisis_EUSIPCO2021_ChildEmotionRecogn_presentation_slides.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_FilntisisEtAl_AV-ChildEmotionRecognSystem-ChildRobotInteract_EUSIPCO.pdf}, year = {2021}, date = {2021-08-31}, booktitle = {Proc. 29th European Signal Processing Conference (EUSIPCO 2021)}, address = {Dublin, Ireland}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C. Garoufis, A. Zlatintsi, P. Maragos HTMD-NET: A Hybrid Masking-Denoising Approach to Time-Domain Monaural Singing Voice Separation Conference Proc. 29th European Signal Processing Conference (EUSIPCO 2021), Dublin, Ireland, 2021. BibTeX | Links: [Slides] [PDF] @conference{Garoufis2021, title = {HTMD-NET: A Hybrid Masking-Denoising Approach to Time-Domain Monaural Singing Voice Separation}, author = {C. Garoufis and A. Zlatintsi and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Garoufis_EUSIPCO2021_HTMDNet_slides.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Garoufis_EUSIPCO2021_HTMDNet1_Paper.pdf}, year = {2021}, date = {2021-08-31}, booktitle = {Proc. 29th European Signal Processing Conference (EUSIPCO 2021)}, address = {Dublin, Ireland}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
K. Avramidis, A. Zlatintsi, C. Garoufis, P. Maragos Multiscale Fractal Analysis on EEG Signals for Music-Induced Emotion Recognition Conference Proc. 29th European Signal Processing Conference (EUSIPCO 2021), Dublin, Ireland, 2021. BibTeX | Links: [Slides] [PDF] @conference{Avramidis2021, title = {Multiscale Fractal Analysis on EEG Signals for Music-Induced Emotion Recognition}, author = {K. Avramidis and A. Zlatintsi and C. Garoufis and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Avramidis_EUSIPCO2021_MFA-EEG-MusicEmotion_presentation_slides.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_AvramidisEtAl_MFA-EEG_MusicEmotionRecogn_EUSIPCO.pdf}, year = {2021}, date = {2021-08-31}, booktitle = {Proc. 29th European Signal Processing Conference (EUSIPCO 2021)}, address = {Dublin, Ireland}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P. Giannoulis, G. Potamianos, P. Maragos Overlapped Sound Event Classification via Multi-Channel Sound Separation Network Conference Proc. 29th European Signal Processing Conference (EUSIPCO 2021), Dublin, Ireland, 2021. BibTeX | Links: [PDF] [Slides] @conference{Giannoulis2021, title = {Overlapped Sound Event Classification via Multi-Channel Sound Separation Network}, author = {P. Giannoulis and G. Potamianos and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Giannoulis_EUSIPCO21_OverlapSoundEventClassif_Paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Giannoulis_EUSIPCO21_presentation_slides.pdf}, year = {2021}, date = {2021-08-31}, booktitle = {Proc. 29th European Signal Processing Conference (EUSIPCO 2021)}, address = {Dublin, Ireland}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C. Garoufis, A. Zlatintsi, P. P. Filntisis, N. Efthymiou, E. Kalisperakis, V. Garyfalli, T. Karantinos, L. Mantonakis, N. Smyrnis, P. Maragos An Unsupervised Learning Approach for Detecting Relapses from Spontaneous Speech in Patients with Psychosis Conference Proc. IEEE-EMBS International Conference on Biomedical and Health Informatics (BHI-2021), 2021. BibTeX | Links: [PDF] [Poster] @conference{Garoufis2021b, title = {An Unsupervised Learning Approach for Detecting Relapses from Spontaneous Speech in Patients with Psychosis}, author = {C. Garoufis and A. Zlatintsi and P. P. Filntisis and N. Efthymiou and E. Kalisperakis and V. Garyfalli and T. Karantinos and L. Mantonakis and N. Smyrnis and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Garoufis_BHI2021_UnsupervisedLearningRelapseDetection_Paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Garoufis_BHI21_Poster.pdf}, year = {2021}, date = {2021-07-31}, booktitle = {Proc. IEEE-EMBS International Conference on Biomedical and Health Informatics (BHI-2021)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Dafni Anagnostopoulou, Niki Efthymiou, Christina Papailiou, Petros Maragos Engagement Estimation During Child Robot Interaction Using DeepConvolutional Networks Focusing on ASD Children Conference Proc. IEEE Int'l Conf. Robotics and Automation (ICRA-2021), Xi'an, 2021. BibTeX | Links: [PDF] [Video] [Slides] @conference{AnagnostopoulouICRA2021, title = {Engagement Estimation During Child Robot Interaction Using DeepConvolutional Networks Focusing on ASD Children}, author = {Dafni Anagnostopoulou and Niki Efthymiou and Christina Papailiou and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_Anagnostopoulou_EngagementEstimationChildRobotInteraction_ICRA.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Anagnostopoulou_ICRA21_presentation1.mp4 http://robotics.ntua.gr/wp-content/uploads/sites/2/Anagnostopoulou_ICRA21_slides1.pdf}, year = {2021}, date = {2021-06-01}, booktitle = {Proc. IEEE Int'l Conf. Robotics and Automation (ICRA-2021)}, address = {Xi'an}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Agelos Kratimenos, Georgios Pavlakos, Petros Maragos Independent Sign Language Recognition with 3D Body, Hands, and Face Reconstruction Conference Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021), Toronto, 2021. BibTeX | Links: [PDF] [Slides] [Video] [Poster] @conference{Kratimenos_icassp21, title = {Independent Sign Language Recognition with 3D Body, Hands, and Face Reconstruction}, author = {Agelos Kratimenos and Georgios Pavlakos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_KratimenosPavlakosMaragos_IsolatedSignLangRecogn3Dreconstruct_ICASSP.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Kratimenos_ICASSP2021_slides.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Kratimenos_ICASSP2021_video.mp4 http://robotics.ntua.gr/wp-content/uploads/sites/2/Kratimenos_ICASSP2021_poster.pdf }, year = {2021}, date = {2021-06-01}, booktitle = {Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021)}, address = {Toronto}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Nikos Tsilivis, Anastasios Tsiamis, Petros Maragos Sparsity in Max-Plus Algebra and Applications in Multivariate Convex Regression Conference Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021), Toronto, 2021. @conference{TTM21, title = {Sparsity in Max-Plus Algebra and Applications in Multivariate Convex Regression}, author = {Nikos Tsilivis and Anastasios Tsiamis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_TsilivisEtAl_SparseTropicalRegression_ICASSP.pdf}, year = {2021}, date = {2021-06-01}, booktitle = {Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021)}, address = {Toronto}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Nikolaos Dimitriadis, Petros Maragos Advances in Morphological Neural Networks: Training, Pruning and Enforcing Shape Constraints Conference Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021), Toronto, 2021. BibTeX | Links: [PDF] [Slides] [Video] [Poster] @conference{DM21, title = {Advances in Morphological Neural Networks: Training, Pruning and Enforcing Shape Constraints}, author = {Nikolaos Dimitriadis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_DimitriadisMaragos_AdvancesMorphologicNeuralNets_ICASSP.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/icassp2021-slides-Dimitriadis-Maragos.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/icassp2021-presentation-Dimitriadis-Maragos.mp4 http://robotics.ntua.gr/wp-content/uploads/sites/2/icassp2021-poster_Dimitriadis_Maragos.pdf}, year = {2021}, date = {2021-06-01}, booktitle = {Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021)}, address = {Toronto}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Kleanthis Avramidis, Agelos Kratimenos, Christos Garoufis, Athanasia Zlatintsi, Petros Maragos Deep Convolutional and Recurrent Networks for Polyphonic Instrument Classification from Monophonic Raw Audio Waveforms Conference Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021), Toronto, 2021. BibTeX | Links: [PDF] [Slides] [Video] [Poster] @conference{AvramidisIC2021, title = {Deep Convolutional and Recurrent Networks for Polyphonic Instrument Classification from Monophonic Raw Audio Waveforms}, author = {Kleanthis Avramidis and Agelos Kratimenos and Christos Garoufis and Athanasia Zlatintsi and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_AvramidisKratimenos_PolyphonicInstrumentClassification_ICASSP.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Avramidis_ICASSP2021_IC2_Slides.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Avramidis_ICASSP2021_IC2_Presentation.mp4 http://robotics.ntua.gr/wp-content/uploads/sites/2/Avramidis_ICASSP2021_IC2_Poster.pdf}, year = {2021}, date = {2021-06-01}, booktitle = {Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021)}, address = {Toronto}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Xanthi S Papageorgiou, Athanasios C Dometios, Costas S Tzafestas Towards a User Adaptive Assistive Robot: Learning from Demonstration Using Navigation Functions Conference 2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), 2021. @conference{papageorgiou2021towards, title = {Towards a User Adaptive Assistive Robot: Learning from Demonstration Using Navigation Functions}, author = {Xanthi S Papageorgiou and Athanasios C Dometios and Costas S Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Papageorgiou_NFLearning_IROS2021.pdf}, year = {2021}, date = {2021-01-01}, booktitle = {2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, pages = {965-970}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Danai Efstathiou, Georgia Chalvatzaki, Athanasios Dometios, Dionisios Spiliopoulos, Costas S Tzafestas Deep Leg Tracking by Detection and Gait Analysis in 2D Range Data for Intelligent Robotic Assistants Conference 2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), 2021. BibTeX | Links: [Slides] [PDF] @conference{efstathiou2021deep, title = {Deep Leg Tracking by Detection and Gait Analysis in 2D Range Data for Intelligent Robotic Assistants}, author = {Danai Efstathiou and Georgia Chalvatzaki and Athanasios Dometios and Dionisios Spiliopoulos and Costas S Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Efstathiou_DeepLegTracking_IROS2021_slides.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Efstathiou_DeepLegTracking_IROS2021.pdf}, year = {2021}, date = {2021-01-01}, booktitle = {2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, pages = {2657-2662}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
2020 |
K Kritsis, C Garoufis, A Zlatintsi, M Bouillon, C Acosta, D Martín-Albo, R Piechaud, P Maragos, V Katsouros iMuSciCA Workbench: Web-based Music Activities For Science Education Journal Article Journal of the Audio Engineering Society, 68 (10), pp. 738-746, 2020. @article{kritsis2020imuscia, title = {iMuSciCA Workbench: Web-based Music Activities For Science Education}, author = {K Kritsis and C Garoufis and A Zlatintsi and M Bouillon and C Acosta and D Martín-Albo and R Piechaud and P Maragos and V Katsouros}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2020_iMuSciCa-WebMusicActivitiesForScienceEducation_JAES.pdf}, doi = {10.17743/jaes.2020.0021}, year = {2020}, date = {2020-10-01}, journal = {Journal of the Audio Engineering Society}, volume = {68}, number = {10}, pages = {738-746}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Christian Werner, Athanasios C Dometios, Costas S Tzafestas, Petros Maragos, Jürgen M Bauer, Klaus Hauer Evaluating the task effectiveness and user satisfaction with different operation modes of an assistive bathing robot in older adults Journal Article Assistive Technology, 0 , 2020, (PMID: 32286163). @article{doi:10.1080/10400435.2020.1755744, title = {Evaluating the task effectiveness and user satisfaction with different operation modes of an assistive bathing robot in older adults}, author = {Christian Werner and Athanasios C Dometios and Costas S Tzafestas and Petros Maragos and Jürgen M Bauer and Klaus Hauer}, url = {https://doi.org/10.1080/10400435.2020.1755744}, doi = {10.1080/10400435.2020.1755744}, year = {2020}, date = {2020-07-08}, journal = {Assistive Technology}, volume = {0}, publisher = {Taylor & Francis}, note = {PMID: 32286163}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
A Zlatintsi, A C Dometios, N Kardaris, I Rodomagoulakis, P Koutras, X Papageorgiou, P Maragos, C S Tzafestas, P Vartholomeos, K Hauer, C Werner, R Annicchiarico, M G Lombardi, F Adriano, T Asfour, A M Sabatini, C Laschi, M Cianchetti, A Güler, I Kokkinos, B Klein, R López I-Support: A robotic platform of an assistive bathing robot for the elderly population Journal Article Robotics and Autonomous Systems, 126 , pp. 103451, 2020, ISSN: 0921-8890. Abstract | BibTeX | Links: [Webpage] @article{ZLATINTSI2020103451, title = {I-Support: A robotic platform of an assistive bathing robot for the elderly population}, author = {A Zlatintsi and A C Dometios and N Kardaris and I Rodomagoulakis and P Koutras and X Papageorgiou and P Maragos and C S Tzafestas and P Vartholomeos and K Hauer and C Werner and R Annicchiarico and M G Lombardi and F Adriano and T Asfour and A M Sabatini and C Laschi and M Cianchetti and A Güler and I Kokkinos and B Klein and R López}, url = {http://www.sciencedirect.com/science/article/pii/S0921889019304968}, doi = {https://doi.org/10.1016/j.robot.2020.103451}, issn = {0921-8890}, year = {2020}, date = {2020-04-10}, journal = {Robotics and Autonomous Systems}, volume = {126}, pages = {103451}, abstract = {In this paper we present a prototype integrated robotic system, the I-Support bathing robot, that aims at supporting new aspects of assisted daily-living activities on a real-life scenario. The paper focuses on describing and evaluating key novel technological features of the system, with the emphasis on cognitive human–robot interaction modules and their evaluation through a series of clinical validation studies. The I-Support project on its whole has envisioned the development of an innovative, modular, ICT-supported service robotic system that assists frail seniors to safely and independently complete an entire sequence of physically and cognitively demanding bathing tasks, such as properly washing their back and their lower limbs. 
A variety of innovative technologies have been researched and a set of advanced modules of sensing, cognition, actuation and control have been developed and seamlessly integrated to enable the system to adapt to the target population abilities. These technologies include: human activity monitoring and recognition, adaptation of a motorized chair for safe transfer of the elderly in and out the bathing cabin, a context awareness system that provides full environmental awareness, as well as a prototype soft robotic arm and a set of user-adaptive robot motion planning and control algorithms. This paper focuses in particular on the multimodal action recognition system, developed to monitor, analyze and predict user actions with a high level of accuracy and detail in real-time, which are then interpreted as robotic tasks. In the same framework, the analysis of human actions that have become available through the project’s multimodal audio–gestural dataset, has led to the successful modeling of Human–Robot Communication, achieving an effective and natural interaction between users and the assistive robotic platform. In order to evaluate the I-Support system, two multinational validation studies were conducted under realistic operating conditions in two clinical pilot sites. Some of the findings of these studies are presented and analyzed in the paper, showing good results in terms of: (i) high acceptability regarding the system usability by this particularly challenging target group, the elderly end-users, and (ii) overall task effectiveness of the system in different operating modes.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this paper we present a prototype integrated robotic system, the I-Support bathing robot, that aims at supporting new aspects of assisted daily-living activities on a real-life scenario. 
The paper focuses on describing and evaluating key novel technological features of the system, with the emphasis on cognitive human–robot interaction modules and their evaluation through a series of clinical validation studies. The I-Support project on its whole has envisioned the development of an innovative, modular, ICT-supported service robotic system that assists frail seniors to safely and independently complete an entire sequence of physically and cognitively demanding bathing tasks, such as properly washing their back and their lower limbs. A variety of innovative technologies have been researched and a set of advanced modules of sensing, cognition, actuation and control have been developed and seamlessly integrated to enable the system to adapt to the target population abilities. These technologies include: human activity monitoring and recognition, adaptation of a motorized chair for safe transfer of the elderly in and out the bathing cabin, a context awareness system that provides full environmental awareness, as well as a prototype soft robotic arm and a set of user-adaptive robot motion planning and control algorithms. This paper focuses in particular on the multimodal action recognition system, developed to monitor, analyze and predict user actions with a high level of accuracy and detail in real-time, which are then interpreted as robotic tasks. In the same framework, the analysis of human actions that have become available through the project’s multimodal audio–gestural dataset, has led to the successful modeling of Human–Robot Communication, achieving an effective and natural interaction between users and the assistive robotic platform. In order to evaluate the I-Support system, two multinational validation studies were conducted under realistic operating conditions in two clinical pilot sites. 
Some of the findings of these studies are presented and analyzed in the paper, showing good results in terms of: (i) high acceptability regarding the system usability by this particularly challenging target group, the elderly end-users, and (ii) overall task effectiveness of the system in different operating modes. |
Christian Werner, Nikos Kardaris, Petros Koutras, Athanasia Zlatintsi, Petros Maragos, Jürgen M Bauer, Klaus Hauer Improving gesture-based interaction between an assistive bathing robot and older adults via user training on the gestural commands Journal Article Archives of Gerontology and Geriatrics, 87 , pp. 103996, 2020, ISSN: 0167-4943. Abstract | BibTeX | Links: [Webpage] @article{WERNER2020103996, title = {Improving gesture-based interaction between an assistive bathing robot and older adults via user training on the gestural commands}, author = {Christian Werner and Nikos Kardaris and Petros Koutras and Athanasia Zlatintsi and Petros Maragos and Jürgen M Bauer and Klaus Hauer}, url = {http://www.sciencedirect.com/science/article/pii/S0167494319302390}, doi = {https://doi.org/10.1016/j.archger.2019.103996}, issn = {0167-4943}, year = {2020}, date = {2020-03-01}, journal = {Archives of Gerontology and Geriatrics}, volume = {87}, pages = {103996}, abstract = {Background Gesture-based human-robot interaction (HRI) depends on the technical performance of the robot-integrated gesture recognition system (GRS) and on the gestural performance of the robot user, which has been shown to be rather low in older adults. Training of gestural commands (GCs) might improve the quality of older users’ input for gesture-based HRI, which in turn may lead to an overall improved HRI. Objective To evaluate the effects of a user training on gesture-based HRI between an assistive bathing robot and potential elderly robot users. Methods Twenty-five older adults with bathing disability participated in this quasi-experimental, single-group, pre-/post-test study and underwent a specific user training (10−15 min) on GCs for HRI with the assistive bathing robot. 
Outcomes measured before and after training included participants’ gestural performance assessed by a scoring method of an established test of gesture production (TULIA) and sensor-based gestural performance (SGP) scores derived from the GRS-recorded data, and robot’s command recognition rate (CRR). Results Gestural performance (TULIA = +57.1 ± 56.2 %, SGP scores = +41.1 ± 74.4 %) and CRR (+31.9 ± 51.2 %) significantly improved over training (p < .001). Improvements in gestural performance and CRR were highly associated with each other (r = 0.80–0.81, p < .001). Participants with lower initial gestural performance and higher gerontechnology anxiety benefited most from the training. Conclusions Our study highlights that training in gesture-based HRI with an assistive bathing robot is highly beneficial for the quality of older users’ GCs, leading to higher CRRs of the robot-integrated GRS, and thus to an overall improved HRI.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Background Gesture-based human-robot interaction (HRI) depends on the technical performance of the robot-integrated gesture recognition system (GRS) and on the gestural performance of the robot user, which has been shown to be rather low in older adults. Training of gestural commands (GCs) might improve the quality of older users’ input for gesture-based HRI, which in turn may lead to an overall improved HRI. Objective To evaluate the effects of a user training on gesture-based HRI between an assistive bathing robot and potential elderly robot users. Methods Twenty-five older adults with bathing disability participated in this quasi-experimental, single-group, pre-/post-test study and underwent a specific user training (10−15 min) on GCs for HRI with the assistive bathing robot. 
Outcomes measured before and after training included participants’ gestural performance assessed by a scoring method of an established test of gesture production (TULIA) and sensor-based gestural performance (SGP) scores derived from the GRS-recorded data, and robot’s command recognition rate (CRR). Results Gestural performance (TULIA = +57.1 ± 56.2 %, SGP scores = +41.1 ± 74.4 %) and CRR (+31.9 ± 51.2 %) significantly improved over training (p < .001). Improvements in gestural performance and CRR were highly associated with each other (r = 0.80–0.81, p < .001). Participants with lower initial gestural performance and higher gerontechnology anxiety benefited most from the training. Conclusions Our study highlights that training in gesture-based HRI with an assistive bathing robot is highly beneficial for the quality of older users’ GCs, leading to higher CRRs of the robot-integrated GRS, and thus to an overall improved HRI. |
V. Tassopoulou, G. Retsinas, P. Maragos Enhancing Handwritten Text Recognition with N-gram sequence decomposition and Multitask Learning Conference Intl' Conference Pattern Recognition (ICPR 2020), Milan, Italy, 2020. @conference{Tassopoulou2020, title = {Enhancing Handwritten Text Recognition with N-gram sequence decomposition and Multitask Learning}, author = {V. Tassopoulou and G. Retsinas and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2020_TassopoulouEtAl_EnhaceHandwrittenTextRecognition-MultitaskLearn_ICPR-1.pdf}, year = {2020}, date = {2020-12-04}, booktitle = {Intl' Conference Pattern Recognition (ICPR 2020)}, address = {Milan, Italy}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Nikolaos Gkanatsios, Vassilis Pitsikalis, Petros Maragos From Saturation to Zero-Shot Visual Relationship Detection Using Local Context Conference British Machine Vision Conference, Online, 2020. @conference{Gkanatsios2020, title = {From Saturation to Zero-Shot Visual Relationship Detection Using Local Context}, author = {Nikolaos Gkanatsios and Vassilis Pitsikalis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2020_GPM_VisualRelationDetection_BMCV.pdf}, year = {2020}, date = {2020-09-10}, booktitle = { British Machine Vision Conference}, address = {Online}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Georgia Chalvatzaki, Petros Koutras, Antigoni Tsiami, Costas S. Tzafestas, Petros Maragos i-Walk Intelligent Assessment System: Activity, Mobility, Intention, Communication Conference Proc. 16th European Computer Vision Conference Workshops (ECCVW) – 8th Int’l Workshop on Assistive Computer Vision and Robotics (ACVR-2020), Online, 2020. @conference{Chalvatzaki2020, title = {i-Walk Intelligent Assessment System: Activity, Mobility, Intention, Communication}, author = {Georgia Chalvatzaki and Petros Koutras and Antigoni Tsiami and Costas S. Tzafestas and Petros Maragos}, url = {https://robotics.ntua.gr/wp-content/uploads/2020_ChalvatzakiKoutrasTsiami+_iWalkIntelligentAssessmentSystem_ECCVW-ACVR.pdf}, year = {2020}, date = {2020-08-01}, booktitle = {Proc. 16th European Computer Vision Conference Workshops (ECCVW) – 8th Int’l Workshop on Assistive Computer Vision and Robotics (ACVR-2020)}, address = {Online}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Georgios Smyrnis, Petros Maragos Multiclass Neural Network Minimization via Tropical Newton Polytope Approximation Conference International Conference on Machine Learning (ICML), Online, 2020. @conference{Smyrnis2020, title = {Multiclass Neural Network Minimization via Tropical Newton Polytope Approximation}, author = {Georgios Smyrnis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2020_SmyrnisMaragos_MulticlassNNminimiz-TropicPolytopApproxim_ICML1.pdf}, year = {2020}, date = {2020-07-01}, booktitle = {International Conference on Machine Learning (ICML)}, address = {Online}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
I. Maglogiannis, A. Zlatintsi, A. Menychtas, D. Papadimatos, P.P. Filntisis, N. Efthymiou, G. Retsinas, P. Tsanakas, P. Maragos An intelligent cloud-based platform for effective monitoring of patients with psychotic disorders Conference Int’l Conf. on Artificial Intelligence Applications and Innovation (AIAI-2020), Halkidiki, Greece, 2020. @conference{Maglogiannis2020, title = {An intelligent cloud-based platform for effective monitoring of patients with psychotic disorders}, author = {I. Maglogiannis and A. Zlatintsi and A. Menychtas and D. Papadimatos and P.P. Filntisis and N. Efthymiou and G. Retsinas and P. Tsanakas and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2020_MaglogiannisEtAl_e-Prevention_IntelligentCloudPlatform_AIAI-1.pdf}, year = {2020}, date = {2020-06-04}, booktitle = {Int’l Conf. on Artificial Intelligence Applications and Innovation (AIAI-2020)}, address = {Halkidiki, Greece}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
A. Tsiami, P. Koutras, P. Maragos STAViS: Spatio-Temporal AudioVisual Saliency Network Conference IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Seattle, USA, 2020. @conference{Tsiami2020, title = {STAViS: Spatio-Temporal AudioVisual Saliency Network}, author = {A. Tsiami and P. Koutras and P. Maragos}, year = {2020}, date = {2020-06-01}, booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, address = {Seattle, USA}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
G. Potamianos, K. Papadimitriou, E. Efthimiou, S-E Fotinea, G. Sapountzaki, P. Maragos SL-ReDu: Greek Sign Language Recognition for Educational Applications. Project Description and Early Results Conference PETRA '20: Proceedings of the 13th ACM International Conference on PErvasive Technologies Related to Assistive Environments, Corfu, Greece, 2020. @conference{Potamianos2020, title = {SL-ReDu: Greek Sign Language Recognition for Educational Applications. Project Description and Early Results}, author = {G. Potamianos and K. Papadimitriou and E. Efthimiou and S-E Fotinea and G. Sapountzaki and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2020_PotamianosEtAl_SL-ReDu_PETRA.pdf}, year = {2020}, date = {2020-06-01}, booktitle = {PETRA '20: Proceedings of the 13th ACM International Conference on PErvasive Technologies Related to Assistive Environments}, address = {Corfu, Greece}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C Garoufis, A Zlatintsi, P Maragos An LSTM-Based Dynamic Chord Progression Generation System for Interactive Music Performance Conference ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2020. @conference{9053992, title = {An LSTM-Based Dynamic Chord Progression Generation System for Interactive Music Performance}, author = {C Garoufis and A Zlatintsi and P Maragos}, year = {2020}, date = {2020-05-01}, booktitle = {ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, pages = {4502-4506}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
G Retsinas, P P Filntisis, N Efthymiou, E Theodosis, A Zlatintsi, P Maragos Person Identification Using Deep Convolutional Neural Networks on Short-Term Signals from Wearable Sensors Conference ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2020. @conference{9053910, title = {Person Identification Using Deep Convolutional Neural Networks on Short-Term Signals from Wearable Sensors}, author = {G Retsinas and P P Filntisis and N Efthymiou and E Theodosis and A Zlatintsi and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/icassp2020_retsinas.pdf}, year = {2020}, date = {2020-05-01}, booktitle = {ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, pages = {3657-3661}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
G Smyrnis, P Maragos, G Retsinas Maxpolynomial Division with Application To Neural Network Simplification Conference ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2020. @conference{9053540, title = {Maxpolynomial Division with Application To Neural Network Simplification}, author = {G Smyrnis and P Maragos and G Retsinas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2020_SMR_MaxpolynomialDivision-NNsimplification_ICASSP.pdf}, year = {2020}, date = {2020-05-01}, booktitle = {ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, pages = {4192-4196}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Maragos, E Theodosis Multivariate Tropical Regression and Piecewise-Linear Surface Fitting Conference Proc. 45th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2020), Barcelona, 2020. BibTeX | Links: [Video] [PDF] [Slides] @conference{MaTh20b, title = {Multivariate Tropical Regression and Piecewise-Linear Surface Fitting}, author = {P Maragos and E Theodosis}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Maragos_MultivariateTropicRegression_ICASSP2020_presentation_video.mp4 http://robotics.ntua.gr/wp-content/uploads/sites/2/2020_MaragosTheodosis_MultivariateTropicalRegression-PWLsurfaceFitting_ICASSP.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Maragos_MultivariateTropicRegression_ICASSP2020_presentation_slides.pdf}, year = {2020}, date = {2020-05-01}, booktitle = {Proc. 45th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2020)}, address = {Barcelona}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P. Maragos, E. Theodosis Tropical Geometry and Piecewise-Linear Approximation of Curves and Surfaces on Weighted Lattices Book Chapter M. Breuss, A. Bruckstein, C. Kiselman, P. Maragos (Ed.): Shape Analysis: Euclidean, Discrete and Algebraic Geometric Methods, Springer, 2020. @inbook{Maragos2020, title = {Tropical Geometry and Piecewise-Linear Approximation of Curves and Surfaces on Weighted Lattices}, author = {P. Maragos and E. Theodosis}, editor = {M. Breuss and A. Bruckstein and C. Kiselman and P. Maragos}, year = {2020}, date = {2020-12-31}, booktitle = {Shape Analysis: Euclidean, Discrete and Algebraic Geometric Methods}, publisher = {Springer}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } |
I Marougkas, P Koutras, N Kardaris, G Retsinas, G Chalvatzaki, P Maragos How to track your dragon: A Multi-Attentional Framework for Real-time RGB-D 6-DOF Object Pose Tracking Inproceedings Proc. European Conference on Computer Vision Workshops (ECCVW) - 6th Int’l Workshop on Recovering 6D Object Pose, 2020. @inproceedings{marougkas2020how, title = {How to track your dragon: A Multi-Attentional Framework for Real-time RGB-D 6-DOF Object Pose Tracking}, author = {I Marougkas and P Koutras and N Kardaris and G Retsinas and G Chalvatzaki and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/MarougkasEtAl_How-to-Track-your-Dragon_ECCVW-R6D2020-4.pdf}, year = {2020}, date = {2020-01-01}, booktitle = {Proc. European Conference on Computer Vision Workshops (ECCVW) - 6th Int’l Workshop on Recovering 6D Object Pose}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
P. P. Filntisis, N. Efthymiou, G. Potamianos, P. Maragos Emotion Understanding in Videos Through Body, Context, and Visual-Semantic Embedding Loss Workshop Proc. 16th European Computer Vision Conference Workshops (ECCVW) - Workshop on Bodily Expressed Emotion Understanding, 2020. @workshop{Filntisis2020, title = {Emotion Understanding in Videos Through Body, Context, and Visual-Semantic Embedding Loss}, author = {P. P. Filntisis and N. Efthymiou and G. Potamianos and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Emotion_understanding_in_videos_through_body__context__and_visual_semantic_embedding_loss-1.pdf https://github.com/filby89/NTUA-BEEU-eccv2020}, year = {2020}, date = {2020-08-01}, booktitle = {Proc. 16th European Computer Vision Conference Workshops (ECCVW) - Workshop on Bodily Expressed Emotion Understanding}, keywords = {}, pubstate = {published}, tppubtype = {workshop} } |
2019 |
Georgia Chalvatzaki, Xanthi S Papageorgiou, Petros Maragos, Costas S Tzafestas Learn to adapt to human walking: A Model-based Reinforcement Learning Approach for a Robotic Assistant Rollator Journal Article IEEE Robotics and Automation Letters (with IROS option), 4 (4), pp. 3774–3781, 2019. @article{chalvatzaki2019learn, title = {Learn to adapt to human walking: A Model-based Reinforcement Learning Approach for a Robotic Assistant Rollator}, author = {Georgia Chalvatzaki and Xanthi S Papageorgiou and Petros Maragos and Costas S Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/19-0390_03_MS.pdf}, year = {2019}, date = {2019-12-31}, journal = {IEEE Robotics and Automation Letters (with IROS option)}, volume = {4}, number = {4}, pages = {3774--3781}, publisher = {IEEE}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
P P Filntisis, N Efthymiou, P Koutras, G Potamianos, P Maragos Fusing Body Posture With Facial Expressions for Joint Recognition of Affect in Child–Robot Interaction Journal Article IEEE Robotics and Automation Letters (with IROS option), 4 (4), pp. 4011-4018, 2019. Abstract | BibTeX | Links: [PDF] @article{8769871, title = {Fusing Body Posture With Facial Expressions for Joint Recognition of Affect in Child–Robot Interaction}, author = {P P Filntisis and N Efthymiou and P Koutras and G Potamianos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/RAL_2019-5.pdf}, doi = {10.1109/LRA.2019.2930434}, year = {2019}, date = {2019-10-01}, journal = {IEEE Robotics and Automation Letters (with IROS option)}, volume = {4}, number = {4}, pages = {4011-4018}, abstract = {In this letter, we address the problem of multi-cue affect recognition in challenging scenarios such as child–robot interaction. Toward this goal we propose a method for automatic recognition of affect that leverages body expressions alongside facial ones, as opposed to traditional methods that typically focus only on the latter. Our deep-learning based method uses hierarchical multi-label annotations and multi-stage losses, can be trained both jointly and separately, and offers us computational models for both individual modalities, as well as for the whole body emotion. We evaluate our method on a challenging child–robot interaction database of emotional expressions collected by us, as well as on the GEneva multimodal emotion portrayal public database of acted emotions by adults, and show that the proposed method achieves significantly better results than facial-only expression baselines.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this letter, we address the problem of multi-cue affect recognition in challenging scenarios such as child–robot interaction. 
Toward this goal we propose a method for automatic recognition of affect that leverages body expressions alongside facial ones, as opposed to traditional methods that typically focus only on the latter. Our deep-learning based method uses hierarchical multi-label annotations and multi-stage losses, can be trained both jointly and separately, and offers us computational models for both individual modalities, as well as for the whole body emotion. We evaluate our method on a challenging child–robot interaction database of emotional expressions collected by us, as well as on the GEneva multimodal emotion portrayal public database of acted emotions by adults, and show that the proposed method achieves significantly better results than facial-only expression baselines. |
I Rodomagoulakis, P Maragos Improved Frequency Modulation Features for Multichannel Distant Speech Recognition Journal Article IEEE Journal of Selected Topics in Signal Processing, 13 (4), pp. 841-849, 2019. Abstract | BibTeX | Links: [PDF] @article{8744655, title = {Improved Frequency Modulation Features for Multichannel Distant Speech Recognition}, author = {I Rodomagoulakis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2019_RodomagoulakisMaragos_ImprovedFreqModFeaturesMultichanDistantSpeechRecogn_JSTSP.pdf}, doi = {10.1109/JSTSP.2019.2923372}, year = {2019}, date = {2019-08-01}, journal = {IEEE Journal of Selected Topics in Signal Processing}, volume = {13}, number = {4}, pages = {841-849}, abstract = {Frequency modulation features capture the fine structure of speech formants that constitute beneficial to the traditional energy-based cepstral features by carrying supplementary information. Improvements have been demonstrated mainly in Gaussian mixture model (GMM)-hidden Markov model (HMM) systems for small and large vocabulary tasks. Yet, they have limited applications in deep neural network (DNN)-HMM systems and distant speech recognition (DSR) tasks. Herein, we elaborate on their integration within state-of-the-art front-end schemes that include post-processing of MFCCs resulting in discriminant and speaker-adapted features of large temporal contexts. We explore: 1) multichannel demodulation schemes for multi-microphone setups; 2) richer descriptors of frequency modulations; and 3) feature transformation and combination via hierarchical deep networks. We present results for tandem and hybrid recognition with GMM and DNN acoustic models, respectively. 
The improved modulation features are combined efficiently with MFCCs yielding modest and consistent improvements in multichannel DSR tasks on reverberant and noisy environments, where recognition rates are far from human performance.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Frequency modulation features capture the fine structure of speech formants that constitute beneficial to the traditional energy-based cepstral features by carrying supplementary information. Improvements have been demonstrated mainly in Gaussian mixture model (GMM)-hidden Markov model (HMM) systems for small and large vocabulary tasks. Yet, they have limited applications in deep neural network (DNN)-HMM systems and distant speech recognition (DSR) tasks. Herein, we elaborate on their integration within state-of-the-art front-end schemes that include post-processing of MFCCs resulting in discriminant and speaker-adapted features of large temporal contexts. We explore: 1) multichannel demodulation schemes for multi-microphone setups; 2) richer descriptors of frequency modulations; and 3) feature transformation and combination via hierarchical deep networks. We present results for tandem and hybrid recognition with GMM and DNN acoustic models, respectively. The improved modulation features are combined efficiently with MFCCs yielding modest and consistent improvements in multichannel DSR tasks on reverberant and noisy environments, where recognition rates are far from human performance. |
Anastasios Tsiamis, Petros Maragos Sparsity in max-plus algebra and systems Journal Article Discrete Event Dynamic Systems, 29 (2), pp. 163–189, 2019. @article{DBLP:journals/deds/TsiamisM19, title = {Sparsity in max-plus algebra and systems}, author = {Anastasios Tsiamis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/TsiamisMaragos_SparsityInMaxplusAlgebraAndSystems_DEDS2019.pdf}, doi = {10.1007/s10626-019-00281-1}, year = {2019}, date = {2019-01-01}, journal = {Discrete Event Dynamic Systems}, volume = {29}, number = {2}, pages = {163--189}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Antigoni Tsiami, Petros Koutras, Athanasios Katsamanis, Argiro Vatakis, Petros Maragos A behaviorally inspired fusion approach for computational audiovisual saliency modeling Journal Article Signal Processing: Image Communication, 76 , pp. 186 - 200, 2019. Abstract | BibTeX | Links: [PDF] @article{TSIAMI2019186, title = {A behaviorally inspired fusion approach for computational audiovisual saliency modeling}, author = {Antigoni Tsiami and Petros Koutras and Athanasios Katsamanis and Argiro Vatakis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/TsiamiEtAl_BehaviorInspiredFusionCompAudioVisSaliencyModel_SPIC2019_preprint.pdf}, doi = {https://doi.org/10.1016/j.image.2019.05.001}, year = {2019}, date = {2019-01-01}, journal = {Signal Processing: Image Communication}, volume = {76}, pages = {186 - 200}, abstract = {Human attention is highly influenced by multi-modal combinations of perceived sensory information and especially audiovisual information. Although systematic behavioral experiments have provided evidence that human attention is multi-modal, most bottom-up computational attention models, namely saliency models for fixation prediction, focus on visual information, largely ignoring auditory input. In this work, we aim to bridge the gap between findings from neuroscience concerning audiovisual attention and the computational attention modeling, by creating a 2-D bottom-up audiovisual saliency model. We experiment with various fusion schemes for integrating state-of-the-art auditory and visual saliency models in a single audiovisual attention/saliency model based on behavioral findings, that we validate in two experimental levels: (1) using results from behavioral experiments aiming to reproduce the results in a mostly qualitative manner and to ensure that our modeling is in line with behavioral findings, and (2) using 6 different databases with audiovisual human eye-tracking data. 
For this last purpose, we have also collected eye-tracking data for two databases: ETMD, a movie database that contains highly edited videos (movie clips), and SumMe, a database that contains unstructured and unedited user videos. Experimental results indicate that our proposed audiovisual fusion schemes in most cases improve performance compared to visual-only models, without any prior knowledge of the video/audio content. Also, they can be generalized and applied to any auditory saliency model and any visual spatio-temporal saliency model.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Human attention is highly influenced by multi-modal combinations of perceived sensory information and especially audiovisual information. Although systematic behavioral experiments have provided evidence that human attention is multi-modal, most bottom-up computational attention models, namely saliency models for fixation prediction, focus on visual information, largely ignoring auditory input. In this work, we aim to bridge the gap between findings from neuroscience concerning audiovisual attention and the computational attention modeling, by creating a 2-D bottom-up audiovisual saliency model. We experiment with various fusion schemes for integrating state-of-the-art auditory and visual saliency models in a single audiovisual attention/saliency model based on behavioral findings, that we validate in two experimental levels: (1) using results from behavioral experiments aiming to reproduce the results in a mostly qualitative manner and to ensure that our modeling is in line with behavioral findings, and (2) using 6 different databases with audiovisual human eye-tracking data. For this last purpose, we have also collected eye-tracking data for two databases: ETMD, a movie database that contains highly edited videos (movie clips), and SumMe, a database that contains unstructured and unedited user videos. 
Experimental results indicate that our proposed audiovisual fusion schemes in most cases improve performance compared to visual-only models, without any prior knowledge of the video/audio content. Also, they can be generalized and applied to any auditory saliency model and any visual spatio-temporal saliency model. |
Panagiotis Giannoulis, Gerasimos Potamianos, Petros Maragos Room-localized speech activity detection in multi-microphone smart homes Journal Article EURASIP Journal on Audio, Speech, and Music Processing, 2019 (1), pp. 15, 2019, ISSN: 1687-4722. Abstract | BibTeX | Links: [PDF] @article{Giannoulis2019, title = {Room-localized speech activity detection in multi-microphone smart homes}, author = {Panagiotis Giannoulis and Gerasimos Potamianos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2019_GiannoulisEtAl_RoomlocalizedSAD-MultiMicrophoneSmartHomes_EURASIP-JASM.pdf https://doi.org/10.1186/s13636-019-0158-8}, doi = {10.1186/s13636-019-0158-8}, issn = {1687-4722}, year = {2019}, date = {2019-01-01}, journal = {EURASIP Journal on Audio, Speech, and Music Processing}, volume = {2019}, number = {1}, pages = {15}, abstract = {Voice-enabled interaction systems in domestic environments have attracted significant interest recently, being the focus of smart home research projects and commercial voice assistant home devices. Within the multi-module pipelines of such systems, speech activity detection (SAD) constitutes a crucial component, providing input to their activation and speech recognition subsystems. In typical multi-room domestic environments, SAD may also convey spatial intelligence to the interaction, in addition to its traditional temporal segmentation output, by assigning speech activity at the room level. Such room-localized SAD can, for example, disambiguate user command referents, allow localized system feedback, and enable parallel voice interaction sessions by multiple subjects in different rooms. In this paper, we investigate a room-localized SAD system for smart homes equipped with multiple microphones distributed in multiple rooms, significantly extending our earlier work. The system employs a two-stage algorithm, incorporating a set of hand-crafted features specially designed to discriminate room-inside vs. 
room-outside speech at its second stage, refining SAD hypotheses obtained at its first stage by traditional statistical modeling and acoustic front-end processing. Both algorithmic stages exploit multi-microphone information, combining it at the signal, feature, or decision level. The proposed approach is extensively evaluated on both simulated and real data recorded in a multi-room, multi-microphone smart home, significantly outperforming alternative baselines. Further, it remains robust to reduced microphone setups, while also comparing favorably to deep learning-based alternatives.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Voice-enabled interaction systems in domestic environments have attracted significant interest recently, being the focus of smart home research projects and commercial voice assistant home devices. Within the multi-module pipelines of such systems, speech activity detection (SAD) constitutes a crucial component, providing input to their activation and speech recognition subsystems. In typical multi-room domestic environments, SAD may also convey spatial intelligence to the interaction, in addition to its traditional temporal segmentation output, by assigning speech activity at the room level. Such room-localized SAD can, for example, disambiguate user command referents, allow localized system feedback, and enable parallel voice interaction sessions by multiple subjects in different rooms. In this paper, we investigate a room-localized SAD system for smart homes equipped with multiple microphones distributed in multiple rooms, significantly extending our earlier work. The system employs a two-stage algorithm, incorporating a set of hand-crafted features specially designed to discriminate room-inside vs. room-outside speech at its second stage, refining SAD hypotheses obtained at its first stage by traditional statistical modeling and acoustic front-end processing. 
Both algorithmic stages exploit multi-microphone information, combining it at the signal, feature, or decision level. The proposed approach is extensively evaluated on both simulated and real data recorded in a multi-room, multi-microphone smart home, significantly outperforming alternative baselines. Further, it remains robust to reduced microphone setups, while also comparing favorably to deep learning-based alternatives. |
Georgia Chalvatzaki, Xanthi S. Papageorgiou, Petros Maragos, Costas S. Tzafestas Comparing the Impact of Robotic Rollator Control Schemes on Elderly Gait using on-line LRF-based Gait Analysis Conference Proc. Workshop on Mobile Robot Assistants for the Elderly (MoRobAE) in 2019 IEEE Int’l Conference on Robotics and Automation (ICRA), Montreal, Canada, 2019. @conference{Chalvatzaki2019b, title = {Comparing the Impact of Robotic Rollator Control Schemes on Elderly Gait using on-line LRF-based Gait Analysis}, author = {Georgia Chalvatzaki and Xanthi S. Papageorgiou and Petros Maragos and Costas S. Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/ChalvatzakiEtAl_Comparing-the-Impact-of-Robotic-Rollator_MoRobAE-ICRA2019.pdf}, year = {2019}, date = {2019-12-01}, booktitle = { Proc. Workshop on Mobile Robot Assistants for the Elderly (MoRobAE) in 2019 IEEE Int’l Conference on Robotics and Automation (ICRA)}, address = {Montreal, Canada}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
G. Retsinas, A. Elafrou, G. Goumas, P. Maragos RecNets: Channel-wise Recurrent Convolutional Neural Networks Conference Proceedings of the British Machine Vision Conference (BMVC), Cardiff, 2019. @conference{Networks2019, title = {RecNets: Channel-wise Recurrent Convolutional Neural Networks}, author = {G. Retsinas and A. Elafrou and G. Goumas and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/BMVC_RecNets.pdf}, year = {2019}, date = {2019-09-19}, booktitle = {Proceedings of the British Machine Vision Conference (BMVC)}, address = {Cardiff}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Georgia Chalvatzaki, Petros Koutras, Jack Hadfield, Xanthi S. Papageorgiou, Costas S. Tzafestas, Petros Maragos On-line Human Gait Stability Prediction using LSTMs for the fusion of Deep-based Pose Estimation and LRF-based Augmented Gait State Estimation in an Intelligent Robotic Rollator Conference IEEE International Conference on Robotics & Automation (ICRA), Montreal, Canada, 2019. @conference{Chalvatzaki2019d, title = {On-line Human Gait Stability Prediction using LSTMs for the fusion of Deep-based Pose Estimation and LRF-based Augmented Gait State Estimation in an Intelligent Robotic Rollator}, author = {Georgia Chalvatzaki and Petros Koutras and Jack Hadfield and Xanthi S. Papageorgiou and Costas S. Tzafestas and Petros Maragos}, year = {2019}, date = {2019-09-19}, booktitle = {IEEE International Conference on Robotics & Automation (ICRA)}, address = {Montreal, Canada}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C. Garoufis, A. Zlatintsi, K. Kritsis, P.P. Filntisis, V. Katsouros, and P. Maragos An Environment for Gestural Interaction with 3D Virtual Musical Instruments as an Educational Tool Conference Proc. 27th European Conf.(EUSIPCO-19), A Coruna, Spain, 2019. @conference{Garoufis2019, title = {An Environment for Gestural Interaction with 3D Virtual Musical Instruments as an Educational Tool}, author = {C. Garoufis and A. Zlatintsi and K. Kritsis and P.P. Filntisis and V. Katsouros and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2019_GZKFKM_GestureInteractWithVirtualMusicInstrumentsForEducation_EUSIPCO-1-1.pdf}, year = {2019}, date = {2019-09-01}, booktitle = {Proc. 27th European Conf.(EUSIPCO-19)}, address = {A Coruna, Spain}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P. Maragos Tropical Geometry, Mathematical Morphology and Weighted Lattices Conference Proc. 14th Int’l Symposium on Mathematical Morphology (ISMM-2019), Saarbruecken, Germany, 2019. @conference{Maragos2019, title = {Tropical Geometry, Mathematical Morphology and Weighted Lattices}, author = {P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2019_Maragos_TropicalGeometry-MM-WeightedLattices_ISMM-1.pdf}, year = {2019}, date = {2019-05-21}, booktitle = {Proc. 14th Int’l Symposium on Mathematical Morphology (ISMM-2019)}, pages = {3–15}, address = {Saarbruecken, Germany}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
E Theodosis, P Maragos Tropical Modeling of Weighted Transducer Algorithms on Graphs Conference Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing (ICASSP), 2019, ISSN: 2379-190X. Abstract | BibTeX | Links: [PDF] @conference{8683127, title = {Tropical Modeling of Weighted Transducer Algorithms on Graphs}, author = {E Theodosis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2019_TheodosisMaragos_TropicalModeling-Algorithms_ICASSP.pdf}, doi = {10.1109/ICASSP.2019.8683127}, issn = {2379-190X}, year = {2019}, date = {2019-05-01}, booktitle = {Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing (ICASSP)}, pages = {8653-8657}, abstract = {Weighted Finite State Transducers (WFSTs) are versatile graphical automata that can model a great number of problems, ranging from automatic speech recognition to DNA sequencing. Traditional computer science algorithms are employed when working with these automata in order to optimize their size, but also the run time of decoding algorithms. However, these algorithms are not unified under a common framework that would allow for their treatment as a whole. Moreover, the inherent geometrical representation of WFSTs, coupled with the topology-preserving algorithms that operate on them make the structures ideal for tropical analysis. The benefits of such analysis have a twofold nature; first, matrix operations offer a connection to nonlinear vector space and spectral theory, and, second, tropical algebra offers a connection to tropical geometry. In this work we model some of the most frequently used algorithms in WFSTs by using tropical algebra; this provides a theoretical unification and allows us to also analyze aspects of their tropical geometry.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Weighted Finite State Transducers (WFSTs) are versatile graphical automata that can model a great number of problems, ranging from automatic speech recognition to DNA sequencing. 
Traditional computer science algorithms are employed when working with these automata in order to optimize their size, but also the run time of decoding algorithms. However, these algorithms are not unified under a common framework that would allow for their treatment as a whole. Moreover, the inherent geometrical representation of WFSTs, coupled with the topology-preserving algorithms that operate on them make the structures ideal for tropical analysis. The benefits of such analysis have a twofold nature; first, matrix operations offer a connection to nonlinear vector space and spectral theory, and, second, tropical algebra offers a connection to tropical geometry. In this work we model some of the most frequently used algorithms in WFSTs by using tropical algebra; this provides a theoretical unification and allows us to also analyze aspects of their tropical geometry. |
G. Chalvatzaki, P. Koutras, J. Hadfield, X. S. Papageorgiou, C. S. Tzafestas, P. Maragos LSTM-based Network for Human Gait Stability Prediction in an Intelligent Robotic Rollator Conference Proc. 2019 IEEE International Conference on Robotics and Automation (ICRA), Montreal, Canada, 2019. @conference{Chalvatzaki2019, title = {LSTM-based Network for Human Gait Stability Prediction in an Intelligent Robotic Rollator}, author = {G. Chalvatzaki and P. Koutras and J. Hadfield and X. S. Papageorgiou and C. S. Tzafestas and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/ChalvatzakiEtAl_LSTMGaitStab_ICRA2019.pdf}, year = {2019}, date = {2019-05-01}, booktitle = {Proc. 2019 IEEE International Conference on Robotics and Automation (ICRA)}, address = {Montreal, Canada}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Xanthi S Papageorgiou, Georgia Chalvatzaki, Eleni Efthimiou, Stavroula-Evita Fotinea, Alexandra Karavasili, Costas S Tzafestas, Petros Maragos, Anna Vacalopoulou, Theodore Goulas User Centered HRI Design for an Intelligent Robotic Rollator Conference Proc. Workshop on Mobile Robot Assistants for the Elderly (MoRobAE) in 2019 IEEE Int’l Conference on Robotics and Automation (ICRA), Montreal, Canada, 2019. @conference{papageorgiouuser, title = {User Centered HRI Design for an Intelligent Robotic Rollator}, author = {Xanthi S Papageorgiou and Georgia Chalvatzaki and Eleni Efthimiou and Stavroula-Evita Fotinea and Alexandra Karavasili and Costas S Tzafestas and Petros Maragos and Anna Vacalopoulou and Theodore Goulas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/PapageorgiouEtAl_UserCenteredHRI_MoRobAE-ICRA2019.pdf}, year = {2019}, date = {2019-05-01}, booktitle = {Proc. Workshop on Mobile Robot Assistants for the Elderly (MoRobAE) in 2019 IEEE Int’l Conference on Robotics and Automation (ICRA)}, address = {Montreal, Canada}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Petros Maragos Tropical Geometry, Mathematical Morphology and Weighted Lattices Conference Mathematical Morphology and Its Applications to Signal and Image Processing - 14th International Symposium, ISMM 2019, Saarbrücken, Germany, July 8-10, 2019, Proceedings, 2019. @conference{DBLP:conf/ismm/Maragos19, title = {Tropical Geometry, Mathematical Morphology and Weighted Lattices}, author = {Petros Maragos}, url = {https://doi.org/10.1007/978-3-030-20867-7_1}, doi = {10.1007/978-3-030-20867-7_1}, year = {2019}, date = {2019-01-01}, booktitle = {Mathematical Morphology and Its Applications to Signal and Image Processing - 14th International Symposium, ISMM 2019, Saarbrücken, Germany, July 8-10, 2019, Proceedings}, pages = {3--15}, crossref = {DBLP:conf/ismm/2019}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Petros Koutras, Petros Maragos SUSiNet: See, Understand and Summarize it Conference Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, 2019. @conference{koutras2019susinet, title = {SUSiNet: See, Understand and Summarize it}, author = {Petros Koutras and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Koutras_SUSiNet_See_Understand_and_Summarize_It_CVPRW_2019_paper.pdf}, year = {2019}, date = {2019-01-01}, booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
N Gkanatsios, V Pitsikalis, P Koutras, A Zlatintsi, P Maragos Deeply Supervised Multimodal Attentional Translation Embeddings for Visual Relationship Detection Conference 2019 IEEE International Conference on Image Processing (ICIP), 2019. @conference{8803106, title = {Deeply Supervised Multimodal Attentional Translation Embeddings for Visual Relationship Detection}, author = {N Gkanatsios and V Pitsikalis and P Koutras and A Zlatintsi and P Maragos}, year = {2019}, date = {2019-01-01}, booktitle = {2019 IEEE International Conference on Image Processing (ICIP)}, pages = {1840-1844}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Petros Maragos Tropical Geometry, Mathematical Morphology and Weighted Lattices Conference Mathematical Morphology and Its Applications to Signal and Image Processing, Springer International Publishing, Cham, 2019, ISBN: 978-3-030-20867-7. @conference{10.1007/978-3-030-20867-7_1, title = {Tropical Geometry, Mathematical Morphology and Weighted Lattices}, author = {Petros Maragos}, editor = {Bernhard Burgeth and Andreas Kleefeld and Beno{\^i}t Naegel and Nicolas Passat and Benjamin Perret}, isbn = {978-3-030-20867-7}, year = {2019}, date = {2019-01-01}, booktitle = {Mathematical Morphology and Its Applications to Signal and Image Processing}, pages = {3--15}, publisher = {Springer International Publishing}, address = {Cham}, abstract = {Mathematical Morphology and Tropical Geometry share the same max/min-plus scalar arithmetic and matrix algebra. In this paper we summarize their common ideas and algebraic structure, generalize and extend both of them using weighted lattices and a max-$\star$ algebra with an arbitrary binary operation $\star$ that distributes over max, and outline applications to geometry, image analysis, and optimization. Further, we outline the optimal solution of max-$\star$ equations using weighted lattice adjunctions, and apply it to optimal regression for fitting max-$\star$ tropical curves on arbitrary data.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Mathematical Morphology and Tropical Geometry share the same max/min-plus scalar arithmetic and matrix algebra. In this paper we summarize their common ideas and algebraic structure, generalize and extend both of them using weighted lattices and a max-⋆ algebra with an arbitrary binary operation ⋆ that distributes over max, and outline applications to geometry, image analysis, and optimization. 
Further, we outline the optimal solution of max-⋆ equations using weighted lattice adjunctions, and apply it to optimal regression for fitting max-⋆ tropical curves on arbitrary data. |
George Retsinas, Athena Elafrou, Georgios Goumas, Petros Maragos RecNets: Channel-wise Recurrent Convolutional Neural Networks Conference British Machine Vision Conference (BMVC-2019), Cardiff, UK, 2019. @conference{retsinas2019recnets, title = {RecNets: Channel-wise Recurrent Convolutional Neural Networks}, author = {George Retsinas and Athena Elafrou and Georgios Goumas and Petros Maragos}, year = {2019}, date = {2019-01-01}, booktitle = {British Machine Vision Conference (BMVC-2019)}, address = {Cardiff, UK}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Jack Hadfield, Georgia Chalvatzaki, Petros Koutras, Mehdi Khamassi, Costas S Tzafestas, Petros Maragos A Deep Learning Approach for Multi-View Engagement Estimation of Children in a Child-Robot Joint Attention Task Conference 2019 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2019), Macau, China, 2019. @conference{hadfield:hal-02324118, title = {A Deep Learning Approach for Multi-View Engagement Estimation of Children in a Child-Robot Joint Attention Task}, author = {Jack Hadfield and Georgia Chalvatzaki and Petros Koutras and Mehdi Khamassi and Costas S Tzafestas and Petros Maragos}, url = {https://hal.archives-ouvertes.fr/hal-02324118}, year = {2019}, date = {2019-01-01}, booktitle = {2019 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2019)}, address = {Macau, China}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Christos Sakaridis, Nikos Kolotouros, Kimon Drakopoulos, Petros Maragos Chapter 4 - Active contour methods on arbitrary graphs based on partial differential equations Incollection Kimmel, R; Tai, X -C (Ed.): Processing, Analyzing and Learning of Images, Shapes, and Forms: Part 2, 20 , pp. 149-190, Elsevier North-Holland, 2019. @incollection{SKDM19, title = {Chapter 4 - Active contour methods on arbitrary graphs based on partial differential equations}, author = {Christos Sakaridis and Nikos Kolotouros and Kimon Drakopoulos and Petros Maragos}, editor = {R Kimmel and X -C Tai}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2019_SKDM_ActiveContoursOnGraphsPDEs_chapter4_HNA-20_NH.pdf}, doi = {https://doi.org/10.1016/bs.hna.2019.07.002}, year = {2019}, date = {2019-01-01}, booktitle = {Processing, Analyzing and Learning of Images, Shapes, and Forms: Part 2}, volume = {20}, pages = {149-190}, publisher = {Elsevier North-Holland}, series = {Handbook of Numerical Analysis}, keywords = {}, pubstate = {published}, tppubtype = {incollection} } |
2018 |
A C Dometios, Y Zhou, X S Papageorgiou, C S Tzafestas, T Asfour Vision-Based Online Adaptation of Motion Primitives to Dynamic Surfaces: Application to an Interactive Robotic Wiping Task Journal Article IEEE Robotics and Automation Letters, 3 (3), pp. 1410-1417, 2018. Abstract | BibTeX | Links: [PDF] @article{DZPTA18, title = {Vision-Based Online Adaptation of Motion Primitives to Dynamic Surfaces: Application to an Interactive Robotic Wiping Task}, author = {A C Dometios and Y Zhou and X S Papageorgiou and C S Tzafestas and T Asfour}, url = {http://robotics.ntua.gr/wp-content/publications/Dometios18_Vision-Based_Online_Adaptation_Motion_Primitives.pdf}, doi = {10.1109/LRA.2018.2800031}, year = {2018}, date = {2018-07-01}, journal = {IEEE Robotics and Automation Letters}, volume = {3}, number = {3}, pages = {1410-1417}, abstract = {Elderly or disabled people usually need augmented nursing attention both in home and clinical environments, especially to perform bathing activities. The development of an assistive robotic bath system, which constitutes a central motivation of this work, would increase the independence and safety of this procedure, ameliorating in this way the everyday life for this group of people. In general terms, the main goal of this work is to enable natural, physical human-robot interaction, involving human-friendly and user-adaptive on-line robot motion planning and interaction control. For this purpose, we employ imitation learning using a leader-follower framework called Coordinate Change Dynamic Movement Primitives (CC-DMP), in order to incorporate the expertise of professional carers for bathing sequences. In this letter, we propose a vision-based washing system, combining CC-DMP framework with a perception-based controller, to adapt the motion of robot’s end effector on moving and deformable surfaces, such as a human body part. 
The controller guarantees globally uniformly asymptotic convergence to the leader movement primitive while ensuring avoidance of restricted areas, such as sensitive skin body areas. We experimentally tested our approach on a setup including the humanoid robot ARMAR-III and a Kinect v2 camera. The robot executes motions learned from the publicly available KIT whole-body human motion database, achieving good tracking performance in challenging interactive task scenarios.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Elderly or disabled people usually need augmented nursing attention both in home and clinical environments, especially to perform bathing activities. The development of an assistive robotic bath system, which constitutes a central motivation of this work, would increase the independence and safety of this procedure, ameliorating in this way the everyday life for this group of people. In general terms, the main goal of this work is to enable natural, physical human-robot interaction, involving human-friendly and user-adaptive on-line robot motion planning and interaction control. For this purpose, we employ imitation learning using a leader-follower framework called Coordinate Change Dynamic Movement Primitives (CC-DMP), in order to incorporate the expertise of professional carers for bathing sequences. In this letter, we propose a vision-based washing system, combining CC-DMP framework with a perception-based controller, to adapt the motion of robot’s end effector on moving and deformable surfaces, such as a human body part. The controller guarantees globally uniformly asymptotic convergence to the leader movement primitive while ensuring avoidance of restricted areas, such as sensitive skin body areas. We experimentally tested our approach on a setup including the humanoid robot ARMAR-III and a Kinect v2 camera. 
The robot executes motions learned from the publicly available KIT whole-body human motion database, achieving good tracking performance in challenging interactive task scenarios. |
G Chalvatzaki, X S Papageorgiou, C S Tzafestas, P Maragos Augmented Human State Estimation Using Interacting Multiple Model Particle Filters With Probabilistic Data Association Journal Article IEEE Robotics and Automation Letters, 3 (3), pp. 1872-1879, 2018, ISSN: 2377-3766. @article{8276229, title = {Augmented Human State Estimation Using Interacting Multiple Model Particle Filters With Probabilistic Data Association}, author = {G Chalvatzaki and X S Papageorgiou and C S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2018_ChalvatzakiEtAl_HumanStateEstim-IMM-ParticleFilters-PDA_ieeeRAL.pdf}, doi = {10.1109/LRA.2018.2800084}, issn = {2377-3766}, year = {2018}, date = {2018-07-01}, journal = {IEEE Robotics and Automation Letters}, volume = {3}, number = {3}, pages = {1872-1879}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Ioannis Kordonis, Petros Maragos, George P Papavassilopoulos Stochastic stability in Max-Product and Max-Plus Systems with Markovian Jumps Journal Article Automatica, 92 , pp. 123–132, 2018, ISSN: 00051098. Abstract | BibTeX | Links: [PDF] @article{348, title = {Stochastic stability in Max-Product and Max-Plus Systems with Markovian Jumps}, author = {Ioannis Kordonis and Petros Maragos and George P Papavassilopoulos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KMP_StochStabilityInMPsystemsMarkovJumps_Automatica_preprint.pdf}, doi = {10.1016/j.automatica.2018.03.008}, issn = {00051098}, year = {2018}, date = {2018-01-01}, journal = {Automatica}, volume = {92}, pages = {123--132}, abstract = {We study Max-Product and Max-Plus Systems with Markovian Jumps and focus on stochastic stability problems. At first, a Lyapunov function is derived for the asymptotically stable deterministic Max-Product Systems. This Lyapunov function is then adjusted to derive sufficient conditions for the stochastic stability of Max-Product systems with Markovian Jumps. Many step Lyapunov functions are then used to derive necessary and sufficient conditions for stochastic stability. The results for the Max-Product systems are then applied to Max-Plus systems with Markovian Jumps, using an isomorphism and almost sure bounds for the asymptotic behavior of the state are obtained. A numerical example illustrating the application of the stability results on a production system is also given.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We study Max-Product and Max-Plus Systems with Markovian Jumps and focus on stochastic stability problems. At first, a Lyapunov function is derived for the asymptotically stable deterministic Max-Product Systems. This Lyapunov function is then adjusted to derive sufficient conditions for the stochastic stability of Max-Product systems with Markovian Jumps. 
Many step Lyapunov functions are then used to derive necessary and sufficient conditions for stochastic stability. The results for the Max-Product systems are then applied to Max-Plus systems with Markovian Jumps, using an isomorphism and almost sure bounds for the asymptotic behavior of the state are obtained. A numerical example illustrating the application of the stability results on a production system is also given. |
Lampros Flokas, Petros Maragos Online Wideband Spectrum Sensing Using Sparsity Journal Article IEEE Journal of Selected Topics in Signal Processing, 12 (1), pp. 35–44, 2018, ISSN: 19324553. Abstract | BibTeX | Links: [PDF] @article{349, title = {Online Wideband Spectrum Sensing Using Sparsity}, author = {Lampros Flokas and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/FlokasMaragos_OnlineWideSpectrumSensingUsingSparsity_JSTSP_preprint.pdf}, doi = {10.1109/JSTSP.2018.2797422}, issn = {19324553}, year = {2018}, date = {2018-01-01}, journal = {IEEE Journal of Selected Topics in Signal Processing}, volume = {12}, number = {1}, pages = {35--44}, abstract = {Wideband spectrum sensing is an essential part of cognitive radio systems. Exact spectrum estimation is usually inefficient as it requires sampling rates at or above the Nyquist rate. Using prior information on the structure of the signal could allow near exact reconstruction at much lower sampling rates. Sparsity of the sampled signal in the frequency domain is one of the popular priors studied for cognitive radio applications. Reconstruction of signals under sparsity assumptions has been studied rigorously by researchers in the field of Compressed Sensing (CS). CS algorithms that operate on batches of samples are known to be robust but can be computationally costly, making them unsuitable for cheap low power cognitive radio devices that require spectrum sensing in real time. On the other hand, online algorithms that are based on variations of the Least Mean Squares (LMS) algorithm have very simple updates so they are computationally efficient and can easily adapt in real time to changes of the underlying spectrum. In this paper we will present two variations of the LMS algorithm that enforce sparsity in the estimated spectrum given an upper bound on the number of non- zero coefficients. 
Assuming that the number of non-zero elements in the spectrum is known we show that under conditions the hard threshold operation can only reduce the error of our estimation. We will also show that we can estimate the number of non-zero elements of the spectrum at each iteration based on our online estimations. Finally, we numerically compare our algorithm with other online sparsity-inducing algorithms in the literature.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Wideband spectrum sensing is an essential part of cognitive radio systems. Exact spectrum estimation is usually inefficient as it requires sampling rates at or above the Nyquist rate. Using prior information on the structure of the signal could allow near exact reconstruction at much lower sampling rates. Sparsity of the sampled signal in the frequency domain is one of the popular priors studied for cognitive radio applications. Reconstruction of signals under sparsity assumptions has been studied rigorously by researchers in the field of Compressed Sensing (CS). CS algorithms that operate on batches of samples are known to be robust but can be computationally costly, making them unsuitable for cheap low power cognitive radio devices that require spectrum sensing in real time. On the other hand, online algorithms that are based on variations of the Least Mean Squares (LMS) algorithm have very simple updates so they are computationally efficient and can easily adapt in real time to changes of the underlying spectrum. In this paper we will present two variations of the LMS algorithm that enforce sparsity in the estimated spectrum given an upper bound on the number of non- zero coefficients. Assuming that the number of non-zero elements in the spectrum is known we show that under conditions the hard threshold operation can only reduce the error of our estimation. 
We will also show that we can estimate the number of non-zero elements of the spectrum at each iteration based on our online estimations. Finally, we numerically compare our algorithm with other online sparsity-inducing algorithms in the literature. |
Mehdi Khamassi, George Velentzas, Theodore Tsitsimis, Costas Tzafestas Robot fast adaptation to changes in human engagement during simulated dynamic social interaction with active exploration in parameterized reinforcement learning Journal Article IEEE Transactions on Cognitive and Developmental Systems, 10 , pp. 881 - 893, 2018. Abstract | BibTeX | Links: [PDF] @article{BFB99, title = {Robot fast adaptation to changes in human engagement during simulated dynamic social interaction with active exploration in parameterized reinforcement learning}, author = {Mehdi Khamassi and George Velentzas and Theodore Tsitsimis and Costas Tzafestas}, url = {http://robotics.ntua.gr/wp-content/publications/Khamassi_TCDS2018.pdf}, doi = {10.1109/TCDS.2018.2843122}, year = {2018}, date = {2018-01-01}, journal = { IEEE Transactions on Cognitive and Developmental Systems}, volume = {10}, pages = {881 - 893}, publisher = {IEEE}, abstract = {Dynamic uncontrolled human-robot interactions (HRI) require robots to be able to adapt to changes in the human’s behavior and intentions. Among relevant signals, non-verbal cues such as the human’s gaze can provide the robot with important information about the human’s current engagement in the task, and whether the robot should continue its current behavior or not. However, robot reinforcement learning (RL) abilities to adapt to these non-verbal cues are still underdeveloped. Here we propose an active exploration algorithm for RL during HRI where the reward function is the weighted sum of the human’s current engagement and variations of this engagement. We use a parameterized action space where a meta-learning algorithm is applied to simultaneously tune the exploration in discrete action space (e.g. moving an object) and in the space of continuous characteristics of movement (e.g. velocity, direction, strength, expressivity). We first show that this algorithm reaches state-of-the-art performance in the non-stationary multi-armed bandit paradigm. 
We then apply it to a simulated HRI task, and show that it outperforms continuous parameterized RL with either passive or active exploration based on different existing methods. We finally test the performance in a more realistic test of the same HRI task, where a practical approach is followed to estimate human engagement through visual cues of the head pose. The algorithm can detect and adapt to perturbations in human engagement with different durations. Altogether, these results suggest a novel efficient and robust framework for robot learning during dynamic HRI scenarios.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Dynamic uncontrolled human-robot interactions (HRI) require robots to be able to adapt to changes in the human’s behavior and intentions. Among relevant signals, non-verbal cues such as the human’s gaze can provide the robot with important information about the human’s current engagement in the task, and whether the robot should continue its current behavior or not. However, robot reinforcement learning (RL) abilities to adapt to these non-verbal cues are still underdeveloped. Here we propose an active exploration algorithm for RL during HRI where the reward function is the weighted sum of the human’s current engagement and variations of this engagement. We use a parameterized action space where a meta-learning algorithm is applied to simultaneously tune the exploration in discrete action space (e.g. moving an object) and in the space of continuous characteristics of movement (e.g. velocity, direction, strength, expressivity). We first show that this algorithm reaches state-of-the-art performance in the non-stationary multi-armed bandit paradigm. We then apply it to a simulated HRI task, and show that it outperforms continuous parameterized RL with either passive or active exploration based on different existing methods. 
We finally test the performance in a more realistic test of the same HRI task, where a practical approach is followed to estimate human engagement through visual cues of the head pose. The algorithm can detect and adapt to perturbations in human engagement with different durations. Altogether, these results suggest a novel efficient and robust framework for robot learning during dynamic HRI scenarios. |
N. Efthymiou, P. Koutras, P.P. Filntisis, G. Potamianos, P. Maragos MULTI-VIEW FUSION FOR ACTION RECOGNITION IN CHILD-ROBOT INTERACTION Conference Proc. IEEE Int'l Conf. on Image Processing, Athens, Greece, 2018. Abstract | BibTeX | Links: [PDF] @conference{efthymiou18action, title = {MULTI-VIEW FUSION FOR ACTION RECOGNITION IN CHILD-ROBOT INTERACTION}, author = {N. Efthymiou and P. Koutras and P.P. Filntisis and G. Potamianos and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/EfthymiouKoutrasFilntisis_MultiViewFusActRecognChildRobotInteract_ICIP18.pdf}, year = {2018}, date = {2018-10-01}, booktitle = {Proc. IEEE Int'l Conf. on Image Processing}, address = {Athens, Greece}, abstract = {Answering the challenge of leveraging computer vision methods in order to enhance Human Robot Interaction (HRI) experience, this work explores methods that can expand the capabilities of an action recognition system in such tasks. A multi-view action recognition system is proposed for integration in HRI scenarios with special users, such as children, in which there is limited data for training and many state-of-the-art techniques face difficulties. Different feature extraction approaches, encoding methods and fusion techniques are combined and tested in order to create an efficient system that recognizes children pantomime actions. This effort culminates in the integration of a robotic platform and is evaluated under an alluring Children Robot Interaction scenario.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Answering the challenge of leveraging computer vision methods in order to enhance Human Robot Interaction (HRI) experience, this work explores methods that can expand the capabilities of an action recognition system in such tasks. 
A multi-view action recognition system is proposed for integration in HRI scenarios with special users, such as children, in which there is limited data for training and many state-of-the-art techniques face difficulties. Different feature extraction approaches, encoding methods and fusion techniques are combined and tested in order to create an efficient system that recognizes children pantomime actions. This effort culminates in the integration of a robotic platform and is evaluated under an alluring Children Robot Interaction scenario. |
J. Hadfield, P. Koutras, N. Efthymiou, G. Potamianos, C.S. Tzafestas, P. Maragos Object Assembly Guidance in Child-Robot Interaction using RGB-D based 3D Tracking Conference Proc. of 2018 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2018), Madrid, Spain, 2018. @conference{Hadfield2018, title = {Object Assembly Guidance in Child-Robot Interaction using RGB-D based 3D Tracking}, author = {J. Hadfield and P. Koutras and N. Efthymiou and G. Potamianos and C.S. Tzafestas and P. Maragos}, year = {2018}, date = {2018-10-01}, booktitle = {Proc. of 2018 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2018)}, journal = {Proc. of 2018 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2018)}, address = {Madrid, Spain}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
A. Zlatintsi, P.P. Filntisis, C. Garoufis, A. Tsiami, K. Kritsis, M.A. Kaliakatsos-Papakostas, A. Gkiokas, V. Katsouros, P. Maragos A Web-based Real-Time Kinect Application for Gestural Interaction with Virtual Musical Instruments Conference Proc. of Audio Mostly Conference (AM’18), Wrexham, North Wales, UK, 2018. @conference{Zlatintsi2018, title = {A Web-based Real-Time Kinect Application for Gestural Interaction with Virtual Musical Instruments}, author = {A. Zlatintsi and P.P. Filntisis and C. Garoufis and A. Tsiami and K. Kritsis and M.A. Kaliakatsos-Papakostas and A. Gkiokas and V. Katsouros and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/ZlatintsiEtAl_WebBasedRealTimeKinectAppGestInteractVMI_ΑΜ18-1.pdf}, year = {2018}, date = {2018-09-01}, booktitle = {Proc. of Audio Mostly Conference (AM’18)}, address = {Wrexham, North Wales, UK}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C. Garoufis, A. Zlatintsi, P. Maragos A Collaborative System for Composing Music via Motion Using a Kinect Sensor and Skeletal Data Conference Proc. 15th International Sound & Music Computing Conference (SMC-2018), Limassol, Cyprus, 2018. @conference{Garoufis2018, title = {A Collaborative System for Composing Music via Motion Using a Kinect Sensor and Skeletal Data}, author = {C. Garoufis and A. Zlatintsi and P. Maragos}, year = {2018}, date = {2018-07-01}, booktitle = {Proc. 15th International Sound & Music Computing Conference (SMC-2018)}, address = {Limassol, Cyprus}, abstract = {This paper describes MoveSynth, a performance system for two players, who interact with it and collaborate with each other in various ways, including full-body movements, arm postures and continuous gestures, to compose music in real time. The system uses a Kinect sensor, in order to track the performers’ positions, as well as their arm and hand movements. In the system’s current state, the musical parameters that the performers can influence include the pitch and the volume of the music, the timbre of the sound, as well as the time interval between successive notes. We extensively experimented using various classifiers in order to detect the one that gives the optimal results regarding the task of continuous gesture and arm posture recognition, accomplishing 92.11% for continuous gestures and 99.33% for arm postures, using an 1-NN classifier with a condensed search space in both cases. 
Additionally, the qualitative results of the usability testing of the final system, which was performed by 9 users, are encouraging and identify possible avenues for further exploration and improvement.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper describes MoveSynth, a performance system for two players, who interact with it and collaborate with each other in various ways, including full-body movements, arm postures and continuous gestures, to compose music in real time. The system uses a Kinect sensor, in order to track the performers’ positions, as well as their arm and hand movements. In the system’s current state, the musical parameters that the performers can influence include the pitch and the volume of the music, the timbre of the sound, as well as the time interval between successive notes. We extensively experimented using various classifiers in order to detect the one that gives the optimal results regarding the task of continuous gesture and arm posture recognition, accomplishing 92.11% for continuous gestures and 99.33% for arm postures, using an 1-NN classifier with a condensed search space in both cases. Additionally, the qualitative results of the usability testing of the final system, which was performed by 9 users, are encouraging and identify possible avenues for further exploration and improvement. |
X S Papageorgiou, G Chalvatzaki, A Dometios, C S Tzafestas Human-Centered Service Robotic Systems for Assisted Living Conference Proceedings of the 27th International Conference on Robotics in Alpe-Adria Danube Region (RAAD 2018), 2018. Abstract | BibTeX | Links: [PDF] @conference{RAAD2018, title = {Human-Centered Service Robotic Systems for Assisted Living}, author = {X S Papageorgiou and G Chalvatzaki and A Dometios and C S Tzafestas}, url = {http://robotics.ntua.gr/wp-content/publications/RAAD2018.pdf}, year = {2018}, date = {2018-06-01}, booktitle = {Proceedings of the 27th International Conference on Robotics in Alpe-Adria Danube Region (RAAD 2018)}, abstract = {Mobility impairment is a common problem for the elderly population which relates to difficulties in performing Activities of Daily Living (ADLs) and consequently leads to restrictions and the degradation of the living standards of the elders. When designing a user-friendly assistive device for mobility constrained people, the variable spectrum of disabilities is a factor that should affect the design process, since people with different impairments have different needs to be covered by the device, thus an adaptive behavior of those systems is necessary. Also, the performance of bathing activities includes several challenges for the elderly people, since such tasks require body flexibility. In this paper, we present current frameworks and solutions for intelligent robotic systems for assistive living involving human robot interaction in a natural interface. Our aim is to build such systems, in order to increase the independence and safety of these procedures. To achieve human - robot interaction in a natural way, we have to adapt the expertise of carers regarding bathing motions and walking assistance. 
The main goal of this work is to present recent research results towards the development of two real-life use cases incorporating intelligent robotic systems, aiming to support mobility and bathing activities for the elderly in order to provide context-aware and user-adaptive assistance.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Mobility impairment is a common problem for the elderly population which relates to difficulties in performing Activities of Daily Living (ADLs) and consequently leads to restrictions and the degradation of the living standards of the elders. When designing a user-friendly assistive device for mobility constrained people, the variable spectrum of disabilities is a factor that should affect the design process, since people with different impairments have different needs to be covered by the device, thus an adaptive behavior of those systems is necessary. Also, the performance of bathing activities includes several challenges for the elderly people, since such tasks require body flexibility. In this paper, we present current frameworks and solutions for intelligent robotic systems for assistive living involving human robot interaction in a natural interface. Our aim is to build such systems, in order to increase the independence and safety of these procedures. To achieve human - robot interaction in a natural way, we have to adapt the expertise of carers regarding bathing motions and walking assistance. The main goal of this work is to present recent research results towards the development of two real-life use cases incorporating intelligent robotic systems, aiming to support mobility and bathing activities for the elderly in order to provide context-aware and user-adaptive assistance. |
G Bouritsas, P Koutras, A Zlatintsi, Petros Maragos Multimodal Visual Concept Learning with Weakly Supervised Techniques Conference Proc. IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Salt Lake City, Utah, USA, 2018. Abstract | BibTeX | Links: [PDF] @conference{BKA+18, title = {Multimodal Visual Concept Learning with Weakly Supervised Techniques}, author = {G Bouritsas and P Koutras and A Zlatintsi and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2018_BKZM_MultimodalVisualConceptLearningWeaklySupervisedTechniques_CVPR.pdf}, year = {2018}, date = {2018-06-01}, booktitle = {Proc. IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, address = { Salt Lake City, Utah, USA}, abstract = {Despite the availability of a huge amount of video data accompanied by descriptive texts, it is not always easy to exploit the information contained in natural language in order to automatically recognize video concepts. Towards this goal, in this paper we use textual cues as means of supervision, introducing two weakly supervised techniques that extend the Multiple Instance Learning (MIL) framework: the Fuzzy Sets Multiple Instance Learning (FSMIL) and the Probabilistic Labels Multiple Instance Learning (PLMIL). The former encodes the spatio-temporal imprecision of the linguistic descriptions with Fuzzy Sets, while the latter models different interpretations of each description’s semantics with Probabilistic Labels, both formulated through a convex optimization algorithm. In addition, we provide a novel technique to extract weak labels in the presence of complex semantics, that consists of semantic similarity computations. We evaluate our methods on two distinct problems, namely face and action recognition, in the challenging and realistic setting of movies accompanied by their screenplays, contained in the COGNIMUSE database. 
We show that, on both tasks, our method considerably outperforms a state-of-the-art weakly supervised approach, as well as other baselines.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Despite the availability of a huge amount of video data accompanied by descriptive texts, it is not always easy to exploit the information contained in natural language in order to automatically recognize video concepts. Towards this goal, in this paper we use textual cues as means of supervision, introducing two weakly supervised techniques that extend the Multiple Instance Learning (MIL) framework: the Fuzzy Sets Multiple Instance Learning (FSMIL) and the Probabilistic Labels Multiple Instance Learning (PLMIL). The former encodes the spatio-temporal imprecision of the linguistic descriptions with Fuzzy Sets, while the latter models different interpretations of each description’s semantics with Probabilistic Labels, both formulated through a convex optimization algorithm. In addition, we provide a novel technique to extract weak labels in the presence of complex semantics, that consists of semantic similarity computations. We evaluate our methods on two distinct problems, namely face and action recognition, in the challenging and realistic setting of movies accompanied by their screenplays, contained in the COGNIMUSE database. We show that, on both tasks, our method considerably outperforms a state-of-the-art weakly supervised approach, as well as other baselines. |
E. Theodosis, P. Maragos Analysis of the Viterbi Algorithm Using Tropical Algebra and Geometry Conference Proc. IEEE International Workshop on Signal Processing Advances in Wireless Communications (SPAWC-18), Kalamata, Greece, 2018. @conference{Theodosis2018, title = {Analysis of the Viterbi Algorithm Using Tropical Algebra and Geometry}, author = {E. Theodosis and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2018_TheodosisMaragos_AnalysisViterbi-TropicalAlgebraGeometry_SPAWC.pdf}, year = {2018}, date = {2018-06-01}, booktitle = {Proc. IEEE International Workshop on Signal Processing Advances in Wireless Communications (SPAWC-18)}, address = {Kalamata, Greece}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
A Tsiami, P Koutras, Niki Efthymiou, P P Filntisis, G Potamianos, P Maragos Multi3: Multi-sensory Perception System for Multi-modal Child Interaction with Multiple Robots Conference IEEE International Conference on Robotics and Automation (ICRA), Brisbane, Australia, 2018. Abstract | BibTeX | Links: [PDF] @conference{multi3, title = {Multi3: Multi-sensory Perception System for Multi-modal Child Interaction with Multiple Robots}, author = {A Tsiami and P Koutras and Niki Efthymiou and P P Filntisis and G Potamianos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/2018_TsiamiEtAl_Multi3-MultisensorMultimodalChildInteractMultRobots_ICRA.pdf}, year = {2018}, date = {2018-05-01}, booktitle = {IEEE International Conference on Robotics and Automation (ICRA)}, address = {Brisbane, Australia}, abstract = {Child-robot interaction is an interdisciplinary research area that has been attracting growing interest, primarily focusing on edutainment applications. A crucial factor to the successful deployment and wide adoption of such applications remains the robust perception of the child's multimodal actions, when interacting with the robot in a natural and untethered fashion. Since robotic sensory and perception capabilities are platform-dependent and most often rather limited, we propose a multiple Kinect-based system to perceive the child-robot interaction scene that is robot-independent and suitable for indoors interaction scenarios. The audio-visual input from the Kinect sensors is fed into speech, gesture, and action recognition modules, appropriately developed in this paper to address the challenging nature of child-robot interaction. For this purpose, data from multiple children are collected and used for module training or adaptation. Further, information from the multiple sensors is fused to enhance module performance. 
The perception system is integrated in a modular multi-robot architecture demonstrating its flexibility and scalability with different robotic platforms. The whole system, called Multi3, is evaluated, both objectively at the module level and subjectively in its entirety, under appropriate child-robot interaction scenarios containing several carefully designed games between children and robots.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Child-robot interaction is an interdisciplinary research area that has been attracting growing interest, primarily focusing on edutainment applications. A crucial factor to the successful deployment and wide adoption of such applications remains the robust perception of the child's multimodal actions, when interacting with the robot in a natural and untethered fashion. Since robotic sensory and perception capabilities are platform-dependent and most often rather limited, we propose a multiple Kinect-based system to perceive the child-robot interaction scene that is robot-independent and suitable for indoors interaction scenarios. The audio-visual input from the Kinect sensors is fed into speech, gesture, and action recognition modules, appropriately developed in this paper to address the challenging nature of child-robot interaction. For this purpose, data from multiple children are collected and used for module training or adaptation. Further, information from the multiple sensors is fused to enhance module performance. The perception system is integrated in a modular multi-robot architecture demonstrating its flexibility and scalability with different robotic platforms. The whole system, called Multi3, is evaluated, both objectively at the module level and subjectively in its entirety, under appropriate child-robot interaction scenarios containing several carefully designed games between children and robots. |
A Zlatintsi, I Rodomagoulakis, P Koutras, A ~C Dometios, V Pitsikalis, C ~S Tzafestas, P Maragos Multimodal Signal Processing and Learning Aspects of Human-Robot Interaction for an Assistive Bathing Robot Conference Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing, Calgary, Canada, 2018. Abstract | BibTeX | Links: [PDF] @conference{ZRK+18, title = {Multimodal Signal Processing and Learning Aspects of Human-Robot Interaction for an Assistive Bathing Robot}, author = {A Zlatintsi and I Rodomagoulakis and P Koutras and A ~C Dometios and V Pitsikalis and C ~S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/Zlatintsi+_I-SUPPORT_ICASSP18.pdf}, year = {2018}, date = {2018-04-01}, booktitle = {Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing}, address = {Calgary, Canada}, abstract = {We explore new aspects of assistive living on smart human-robot interaction (HRI) that involve automatic recognition and online validation of speech and gestures in a natural interface, providing social features for HRI. We introduce a whole framework and resources of a real-life scenario for elderly subjects supported by an assistive bathing robot, addressing health and hygiene care issues. We contribute a new dataset and a suite of tools used for data acquisition and a state-of-the-art pipeline for multimodal learning within the framework of the I-Support bathing robot, with emphasis on audio and RGB-D visual streams. We consider privacy issues by evaluating the depth visual stream along with the RGB, using Kinect sensors. The audio-gestural recognition task on this new dataset yields up to 84.5%, while the online validation of the I-Support system on elderly users accomplishes up to 84% when the two modalities are fused together. 
The results are promising enough to support further research in the area of multimodal recognition for assistive social HRI, considering the difficulties of the specific task.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We explore new aspects of assistive living on smart human-robot interaction (HRI) that involve automatic recognition and online validation of speech and gestures in a natural interface, providing social features for HRI. We introduce a whole framework and resources of a real-life scenario for elderly subjects supported by an assistive bathing robot, addressing health and hygiene care issues. We contribute a new dataset and a suite of tools used for data acquisition and a state-of-the-art pipeline for multimodal learning within the framework of the I-Support bathing robot, with emphasis on audio and RGB-D visual streams. We consider privacy issues by evaluating the depth visual stream along with the RGB, using Kinect sensors. The audio-gestural recognition task on this new dataset yields up to 84.5%, while the online validation of the I-Support system on elderly users accomplishes up to 84% when the two modalities are fused together. The results are promising enough to support further research in the area of multimodal recognition for assistive social HRI, considering the difficulties of the specific task. |
A Tsiami, P P Filntisis, N Efthymiou, P Koutras, G Potamianos, P Maragos FAR-FIELD AUDIO-VISUAL SCENE PERCEPTION OF MULTI-PARTY HUMAN-ROBOT INTERACTION FOR CHILDREN AND ADULTS Conference Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing (ICASSP), Calgary, Canada, 2018. Abstract | BibTeX | Links: [PDF] @conference{tsiamifar, title = {FAR-FIELD AUDIO-VISUAL SCENE PERCEPTION OF MULTI-PARTY HUMAN-ROBOT INTERACTION FOR CHILDREN AND ADULTS}, author = {A Tsiami and P P Filntisis and N Efthymiou and P Koutras and G Potamianos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/2018_TsiamiEtAl_FarfieldAVperceptionHRI-ChildrenAdults_ICASSP.pdf}, year = {2018}, date = {2018-04-01}, booktitle = {Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing (ICASSP)}, address = {Calgary, Canada}, abstract = {Human-robot interaction (HRI) is a research area of growing interest with a multitude of applications for both children and adult user groups, as, for example, in edutainment and social robotics. Crucial, however, to its wider adoption remains the robust perception of HRI scenes in natural, untethered, and multi-party interaction scenarios, across user groups. Towards this goal, we investigate three focal HRI perception modules operating on data from multiple audio-visual sensors that observe the HRI scene from the far-field, thus bypassing limitations and platform-dependency of contemporary robotic sensing. In particular, the developed modules fuse intra- and/or inter-modality data streams to perform: (i) audio-visual speaker localization; (ii) distant speech recognition; and (iii) visual recognition of hand-gestures. Emphasis is also placed on ensuring high speech and gesture recognition rates for both children and adults. 
Development and objective evaluation of the three modules is conducted on a corpus of both user groups, collected by our far-field multi-sensory setup, for an interaction scenario of a question-answering ``guess-the-object'' collaborative HRI game with a ``Furhat'' robot. In addition, evaluation of the game incorporating the three developed modules is reported. Our results demonstrate robust far-field audio-visual perception of the multi-party HRI scene.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Human-robot interaction (HRI) is a research area of growing interest with a multitude of applications for both children and adult user groups, as, for example, in edutainment and social robotics. Crucial, however, to its wider adoption remains the robust perception of HRI scenes in natural, untethered, and multi-party interaction scenarios, across user groups. Towards this goal, we investigate three focal HRI perception modules operating on data from multiple audio-visual sensors that observe the HRI scene from the far-field, thus bypassing limitations and platform-dependency of contemporary robotic sensing. In particular, the developed modules fuse intra- and/or inter-modality data streams to perform: (i) audio-visual speaker localization; (ii) distant speech recognition; and (iii) visual recognition of hand-gestures. Emphasis is also placed on ensuring high speech and gesture recognition rates for both children and adults. Development and objective evaluation of the three modules is conducted on a corpus of both user groups, collected by our far-field multi-sensory setup, for an interaction scenario of a question-answering ``guess-the-object'' collaborative HRI game with a ``Furhat'' robot. In addition, evaluation of the game incorporating the three developed modules is reported. Our results demonstrate robust far-field audio-visual perception of the multi-party HRI scene. |
Jack Hadfield, Petros Koutras, Niki Efthymiou, Gerasimos Potamianos, Costas S Tzafestas, Petros Maragos Object assembly guidance in child-robot interaction using RGB-D based 3d tracking Conference 2018 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), IEEE 2018. @conference{hadfield2018object, title = {Object assembly guidance in child-robot interaction using RGB-D based 3d tracking}, author = {Jack Hadfield and Petros Koutras and Niki Efthymiou and Gerasimos Potamianos and Costas S Tzafestas and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2018_HadfieldEtAl_ObjectAssemblyGuidance-ChildRobotInteraction_IROS.pdf}, year = {2018}, date = {2018-01-01}, booktitle = {2018 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, pages = {347--354}, organization = {IEEE}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
2017 |
P. Maragos Dynamical systems on weighted lattices: general theory Journal Article Math. Control Signals Syst., 29 (1), 2017. @article{Maragos2017, title = {Dynamical systems on weighted lattices: general theory}, author = {P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2017_Maragos_DynamicalSystemsOnWeightedLattices_MCSS.pdf}, doi = {10.1007/s00498-017-0207-8}, year = {2017}, date = {2017-12-01}, journal = {Math. Control Signals Syst.}, volume = {29}, number = {1}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
I. Rodomagoulakis, A. Katsamanis, G. Potamianos, P. Giannoulis, A. Tsiami, P. Maragos Room-localized spoken command recognition in multi-room, multi-microphone environments Journal Article Computer Speech & Language, 46 , pp. 419-443, 2017. @article{Rodomagoulakis2017, title = {Room-localized spoken command recognition in multi-room, multi-microphone environments}, author = {I. Rodomagoulakis and A. Katsamanis and G. Potamianos and P. Giannoulis and A. Tsiami and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/RKPGTM_SCR-RoomlocalizedMultiroomMultimic_csl2017.pdf}, year = {2017}, date = {2017-11-01}, journal = {Computer Speech & Language}, volume = {46}, pages = {419-443}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Christos Sakaridis, Kimon Drakopoulos, Petros Maragos Theoretical Analysis of Active Contours on Graphs Journal Article SIAM J. Imaging Sciences, 2017, ISSN: 1936-4954. Abstract | BibTeX | Links: [Webpage] @article{344, title = {Theoretical Analysis of Active Contours on Graphs}, author = {Christos Sakaridis and Kimon Drakopoulos and Petros Maragos}, url = {http://arxiv.org/abs/1610.07381}, doi = {10.1137/16M1100101}, issn = {1936-4954}, year = {2017}, date = {2017-01-01}, journal = {SIAM J. Imaging Sciences}, abstract = {Active contour models based on partial differential equations have proved successful in image segmentation, yet the study of their geometric formulation on arbitrary geometric graphs is still at an early stage. In this paper, we introduce geometric approximations of gradient and curvature, which are used in the geodesic active contour model. We prove convergence in probability of our gradient approximation to the true gradient value and derive an asymptotic upper bound for the error of this approximation for the class of random geometric graphs. Two different approaches for the approximation of curvature are presented and both are also proved to converge in probability in the case of random geometric graphs. We propose neighborhood-based filtering on graphs to improve the accuracy of the aforementioned approximations and define two variants of Gaussian smoothing on graphs which include normalization in order to adapt to graph non-uniformities. The performance of our active contour framework on graphs is demonstrated in the segmentation of regular images and geographical data defined on arbitrary graphs.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Active contour models based on partial differential equations have proved successful in image segmentation, yet the study of their geometric formulation on arbitrary geometric graphs is still at an early stage. 
In this paper, we introduce geometric approximations of gradient and curvature, which are used in the geodesic active contour model. We prove convergence in probability of our gradient approximation to the true gradient value and derive an asymptotic upper bound for the error of this approximation for the class of random geometric graphs. Two different approaches for the approximation of curvature are presented and both are also proved to converge in probability in the case of random geometric graphs. We propose neighborhood-based filtering on graphs to improve the accuracy of the aforementioned approximations and define two variants of Gaussian smoothing on graphs which include normalization in order to adapt to graph non-uniformities. The performance of our active contour framework on graphs is demonstrated in the segmentation of regular images and geographical data defined on arbitrary graphs. |
Christos G Bampis, Petros Maragos, Alan C Bovik Graph-driven diffusion and random walk schemes for image segmentation Journal Article IEEE Transactions on Image Processing, 26 (1), pp. 35–50, 2017, ISSN: 10577149. Abstract | BibTeX | Links: [PDF] @article{327, title = {Graph-driven diffusion and random walk schemes for image segmentation}, author = {Christos G Bampis and Petros Maragos and Alan C Bovik}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/BampisMaragosBovik_GraphDiffusionRandomWalkImageSegment_TIP2017_0.pdf}, doi = {10.1109/TIP.2016.2621663}, issn = {10577149}, year = {2017}, date = {2017-01-01}, journal = {IEEE Transactions on Image Processing}, volume = {26}, number = {1}, pages = {35--50}, abstract = {— We propose graph-driven approaches to image segmentation by developing diffusion processes defined on arbi-trary graphs. We formulate a solution to the image segmentation problem modeled as the result of infectious wavefronts prop-agating on an image-driven graph, where pixels correspond to nodes of an arbitrary graph. By relating the popular susceptible-infected-recovered epidemic propagation model to the Random Walker algorithm, we develop the normalized random walker and a lazy random walker variant. The underlying iterative solutions of these methods are derived as the result of infec-tions transmitted on this arbitrary graph. The main idea is to incorporate a degree-aware term into the original Random Walker algorithm in order to account for the node centrality of every neighboring node and to weigh the contribution of every neighbor to the underlying diffusion process. Our lazy random walk variant models the tendency of patients or nodes to resist changes in their infection status. We also show how previous work can be naturally extended to take advantage of this degree-aware term, which enables the design of other novel methods. 
Through an extensive experimental analysis, we demonstrate the reliability of our approach, its small computational burden and the dimensionality reduction capabilities of graph-driven approaches. Without applying any regular grid constraint, the proposed graph clustering scheme allows us to consider pixel-level, node-level approaches, and multidimensional input data by naturally integrating the importance of each node to the final clustering or segmentation solution. A software release containing implementations of this paper and supplementary material can be found at: http://cvsp.cs.ntua.gr/research/GraphClustering/.}, keywords = {}, pubstate = {published}, tppubtype = {article} } — We propose graph-driven approaches to image segmentation by developing diffusion processes defined on arbi-trary graphs. We formulate a solution to the image segmentation problem modeled as the result of infectious wavefronts prop-agating on an image-driven graph, where pixels correspond to nodes of an arbitrary graph. By relating the popular susceptible-infected-recovered epidemic propagation model to the Random Walker algorithm, we develop the normalized random walker and a lazy random walker variant. The underlying iterative solutions of these methods are derived as the result of infec-tions transmitted on this arbitrary graph. The main idea is to incorporate a degree-aware term into the original Random Walker algorithm in order to account for the node centrality of every neighboring node and to weigh the contribution of every neighbor to the underlying diffusion process. Our lazy random walk variant models the tendency of patients or nodes to resist changes in their infection status. We also show how previous work can be naturally extended to take advantage of this degree-aware term, which enables the design of other novel methods. 
Through an extensive experimental analysis, we demonstrate the reliability of our approach, its small computational burden and the dimensionality reduction capabilities of graph-driven approaches. Without applying any regular grid constraint, the proposed graph clustering scheme allows us to consider pixel-level, node-level approaches, and multidimensional input data by naturally integrating the importance of each node to the final clustering or segmentation solution. A software release containing implementations of this paper and supplementary material can be found at: http://cvsp.cs.ntua.gr/research/GraphClustering/. |
Panagiotis Paraskevas Filntisis, Athanasios Katsamanis, Pirros Tsiakoulis, Petros Maragos Video-realistic expressive audio-visual speech synthesis for the Greek language Journal Article Speech Communication, 95 , pp. 137–152, 2017, ISSN: 01676393. Abstract | BibTeX | Links: [PDF] @article{345, title = {Video-realistic expressive audio-visual speech synthesis for the Greek language}, author = {Panagiotis Paraskevas Filntisis and Athanasios Katsamanis and Pirros Tsiakoulis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/FilntisisKatsamanisTsiakoulis+_VideoRealExprAudioVisSpeechSynthGrLang_SC17.pdf}, doi = {10.1016/j.specom.2017.08.011}, issn = {01676393}, year = {2017}, date = {2017-01-01}, journal = {Speech Communication}, volume = {95}, pages = {137--152}, abstract = {High quality expressive speech synthesis has been a long-standing goal towards natural human-computer interaction. Generating a talking head which is both realistic and expressive appears to be a considerable challenge, due to both the high complexity in the acoustic and visual streams and the large non-discrete number of emotional states we would like the talking head to be able to express. In order to cover all the desired emotions, a significant amount of data is required, which poses an additional time-consuming data collection challenge. In this paper we attempt to address the aforementioned problems in an audio-visual context. Towards this goal, we propose two deep neural network (DNN) architectures for Video-realistic Expressive Audio-Visual Text-To-Speech synthesis (EAVTTS) and evaluate them by comparing them directly both to traditional hidden Markov model (HMM) based EAVTTS, as well as a concatenative unit selection EAVTTS approach, both on the realism and the expressiveness of the generated talking head. Next, we investigate adaptation and interpolation techniques to address the problem of covering the large emotional space. 
We use HMM interpolation in order to generate different levels of intensity for an emotion, as well as investigate whether it is possible to generate speech with intermediate speaking styles between two emotions. In addition, we employ HMM adaptation to adapt an HMM-based system to another emotion using only a limited amount of adaptation data from the target emotion. We performed an extensive experimental evaluation on a medium sized audio-visual corpus covering three emotions, namely anger, sadness and happiness, as well as neutral reading style. Our results show that DNN-based models outperform HMMs and unit selection on both the realism and expressiveness of the generated talking heads, while in terms of adaptation we can successfully adapt an audio-visual HMM set trained on a neutral speaking style database to a target emotion. Finally, we show that HMM interpolation can indeed generate different levels of intensity for EAVTTS by interpolating an emotion with the neutral reading style, as well as in some cases, generate audio-visual speech with intermediate expressions between two emotions.}, keywords = {}, pubstate = {published}, tppubtype = {article} } High quality expressive speech synthesis has been a long-standing goal towards natural human-computer interaction. Generating a talking head which is both realistic and expressive appears to be a considerable challenge, due to both the high complexity in the acoustic and visual streams and the large non-discrete number of emotional states we would like the talking head to be able to express. In order to cover all the desired emotions, a significant amount of data is required, which poses an additional time-consuming data collection challenge. In this paper we attempt to address the aforementioned problems in an audio-visual context. 
Towards this goal, we propose two deep neural network (DNN) architectures for Video-realistic Expressive Audio-Visual Text-To-Speech synthesis (EAVTTS) and evaluate them by comparing them directly both to traditional hidden Markov model (HMM) based EAVTTS, as well as a concatenative unit selection EAVTTS approach, both on the realism and the expressiveness of the generated talking head. Next, we investigate adaptation and interpolation techniques to address the problem of covering the large emotional space. We use HMM interpolation in order to generate different levels of intensity for an emotion, as well as investigate whether it is possible to generate speech with intermediate speaking styles between two emotions. In addition, we employ HMM adaptation to adapt an HMM-based system to another emotion using only a limited amount of adaptation data from the target emotion. We performed an extensive experimental evaluation on a medium sized audio-visual corpus covering three emotions, namely anger, sadness and happiness, as well as neutral reading style. Our results show that DNN-based models outperform HMMs and unit selection on both the realism and expressiveness of the generated talking heads, while in terms of adaptation we can successfully adapt an audio-visual HMM set trained on a neutral speaking style database to a target emotion. Finally, we show that HMM interpolation can indeed generate different levels of intensity for EAVTTS by interpolating an emotion with the neutral reading style, as well as in some cases, generate audio-visual speech with intermediate expressions between two emotions. |
A Zlatintsi, P Koutras, G Evangelopoulos, N Malandrakis, N Efthymiou, K Pastra, A Potamianos, P Maragos COGNIMUSE: a multimodal video database annotated with saliency, events, semantics and emotion with application to summarization Journal Article EURASIP Journal on Image and Video Processing, 54 , pp. 1–24, 2017. Abstract | BibTeX | Links: [PDF] @article{ZKE+17, title = {COGNIMUSE: a multimodal video database annotated with saliency, events, semantics and emotion with application to summarization}, author = {A Zlatintsi and P Koutras and G Evangelopoulos and N Malandrakis and N Efthymiou and K Pastra and A Potamianos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/Zlatintsi+_COGNIMUSEdb_EURASIP_JIVP-2017.pdf}, doi = {10.1186/s13640-017-0194-1}, year = {2017}, date = {2017-01-01}, journal = {EURASIP Journal on Image and Video Processing}, volume = {54}, pages = {1--24}, abstract = {Research related to computational modeling for machine-based understanding requires ground truth data for training, content analysis, and evaluation. In this paper, we present a multimodal video database, namely COGNIMUSE, annotated with sensory and semantic saliency, events, cross-media semantics, and emotion. The purpose of this database is manifold; it can be used for training and evaluation of event detection and summarization algorithms, for classification and recognition of audio-visual and cross-media events, as well as for emotion tracking. In order to enable comparisons with other computational models, we propose state-of-the-art algorithms, specifically a unified energy-based audio-visual framework and a method for text saliency computation, for the detection of perceptually salient events from videos. Additionally, a movie summarization system for the automatic production of summaries is presented. 
Two kinds of evaluation were performed, an objective based on the saliency annotation of the database and an extensive qualitative human evaluation of the automatically produced summaries, where we investigated what composes high-quality movie summaries, where both methods verified the appropriateness of the proposed methods. The annotation of the database and the code for the summarization system can be found at http://cognimuse.cs.ntua.gr/database.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Research related to computational modeling for machine-based understanding requires ground truth data for training, content analysis, and evaluation. In this paper, we present a multimodal video database, namely COGNIMUSE, annotated with sensory and semantic saliency, events, cross-media semantics, and emotion. The purpose of this database is manifold; it can be used for training and evaluation of event detection and summarization algorithms, for classification and recognition of audio-visual and cross-media events, as well as for emotion tracking. In order to enable comparisons with other computational models, we propose state-of-the-art algorithms, specifically a unified energy-based audio-visual framework and a method for text saliency computation, for the detection of perceptually salient events from videos. Additionally, a movie summarization system for the automatic production of summaries is presented. Two kinds of evaluation were performed, an objective based on the saliency annotation of the database and an extensive qualitative human evaluation of the automatically produced summaries, where we investigated what composes high-quality movie summaries, where both methods verified the appropriateness of the proposed methods. The annotation of the database and the code for the summarization system can be found at http://cognimuse.cs.ntua.gr/database. |
P Maragos Dynamical Systems on Weighted Lattices: General Theory Journal Article Math. Control Signals Syst., 29 (21), 2017. BibTeX | Links: @article{Mara17, title = {Dynamical Systems on Weighted Lattices: General Theory}, author = {P Maragos}, doi = {10.1007/s00498-017-0207-8}, year = {2017}, date = {2017-01-01}, journal = {Math. Control Signals Syst.}, volume = {29}, number = {21}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
G Chalvatzaki, X S Papageorgiou, C S Tzafestas Towards a user-adaptive context-aware robotic walker with a pathological gait assessment system: First experimental study Conference IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), 2017. Abstract | BibTeX | Links: [PDF] @conference{CPT17, title = {Towards a user-adaptive context-aware robotic walker with a pathological gait assessment system: First experimental study}, author = {G Chalvatzaki and X S Papageorgiou and C S Tzafestas}, url = {http://robotics.ntua.gr/wp-content/publications/CPT17.pdf}, doi = {10.1109/IROS.2017.8206388}, year = {2017}, date = {2017-09-01}, booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, pages = {5037-5042}, abstract = {When designing a user-friendly Mobility Assistive Device (MAD) for mobility constrained people, it is important to take into account the diverse spectrum of disabilities, which results to completely different needs to be covered by the MAD for each specific user. An intelligent adaptive behavior is necessary. In this work we present experimental results, using an in house developed methodology for assessing the gait of users with different mobility status while interacting with a robotic MAD. We use data from a laser scanner, mounted on the MAD to track the legs using Particle Filters and Probabilistic Data Association (PDA-PF). The legs' states are fed to an HMM-based pathological gait cycle recognition system to compute in real-time the gait parameters that are crucial for the mobility status characterization of the user. We aim to show that a gait assessment system would be an important feedback for an intelligent MAD. Thus, we use this system to compare the gaits of the subjects using two different control settings of the MAD and we experimentally validate the ability of our system to recognize the impact of the control designs on the users' walking performance. 
The results demonstrate that a generic control scheme does not meet every patient's needs, and therefore, an Adaptive Context-Aware MAD (ACA MAD), that can understand the specific needs of the user, is important for enhancing the human-robot physical interaction.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } When designing a user-friendly Mobility Assistive Device (MAD) for mobility constrained people, it is important to take into account the diverse spectrum of disabilities, which results to completely different needs to be covered by the MAD for each specific user. An intelligent adaptive behavior is necessary. In this work we present experimental results, using an in house developed methodology for assessing the gait of users with different mobility status while interacting with a robotic MAD. We use data from a laser scanner, mounted on the MAD to track the legs using Particle Filters and Probabilistic Data Association (PDA-PF). The legs' states are fed to an HMM-based pathological gait cycle recognition system to compute in real-time the gait parameters that are crucial for the mobility status characterization of the user. We aim to show that a gait assessment system would be an important feedback for an intelligent MAD. Thus, we use this system to compare the gaits of the subjects using two different control settings of the MAD and we experimentally validate the ability of our system to recognize the impact of the control designs on the users' walking performance. The results demonstrate that a generic control scheme does not meet every patient's needs, and therefore, an Adaptive Context-Aware MAD (ACA MAD), that can understand the specific needs of the user, is important for enhancing the human-robot physical interaction. |
G Chalvatzaki, X S Papageorgiou, C S Tzafestas, P Maragos HMM-based Pathological Gait Analyzer for a User-Adaptive Intelligent Robotic Walker Conference Proc. 25th European Conf.(EUSIPCO-17) Workshop: "MultiLearn 2017 - Multimodal processing, modeling and learning for human-computer/robot interaction applications", Kos, Greece, 2017. Abstract | BibTeX | Links: [PDF] @conference{CPTM_WML17, title = {HMM-based Pathological Gait Analyzer for a User-Adaptive Intelligent Robotic Walker}, author = {G Chalvatzaki and X S Papageorgiou and C S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/CPTM_WML17.pdf}, year = {2017}, date = {2017-09-01}, booktitle = {Proc. 25th European Conf.(EUSIPCO-17) Workshop: "MultiLearn 2017 - Multimodal processing, modeling and learning for human-computer/robot interaction applications"}, address = {Kos, Greece}, abstract = {During the past decade, robotic technology has evolved considerably towards the development of cognitive robotic systems that enable close interaction with humans. Application fields of such novel robotic technologies are now wide spreading covering a variety of human assistance functionalities, aiming in particular at supporting the needs of human beings experiencing various forms of mobility or cognitive impairments. Mobility impairments are prevalent in the elderly population and constitute one of the main causes related to difficulties in performing Activities of Daily Living (ADLs) and consequent reduction of quality of life. This paper reports current research work related to the development of a pathological gait analyzer for intelligent robotic rollator aiming to be an input to a user-adaptive and context-aware robot control architecture. Specifically, we present a novel method for human leg tracking using Particle Filters and Probabilistic Data Association from a laser scanner, constituting a non-wearable and non-intrusive approach. 
The tracked positions and velocities of the user’s legs are the observables of an HMM, which provides the gait phases of the detected gait cycles. Given those phases we compute specific gait parameters, which are used for medical diagnosis. The results of our pathological gait analyzer are validated using ground truth data from a GAITRite system. The results presented in this paper demonstrate that the proposed human data analysis scheme has the potential to provide the necessary methodological (modeling, inference, and learning) framework for a cognitive behavior-based robot control system.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } During the past decade, robotic technology has evolved considerably towards the development of cognitive robotic systems that enable close interaction with humans. Application fields of such novel robotic technologies are now wide spreading covering a variety of human assistance functionalities, aiming in particular at supporting the needs of human beings experiencing various forms of mobility or cognitive impairments. Mobility impairments are prevalent in the elderly population and constitute one of the main causes related to difficulties in performing Activities of Daily Living (ADLs) and consequent reduction of quality of life. This paper reports current research work related to the development of a pathological gait analyzer for intelligent robotic rollator aiming to be an input to a user-adaptive and context-aware robot control architecture. Specifically, we present a novel method for human leg tracking using Particle Filters and Probabilistic Data Association from a laser scanner, constituting a non-wearable and non-intrusive approach. The tracked positions and velocities of the user’s legs are the observables of an HMM, which provides the gait phases of the detected gait cycles. Given those phases we compute specific gait parameters, which are used for medical diagnosis. 
The results of our pathological gait analyzer are validated using ground truth data from a GAITRite system. The results presented in this paper demonstrate that the proposed human data analysis scheme has the potential to provide the necessary methodological (modeling, inference, and learning) framework for a cognitive behavior-based robot control system. |
A C Dometios, X S Papageorgiou, A Arvanitakis, C S Tzafestas, P Maragos Real-time End-effector Motion Behavior Planning Approach Using On-line Point-cloud Data Towards a User Adaptive Assistive Bath Robot Conference 2017 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), Vancouver, Canada, 2017. Abstract | BibTeX | Links: [PDF] @conference{DPATM17, title = {Real-time End-effector Motion Behavior Planning Approach Using On-line Point-cloud Data Towards a User Adaptive Assistive Bath Robot}, author = {A C Dometios and X S Papageorgiou and A Arvanitakis and C S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/Dometios17_End-Effector_Motion_Behavior_Planning_PointCloud.pdf}, doi = {10.1109/IROS.2017.8206387}, year = {2017}, date = {2017-09-01}, booktitle = {2017 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, pages = {5031-5036}, address = {Vancouver, Canada}, abstract = {Elderly people have particular needs in performing bathing activities, since these tasks require body flexibility. Our aim is to build an assistive robotic bath system, in order to increase the independence and safety of this procedure. Towards this end, the expertise of professional carers for bathing sequences and appropriate motions has to be adopted, in order to achieve natural, physical human - robot interaction. In this paper, a real-time end-effector motion planning method for an assistive bath robot, using on-line Point-Cloud information, is proposed. The visual feedback obtained from Kinect depth sensor is employed to adapt suitable washing paths to the user’s body part motion and deformable surface. We make use of a navigation function-based controller, with guaranteed globally uniformly asymptotic stability, and bijective transformations for the adaptation of the paths. 
Experiments were conducted with a rigid rectangular object for validation purposes, while a female subject took part to the experiment in order to evaluate and demonstrate the basic concepts of the proposed methodology.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Elderly people have particular needs in performing bathing activities, since these tasks require body flexibility. Our aim is to build an assistive robotic bath system, in order to increase the independence and safety of this procedure. Towards this end, the expertise of professional carers for bathing sequences and appropriate motions has to be adopted, in order to achieve natural, physical human - robot interaction. In this paper, a real-time end-effector motion planning method for an assistive bath robot, using on-line Point-Cloud information, is proposed. The visual feedback obtained from Kinect depth sensor is employed to adapt suitable washing paths to the user’s body part motion and deformable surface. We make use of a navigation function-based controller, with guaranteed globally uniformly asymptotic stability, and bijective transformations for the adaptation of the paths. Experiments were conducted with a rigid rectangular object for validation purposes, while a female subject took part to the experiment in order to evaluate and demonstrate the basic concepts of the proposed methodology. |
A C Dometios, A Tsiami, A Arvanitakis, P Giannoulis, X S Papageorgiou, C S Tzafestas, P Maragos Integrated Speech-based Perception System for User Adaptive Robot Motion Planning in Assistive Bath Scenarios Conference Proc. of the 25th European Signal Processing Conference - Workshop: "MultiLearn 2017 - Multimodal processing, modeling and learning for human-computer/robot interaction applications", Kos, Greece, 2017. Abstract | BibTeX | Links: [PDF] @conference{DTAGPTM17, title = {Integrated Speech-based Perception System for User Adaptive Robot Motion Planning in Assistive Bath Scenarios}, author = {A C Dometios and A Tsiami and A Arvanitakis and P Giannoulis and X S Papageorgiou and C S Tzafestas and P Maragos}, url = {http://www.eurasip.org/Proceedings/Eusipco/Eusipco2017/wpapers/ML5.pdf}, year = {2017}, date = {2017-09-01}, booktitle = {Proc. of the 25th European Signal Processing Conference - Workshop: "MultiLearn 2017 - Multimodal processing, modeling and learning for human-computer/robot interaction applications"}, address = {Kos, Greece}, abstract = {Elderly people have augmented needs in performing bathing activities, since these tasks require body flexibility. Our aim is to build an assistive robotic bath system, in order to increase the independence and safety of this procedure. Towards this end, the expertise of professional carers for bathing sequences and appropriate motions have to be adopted, in order to achieve natural, physical human - robot interaction. The integration of the communication and verbal interaction between the user and the robot during the bathing tasks is a key issue for such a challenging assistive robotic application. In this paper, we tackle this challenge by developing a novel integrated real-time speech-based perception system, which will provide the necessary assistance to the frail senior citizens. This system can be suitable for installation and use in conventional home or hospital bathroom space. 
We employ both a speech recognition system with sub-modules to achieve a smooth and robust human-system communication and a low cost depth camera for end-effector motion planning. With a variety of spoken commands, the system can be adapted to the user’s needs and preferences. The instructed by the user washing commands are executed by a robotic manipulator, demonstrating the progress of each task. The smooth integration of all subsystems is accomplished by a modular and hierarchical decision architecture organized as a Behavior Tree. The system was experimentally tested by successful execution of scenarios from different users with different preferences.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Elderly people have augmented needs in performing bathing activities, since these tasks require body flexibility. Our aim is to build an assistive robotic bath system, in order to increase the independence and safety of this procedure. Towards this end, the expertise of professional carers for bathing sequences and appropriate motions have to be adopted, in order to achieve natural, physical human - robot interaction. The integration of the communication and verbal interaction between the user and the robot during the bathing tasks is a key issue for such a challenging assistive robotic application. In this paper, we tackle this challenge by developing a novel integrated real-time speech-based perception system, which will provide the necessary assistance to the frail senior citizens. This system can be suitable for installation and use in conventional home or hospital bathroom space. We employ both a speech recognition system with sub-modules to achieve a smooth and robust human-system communication and a low cost depth camera for end-effector motion planning. With a variety of spoken commands, the system can be adapted to the user’s needs and preferences. 
The instructed by the user washing commands are executed by a robotic manipulator, demonstrating the progress of each task. The smooth integration of all subsystems is accomplished by a modular and hierarchical decision architecture organized as a Behavior Tree. The system was experimentally tested by successful execution of scenarios from different users with different preferences. |
G. Velentzas, C. Tzafestas, M. Khamassi Bio-inspired meta-learning for active exploration during non-stationary multi-armed bandit tasks Conference Proc. IEEE Intelligent Systems Conference, London, UK, 2017. Abstract | BibTeX | Links: [PDF] @conference{BFB97, title = {Bio-inspired meta-learning for active exploration during non-stationary multi-armed bandit tasks}, author = {G. Velentzas and C. Tzafestas and M. Khamassi}, url = {http://robotics.ntua.gr/wp-content/publications/Velentzas_Intellisys2017.pdf}, doi = {10.1109/IntelliSys.2017.8324365}, year = {2017}, date = {2017-09-01}, booktitle = {Proc. IEEE Intelligent Systems Conference}, address = {London, UK}, abstract = {Fast adaptation to changes in the environment requires agents (animals, robots and simulated artefacts) to be able to dynamically tune an exploration-exploitation trade-off during learning. This trade-off usually determines a fixed proportion of exploitative choices (i.e. choice of the action that subjectively appears as best at a given moment) relative to exploratory choices (i.e. testing other actions that now appear worst but may turn out promising later). Rather than using a fixed proportion, non-stationary multi-armed bandit methods in the field of machine learning have proven that principles such as exploring actions that have not been tested for a long time can lead to performance closer to optimal - bounded regret. In parallel, researches in active exploration in the fields of robot learning and computational neuroscience of learning and decision-making have proposed alternative solutions such as transiently increasing exploration in response to drops in average performance, or attributing exploration bonuses specifically to actions associated with high uncertainty in order to gain information when choosing them. 
In this work, we compare different methods from machine learning, computational neuroscience and robot learning on a set of non-stationary stochastic multi-armed bandit tasks: abrupt shifts; best bandit becomes worst one and vice versa; multiple shifting frequencies. We find that different methods are appropriate in different scenarios. We propose a new hybrid method combining bio-inspired meta-learning, kalman filter and exploration bonuses and show that it outperforms other methods in these scenarios.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Fast adaptation to changes in the environment requires agents (animals, robots and simulated artefacts) to be able to dynamically tune an exploration-exploitation trade-off during learning. This trade-off usually determines a fixed proportion of exploitative choices (i.e. choice of the action that subjectively appears as best at a given moment) relative to exploratory choices (i.e. testing other actions that now appear worst but may turn out promising later). Rather than using a fixed proportion, non-stationary multi-armed bandit methods in the field of machine learning have proven that principles such as exploring actions that have not been tested for a long time can lead to performance closer to optimal - bounded regret. In parallel, researches in active exploration in the fields of robot learning and computational neuroscience of learning and decision-making have proposed alternative solutions such as transiently increasing exploration in response to drops in average performance, or attributing exploration bonuses specifically to actions associated with high uncertainty in order to gain information when choosing them. In this work, we compare different methods from machine learning, computational neuroscience and robot learning on a set of non-stationary stochastic multi-armed bandit tasks: abrupt shifts; best bandit becomes worst one and vice versa; multiple shifting frequencies. 
We find that different methods are appropriate in different scenarios. We propose a new hybrid method combining bio-inspired meta-learning, Kalman filter and exploration bonuses and show that it outperforms other methods in these scenarios. |
G Chalvatzaki, X S Papageorgiou, C S Tzafestas, P Maragos Estimating double support in pathological gaits using an HMM-based analyzer for an intelligent robotic walker Conference IEEE International Symposium on Robot and Human Interactive Communication (RO-MAN), 2017. Abstract | BibTeX | Links: [PDF] @conference{CPTM_ROMAN17, title = {Estimating double support in pathological gaits using an HMM-based analyzer for an intelligent robotic walker}, author = {G Chalvatzaki and X S Papageorgiou and C S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/CPTM_ROMAN17.pdf}, doi = {10.1109/ROMAN.2017.8172287}, year = {2017}, date = {2017-08-01}, booktitle = {IEEE International Symposium on Robot and Human Interactive Communication (RO-MAN)}, pages = {101-106}, abstract = {For a robotic walker designed to assist mobility constrained people, it is important to take into account the different spectrum of pathological walking patterns, which result into completely different needs to be covered for each specific user. For a deployable intelligent assistant robot it is necessary to have a precise gait analysis system, providing real-time monitoring of the user and extracting specific gait parameters, which are associated with the rehabilitation progress and the risk of fall. In this paper, we present a completely non-invasive framework for the on-line analysis of pathological human gait and the recognition of specific gait phases and events. The performance of this gait analysis system is assessed, in particular, as related to the estimation of double support phases, which are typically difficult to extract reliably, especially when applying non-wearable and non-intrusive technologies. Furthermore, the duration of double support phases constitutes an important gait parameter and a critical indicator in pathological gait patterns. 
The performance of this framework is assessed using real data collected from an ensemble of elderly persons with different pathologies. The estimated gait parameters are experimentally validated using ground truth data provided by a Motion Capture system. The results obtained and presented in this paper demonstrate that the proposed human data analysis (modeling, learning and inference) framework has the potential to support efficient detection and classification of specific walking pathologies, as needed to empower a cognitive robotic mobility-assistance device with user-adaptive and context-aware functionalities.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } For a robotic walker designed to assist mobility constrained people, it is important to take into account the different spectrum of pathological walking patterns, which result into completely different needs to be covered for each specific user. For a deployable intelligent assistant robot it is necessary to have a precise gait analysis system, providing real-time monitoring of the user and extracting specific gait parameters, which are associated with the rehabilitation progress and the risk of fall. In this paper, we present a completely non-invasive framework for the on-line analysis of pathological human gait and the recognition of specific gait phases and events. The performance of this gait analysis system is assessed, in particular, as related to the estimation of double support phases, which are typically difficult to extract reliably, especially when applying non-wearable and non-intrusive technologies. Furthermore, the duration of double support phases constitutes an important gait parameter and a critical indicator in pathological gait patterns. The performance of this framework is assessed using real data collected from an ensemble of elderly persons with different pathologies. 
The estimated gait parameters are experimentally validated using ground truth data provided by a Motion Capture system. The results obtained and presented in this paper demonstrate that the proposed human data analysis (modeling, learning and inference) framework has the potential to support efficient detection and classification of specific walking pathologies, as needed to empower a cognitive robotic mobility-assistance device with user-adaptive and context-aware functionalities. |
Theodore Tsitsimis, George Velentzas, Mehdi Khamassi, Costas Tzafestas Online adaptation to human engagement perturbations in simulated human-robot interaction using hybrid reinforcement learning Conference Proc. of the 25th European Signal Processing Conference - Workshop: "MultiLearn 2017 - Multimodal processing, modeling and learning for human-computer/robot interaction applications", Kos, Greece, 2017., Kos, Greece, 2017. Abstract | BibTeX | Links: [PDF] @conference{BFB98, title = {Online adaptation to human engagement perturbations in simulated human-robot interaction using hybrid reinforcement learning}, author = {Theodore Tsitsimis and George Velentzas and Mehdi Khamassi and Costas Tzafestas}, editor = {Michael Aron}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/MultiLearn2017.pdf}, year = {2017}, date = {2017-08-01}, booktitle = {Proc. of the 25th European Signal Processing Conference - Workshop: "MultiLearn 2017 - Multimodal processing, modeling and learning for human-computer/robot interaction applications", Kos, Greece, 2017.}, address = {Kos, Greece}, abstract = {Dynamic uncontrolled human-robot interaction requires robots to be able to adapt to changes in the human’s behavior and intentions. Among relevant signals, non-verbal cues such as the human’s gaze can provide the robot with important information about the human’s current engagement in the task, and whether the robot should continue its current behavior or not. In a previous work [1] we proposed an active exploration algorithm for reinforcement learning where the reward function is the weighted sum of the human’s current engagement and variations of this engagement (so that a low but increasing engagement is rewarding). We used a structured (parameterized) continuous action space where a meta-learning algorithm is applied to simultaneously tune the exploration in discrete and continuous action space, enabling the robot to learn which discrete action is expected by the human (e.g. 
moving an object) and with which velocity of movement. In this paper we want to show the performance of the algorithm to a simulated human-robot interaction task where a practical approach is followed to estimate human engagement through visual cues of the head pose. We then measure the adaptation of the algorithm to engagement perturbations simulated as changes in the optimal action parameter and we quantify its performance for variations in perturbation duration and measurement noise.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Dynamic uncontrolled human-robot interaction requires robots to be able to adapt to changes in the human’s behavior and intentions. Among relevant signals, non-verbal cues such as the human’s gaze can provide the robot with important information about the human’s current engagement in the task, and whether the robot should continue its current behavior or not. In a previous work [1] we proposed an active exploration algorithm for reinforcement learning where the reward function is the weighted sum of the human’s current engagement and variations of this engagement (so that a low but increasing engagement is rewarding). We used a structured (parameterized) continuous action space where a meta-learning algorithm is applied to simultaneously tune the exploration in discrete and continuous action space, enabling the robot to learn which discrete action is expected by the human (e.g. moving an object) and with which velocity of movement. In this paper we want to show the performance of the algorithm to a simulated human-robot interaction task where a practical approach is followed to estimate human engagement through visual cues of the head pose. We then measure the adaptation of the algorithm to engagement perturbations simulated as changes in the optimal action parameter and we quantify its performance for variations in perturbation duration and measurement noise. |
G Chalvatzaki, X S Papageorgiou, C S Tzafestas, P Maragos Comparative experimental validation of human gait tracking algorithms for an intelligent robotic rollator Conference IEEE International Conference on Robotics and Automation (ICRA), 2017. Abstract | BibTeX | Links: [PDF] @conference{CPTM_ICRA17, title = {Comparative experimental validation of human gait tracking algorithms for an intelligent robotic rollator}, author = {G Chalvatzaki and X S Papageorgiou and C S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/CPTM_ICRA17.pdf}, doi = {10.1109/ICRA.2017.7989713}, year = {2017}, date = {2017-05-01}, booktitle = {IEEE International Conference on Robotics and Automation (ICRA)}, pages = {6026-6031}, abstract = {Tracking human gait accurately and robustly constitutes a key factor for a smart robotic walker, aiming to provide assistance to patients with different mobility impairment. A context-aware assistive robot needs constant knowledge of the user's kinematic state to assess the gait status and adjust its movement properly to provide optimal assistance. In this work, we experimentally validate the performance of two gait tracking algorithms using data from elderly patients; the first algorithm employs a Kalman Filter (KF), while the second one tracks the user legs separately using two probabilistically associated Particle Filters (PFs). The algorithms are compared according to their accuracy and robustness, using data captured from real experiments, where elderly subjects performed specific walking scenarios with physical assistance from a prototype Robotic Rollator. Sensorial data were provided by a laser rangefinder mounted on the robotic platform recording the movement of the user's legs. The accuracy of the proposed algorithms is analysed and validated with respect to ground truth data provided by a Motion Capture system tracking a set of visual markers worn by the patients. 
The robustness of the two tracking algorithms is also analysed comparatively in a complex maneuvering scenario. Current experimental findings demonstrate the superior performance of the PFs in difficult cases of occlusions and clutter, where KF tracking often fails.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Tracking human gait accurately and robustly constitutes a key factor for a smart robotic walker, aiming to provide assistance to patients with different mobility impairment. A context-aware assistive robot needs constant knowledge of the user's kinematic state to assess the gait status and adjust its movement properly to provide optimal assistance. In this work, we experimentally validate the performance of two gait tracking algorithms using data from elderly patients; the first algorithm employs a Kalman Filter (KF), while the second one tracks the user legs separately using two probabilistically associated Particle Filters (PFs). The algorithms are compared according to their accuracy and robustness, using data captured from real experiments, where elderly subjects performed specific walking scenarios with physical assistance from a prototype Robotic Rollator. Sensorial data were provided by a laser rangefinder mounted on the robotic platform recording the movement of the user's legs. The accuracy of the proposed algorithms is analysed and validated with respect to ground truth data provided by a Motion Capture system tracking a set of visual markers worn by the patients. The robustness of the two tracking algorithms is also analysed comparatively in a complex maneuvering scenario. Current experimental findings demonstrate the superior performance of the PFs in difficult cases of occlusions and clutter, where KF tracking often fails. |
Mehdi Khamassi, George Velentzas, Theodore Tsitsimis, Costas Tzafestas Active exploration and parameterized reinforcement learning applied to a simulated human-robot interaction task Conference Proc. IEEE Int'l Conference on Robotic Computing, Taichung, Taiwan, 2017. Abstract | BibTeX | Links: [PDF] @conference{BFB95, title = {Active exploration and parameterized reinforcement learning applied to a simulated human-robot interaction task}, author = {Mehdi Khamassi and George Velentzas and Theodore Tsitsimis and Costas Tzafestas}, url = {http://robotics.ntua.gr/wp-content/publications/khamassi_IRC2017.pdf}, doi = {10.1109/IRC.2017.33}, year = {2017}, date = {2017-04-01}, booktitle = {Proc. IEEE Int'l Conference on Robotic Computing}, address = {Taichung, Taiwan}, abstract = {Online model-free reinforcement learning (RL) methods with continuous actions are playing a prominent role when dealing with real-world applications such as Robotics. However, when confronted to non-stationary environments, these methods crucially rely on an exploration-exploitation trade-off which is rarely dynamically and automatically adjusted to changes in the environment. Here we propose an active exploration algorithm for RL in structured (parameterized) continuous action space. This framework deals with a set of discrete actions, each of which is parameterized with continuous variables. Discrete exploration is controlled through a Boltzmann softmax function with an inverse temperature β parameter. In parallel, a Gaussian exploration is applied to the continuous action parameters. We apply a meta-learning algorithm based on the comparison between variations of short-term and long-term reward running averages to simultaneously tune β and the width of the Gaussian distribution from which continuous action parameters are drawn. We first show that this algorithm reaches state-of-the-art performance in the non-stationary multi-armed bandit paradigm, while also being generalizable to continuous actions and multi-step tasks. 
We then apply it to a simulated human-robot interaction task, and show that it outperforms continuous parameterized RL both without active exploration and with active exploration based on uncertainty variations measured by a Kalman-Q-learning algorithm.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Online model-free reinforcement learning (RL) methods with continuous actions are playing a prominent role when dealing with real-world applications such as Robotics. However, when confronted to non-stationary environments, these methods crucially rely on an exploration-exploitation trade-off which is rarely dynamically and automatically adjusted to changes in the environment. Here we propose an active exploration algorithm for RL in structured (parameterized) continuous action space. This framework deals with a set of discrete actions, each of which is parameterized with continuous variables. Discrete exploration is controlled through a Boltzmann softmax function with an inverse temperature β parameter. In parallel, a Gaussian exploration is applied to the continuous action parameters. We apply a meta-learning algorithm based on the comparison between variations of short-term and long-term reward running averages to simultaneously tune β and the width of the Gaussian distribution from which continuous action parameters are drawn. We first show that this algorithm reaches state-of-the-art performance in the non-stationary multi-armed bandit paradigm, while also being generalizable to continuous actions and multi-step tasks. We then apply it to a simulated human-robot interaction task, and show that it outperforms continuous parameterized RL both without active exploration and with active exploration based on uncertainty variations measured by a Kalman-Q-learning algorithm. |
A Zlatintsi, I Rodomagoulakis, V Pitsikalis, P Koutras, N Kardaris, X Papageorgiou, C Tzafestas, P Maragos Social Human-Robot Interaction for the Elderly: Two Real-life Use Cases, Conference ACM/IEEE International Conference on Human-Robot Interaction (HRI), Vienna, Austria, 2017. Abstract | BibTeX | Links: [PDF] @conference{ZRP+17, title = {Social Human-Robot Interaction for the Elderly: Two Real-life Use Cases,}, author = {A Zlatintsi and I Rodomagoulakis and V Pitsikalis and P Koutras and N Kardaris and X Papageorgiou and C Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/Zlatintsi+_SocialHRIforTheElderly_HRI-17.pdf}, year = {2017}, date = {2017-03-01}, booktitle = {ACM/IEEE International Conference on Human-Robot Interaction (HRI)}, address = {Vienna, Austria}, abstract = {We explore new aspects on assistive living via smart social human-robot interaction (HRI) involving automatic recognition of multimodal gestures and speech in a natural interface, providing social features in HRI. We discuss a whole framework of resources, including datasets and tools, briefly shown in two real-life use cases for elderly subjects: a multimodal interface of an assistive robotic rollator and an assistive bathing robot. We discuss these domain specific tasks, and open source tools, which can be used to build such HRI systems, as well as indicative results. Sharing such resources can open new perspectives in assistive HRI.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We explore new aspects on assistive living via smart social human-robot interaction (HRI) involving automatic recognition of multimodal gestures and speech in a natural interface, providing social features in HRI. We discuss a whole framework of resources, including datasets and tools, briefly shown in two real-life use cases for elderly subjects: a multimodal interface of an assistive robotic rollator and an assistive bathing robot. 
We discuss these domain specific tasks, and open source tools, which can be used to build such HRI systems, as well as indicative results. Sharing such resources can open new perspectives in assistive HRI. |
X S Papageorgiou, G Chalvatzaki, A Dometios, C S Tzafestas, P Maragos Intelligent Assistive Robotic Systems for the Elderly: Two Real-life Use Cases Conference C_PETRA, ACM, Island of Rhodes, Greece, 2017, ISBN: 978-1-4503-5227-7. Abstract | BibTeX | Links: [PDF] @conference{PETRA2017, title = {Intelligent Assistive Robotic Systems for the Elderly: Two Real-life Use Cases}, author = {X S Papageorgiou and G Chalvatzaki and A Dometios and C S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/PETRA2017.pdf}, doi = {10.1145/3056540.3076184}, isbn = {978-1-4503-5227-7}, year = {2017}, date = {2017-01-01}, booktitle = {C_PETRA}, pages = {360--365}, publisher = {ACM}, address = {Island of Rhodes, Greece}, abstract = {Mobility impairments are prevalent in the elderly population and constitute one of the main causes related to difficulties in performing Activities of Daily Living (ADLs) and consequent reduction of quality of life. When designing a user-friendly assistive device for mobility constrained people, it is important to take into account the diverse spectrum of disabilities, which results into completely different needs to be covered by the device for each specific user. An intelligent adaptive behavior is necessary for the deployment of such systems. Also, elderly people have particular needs in specific case of performing bathing activities, since these tasks require body flexibility. We explore new aspects of assistive living via intelligent assistive robotic systems involving human robot interaction in a natural interface. Our aim is to build assistive robotic systems, in order to increase the independence and safety of these procedures. Towards this end, the expertise of professional carers for walking or bathing sequences and appropriate motions have to be adopted, in order to achieve natural, physical human - robot interaction. 
Our goal is to report current research work related to the development of two real-life use cases of intelligent robotic systems for elderly aiming to provide user-adaptive and context-aware assistance.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Mobility impairments are prevalent in the elderly population and constitute one of the main causes related to difficulties in performing Activities of Daily Living (ADLs) and consequent reduction of quality of life. When designing a user-friendly assistive device for mobility constrained people, it is important to take into account the diverse spectrum of disabilities, which results into completely different needs to be covered by the device for each specific user. An intelligent adaptive behavior is necessary for the deployment of such systems. Also, elderly people have particular needs in specific case of performing bathing activities, since these tasks require body flexibility. We explore new aspects of assistive living via intelligent assistive robotic systems involving human robot interaction in a natural interface. Our aim is to build assistive robotic systems, in order to increase the independence and safety of these procedures. Towards this end, the expertise of professional carers for walking or bathing sequences and appropriate motions have to be adopted, in order to achieve natural, physical human - robot interaction. Our goal is to report current research work related to the development of two real-life use cases of intelligent robotic systems for elderly aiming to provide user-adaptive and context-aware assistance. |
G Karamanolakis, E Iosif, A Zlatintsi, A Pikrakis, A Potamianos Audio-based Distributional Semantic Models for Music Auto-tagging and Similarity Measurement Conference Proc. MultiLearn2017: Multimodal Processing, Modeling and Learning for Human-Computer/Robot Interaction Workshop, in conjunction with European Signal Processing Conference, Kos, Greece, 2017. Abstract | BibTeX | Links: [PDF] @conference{KIZ+17, title = {Audio-based Distributional Semantic Models for Music Auto-tagging and Similarity Measurement}, author = {G Karamanolakis and E Iosif and A Zlatintsi and A Pikrakis and A Potamianos}, url = {http://robotics.ntua.gr/wp-content/publications/Karamanolakis+_MultiLearn-17_ML7.pdf}, year = {2017}, date = {2017-01-01}, booktitle = {Proc. MultiLearn2017: Multimodal Processing, Modeling and Learning for Human-Computer/Robot Interaction Workshop, in conjunction with European Signal Processing Conference}, address = {Kos, Greece}, abstract = {The recent development of Audio-based Distributional Semantic Models (ADSMs) enables the computation of audio and lexical vector representations in a joint acoustic-semantic space. In this work, these joint representations are applied to the problem of automatic tag generation. The predicted tags together with their corresponding acoustic representation are exploited for the construction of acoustic-semantic clip embeddings. The proposed algorithms are evaluated on the task of similarity measurement between music clips. Acoustic-semantic models are shown to outperform the state-of-the-art for this task and produce high quality tags for audio/music clips.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } The recent development of Audio-based Distributional Semantic Models (ADSMs) enables the computation of audio and lexical vector representations in a joint acoustic-semantic space. In this work, these joint representations are applied to the problem of automatic tag generation. 
The predicted tags together with their corresponding acoustic representation are exploited for the construction of acoustic-semantic clip embeddings. The proposed algorithms are evaluated on the task of similarity measurement between music clips. Acoustic-semantic models are shown to outperform the state-of-the-art for this task and produce high quality tags for audio/music clips. |
Vasileios Charisopoulos, Petros Maragos Morphological perceptrons: Geometry and training algorithms Conference Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), 10225 LNCS , 2017, ISSN: 16113349. Abstract | BibTeX | Links: [PDF] @conference{346, title = {Morphological perceptrons: Geometry and training algorithms}, author = { Vasileios Charisopoulos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/CharisopoulosMaragos_MorphologPerceptGeometryTrainAlgor_ISMM17.pdf}, doi = {10.1007/978-3-319-57240-6_1}, issn = {16113349}, year = {2017}, date = {2017-01-01}, booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {10225 LNCS}, pages = {3--15}, abstract = {Neural networks have traditionally relied on mostly linear models, such as the multiply-accumulate architecture of a linear perceptron that remains the dominant paradigm of neuronal computation. However, from a biological standpoint, neuron activity may as well involve inherently nonlinear and competitive operations. Mathematical morphology and minimax algebra provide the necessary background in the study of neural networks made up from these kinds of nonlinear units. This paper deals with such a model, called the morphological perceptron. We study some of its geometrical properties and introduce a training algorithm for binary classification. We point out the relationship between morphological classifiers and the recent field of tropical geometry, which enables us to obtain a precise bound on the number of linear regions of the maxout unit, a popular choice for deep neural networks introduced recently. 
Finally, we present some relevant numerical results.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Neural networks have traditionally relied on mostly linear models, such as the multiply-accumulate architecture of a linear perceptron that remains the dominant paradigm of neuronal computation. However, from a biological standpoint, neuron activity may as well involve inherently nonlinear and competitive operations. Mathematical morphology and minimax algebra provide the necessary background in the study of neural networks made up from these kinds of nonlinear units. This paper deals with such a model, called the morphological perceptron. We study some of its geometrical properties and introduce a training algorithm for binary classification. We point out the relationship between morphological classifiers and the recent field of tropical geometry, which enables us to obtain a precise bound on the number of linear regions of the maxout unit, a popular choice for deep neural networks introduced recently. Finally, we present some relevant numerical results. |
Mehdi Khamassi, George Velentzas, Theodore Tsitsimis, Costas Tzafestas Active exploration and parameterized reinforcement learning applied to a simulated human-robot interaction task Conference Proceedings - 2017 1st IEEE International Conference on Robotic Computing, IRC 2017, 2017, ISBN: 9781509067237. Abstract | BibTeX | Links: [PDF] @conference{337, title = {Active exploration and parameterized reinforcement learning applied to a simulated human-robot interaction task}, author = { Mehdi Khamassi and George Velentzas and Theodore Tsitsimis and Costas Tzafestas}, url = {http://ieeexplore.ieee.org/document/7926511/%0Ahttp://ieeexplore.ieee.org/ielx7/7925476/7926477/07926511.pdf?tp=&arnumber=7926511&isnumber=7926477}, doi = {10.1109/IRC.2017.33}, isbn = {9781509067237}, year = {2017}, date = {2017-01-01}, booktitle = {Proceedings - 2017 1st IEEE International Conference on Robotic Computing, IRC 2017}, pages = {28--35}, abstract = {{\textcopyright} 2017 IEEE. Online model-free reinforcement learning (RL) methods with continuous actions are playing a prominent role when dealing with real-world applications such as Robotics. However, when confronted to non-stationary environments, these methods crucially rely on an exploration-exploitation trade-off which is rarely dynamically and automatically adjusted to changes in the environment. Here we propose an active exploration algorithm for RL in structured (parameterized) continuous action space. This framework deals with a set of discrete actions, each of which is parameterized with continuous variables. Discrete exploration is controlled through a Boltzmann softmax function with an inverse temperature $\beta$ parameter. In parallel, a Gaussian exploration is applied to the continuous action parameters. 
We apply a meta-learning algorithm based on the comparison between variations of short-term and long-term reward running averages to simultaneously tune $\beta$ and the width of the Gaussian distribution from which continuous action parameters are drawn. We first show that this algorithm reaches state-of-the-art performance in the non-stationary multi-armed bandit paradigm, while also being generalizable to continuous actions and multi-step tasks. We then apply it to a simulated human-robot interaction task, and show that it outperforms continuous parameterized RL both without active exploration and with active exploration based on uncertainty variations measured by a Kalman-Q-learning algorithm.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } © 2017 IEEE. Online model-free reinforcement learning (RL) methods with continuous actions are playing a prominent role when dealing with real-world applications such as Robotics. However, when confronted to non-stationary environments, these methods crucially rely on an exploration-exploitation trade-off which is rarely dynamically and automatically adjusted to changes in the environment. Here we propose an active exploration algorithm for RL in structured (parameterized) continuous action space. This framework deals with a set of discrete actions, each of which is parameterized with continuous variables. Discrete exploration is controlled through a Boltzmann softmax function with an inverse temperature β parameter. In parallel, a Gaussian exploration is applied to the continuous action parameters. We apply a meta-learning algorithm based on the comparison between variations of short-term and long-term reward running averages to simultaneously tune β and the width of the Gaussian distribution from which continuous action parameters are drawn. 
We first show that this algorithm reaches state-of-the-art performance in the non-stationary multi-armed bandit paradigm, while also being generalizable to continuous actions and multi-step tasks. We then apply it to a simulated human-robot interaction task, and show that it outperforms continuous parameterized RL both without active exploration and with active exploration based on uncertainty variations measured by a Kalman-Q-learning algorithm. |
Athanasios Katsamanis, Vassilis Pitsikalis, Stavros Theodorakis, Petros Maragos Multimodal Gesture Recognition Book Chapter The Handbook of Multimodal-Multisensor Interfaces: Foundations, User Modeling, and Common Modality Combinations - Volume 1, pp. 449–487, Association for Computing Machinery and Morgan & Claypool, 2017, ISBN: 9781970001679. @inbook{10.1145/3015783.3015796, title = {Multimodal Gesture Recognition}, author = {Athanasios Katsamanis and Vassilis Pitsikalis and Stavros Theodorakis and Petros Maragos}, url = {https://doi.org/10.1145/3015783.3015796}, isbn = {9781970001679}, year = {2017}, date = {2017-01-01}, booktitle = {The Handbook of Multimodal-Multisensor Interfaces: Foundations, User Modeling, and Common Modality Combinations - Volume 1}, pages = {449–487}, publisher = {Association for Computing Machinery and Morgan & Claypool}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } |
2016 |
John N Karigiannis, Costas S Tzafestas Model-free learning on robot kinematic chains using a nested multi-agent topology Journal Article Journal of Experimental and Theoretical Artificial Intelligence, 28 (6), pp. 913–954, 2016, ISSN: 13623079. @article{321, title = {Model-free learning on robot kinematic chains using a nested multi-agent topology}, author = {John N Karigiannis and Costas S Tzafestas}, doi = {10.1080/0952813X.2015.1042923}, issn = {13623079}, year = {2016}, date = {2016-01-01}, journal = {Journal of Experimental and Theoretical Artificial Intelligence}, volume = {28}, number = {6}, pages = {913--954}, abstract = {This paper proposes a model-free learning scheme for the developmental acquisition of robot kinematic control and dexterous manipulation skills. The approach is based on a nested-hierarchical multi-agent architecture that intuitively encapsulates the topology of robot kinematic chains, where the activity of each independent degree-of-freedom (DOF) is finally mapped onto a distinct agent. Each one of those agents progressively evolves a local kinematic control strategy in a game-theoretic sense, that is, based on a partial (local) view of the whole system topology, which is incrementally updated through a recursive communication process according to the nested-hierarchical topology. Learning is thus approached not through demonstration and training but through an autonomous self-exploration process. A fuzzy reinforcement learning scheme is employed within each agent to enable efficient exploration in a continuous state–action domain. This paper constitutes in fact a proof of concept, demonstrating that glo...}, keywords = {}, pubstate = {published}, tppubtype = {article} } This paper proposes a model-free learning scheme for the developmental acquisition of robot kinematic control and dexterous manipulation skills. 
The approach is based on a nested-hierarchical multi-agent architecture that intuitively encapsulates the topology of robot kinematic chains, where the activity of each independent degree-of-freedom (DOF) is finally mapped onto a distinct agent. Each one of those agents progressively evolves a local kinematic control strategy in a game-theoretic sense, that is, based on a partial (local) view of the whole system topology, which is incrementally updated through a recursive communication process according to the nested-hierarchical topology. Learning is thus approached not through demonstration and training but through an autonomous self-exploration process. A fuzzy reinforcement learning scheme is employed within each agent to enable efficient exploration in a continuous state–action domain. This paper constitutes in fact a proof of concept, demonstrating that glo... |
N Kardaris, I Rodomagoulakis, V Pitsikalis, A Arvanitakis, P Maragos A Platform for Building New Human-Computer Interface Systems that Support Online Automatic Recognition of Audio-Gestural Commands Conference Proceedings of the 2016 ACM on Multimedia Conference, Amsterdam, The Netherlands, 2016. Abstract | BibTeX | Links: [PDF] @conference{acm_kardaris_2016, title = {A Platform for Building New Human-Computer Interface Systems that Support Online Automatic Recognition of Audio-Gestural Commands}, author = {N Kardaris and I Rodomagoulakis and V Pitsikalis and A Arvanitakis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/KRPAM_BuildingMultimodalInterfaces_ACM-MM2016.pdf}, doi = {10.1145/2964284.2973794}, year = {2016}, date = {2016-10-01}, booktitle = {Proceedings of the 2016 ACM on Multimedia Conference}, address = {Amsterdam, The Netherlands}, abstract = {We introduce a new framework to build human-computer interfaces that provide online automatic audio-gestural command recognition. The overall system allows the construction of a multimodal interface that recognizes user input expressed naturally as audio commands and manual gestures, captured by sensors such as Kinect. It includes a component for acquiring multimodal user data which is used as input to a module responsible for training audio-gestural models. These models are employed by the automatic recognition component, which supports online recognition of audiovisual modalities. The overall framework is exemplified by a working system use case. This demonstrates the potential of the overall software platform, which can be employed to build other new human-computer interaction systems. Moreover, users may populate libraries of models and/or data that can be shared in the network. 
In this way users may reuse or extend existing systems.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We introduce a new framework to build human-computer interfaces that provide online automatic audio-gestural command recognition. The overall system allows the construction of a multimodal interface that recognizes user input expressed naturally as audio commands and manual gestures, captured by sensors such as Kinect. It includes a component for acquiring multimodal user data which is used as input to a module responsible for training audio-gestural models. These models are employed by the automatic recognition component, which supports online recognition of audiovisual modalities. The overall framework is exemplified by a working system use case. This demonstrates the potential of the overall software platform, which can be employed to build other new human-computer interaction systems. Moreover, users may populate libraries of models and/or data that can be shared in the network. In this way users may reuse or extend existing systems. |
A Guler, N Kardaris, S Chandra, V Pitsikalis, C Werner, K Hauer, C Tzafestas, P Maragos, I Kokkinos Human Joint Angle Estimation and Gesture Recognition for Assistive Robotic Vision Conference Proc. of Workshop on Assistive Computer Vision and Robotics, European Conf. on Computer Vision (ECCV-2016), Amsterdam, The Netherlands, 2016. Abstract | BibTeX | Links: [PDF] @conference{guler_joint_gesture_2016, title = {Human Joint Angle Estimation and Gesture Recognition for Assistive Robotic Vision}, author = {A Guler and N Kardaris and S Chandra and V Pitsikalis and C Werner and K Hauer and C Tzafestas and P Maragos and I Kokkinos}, url = {http://robotics.ntua.gr/wp-content/publications/PoseEstimGestureRecogn-AssistRobotVision_ACVR2016-ECCV-Workshop.pdf}, doi = {10.1007/978-3-319-48881-3_29}, year = {2016}, date = {2016-10-01}, booktitle = {Proc. of Workshop on Assistive Computer Vision and Robotics, European Conf. on Computer Vision (ECCV-2016)}, address = {Amsterdam, The Netherlands}, abstract = {We explore new directions for automatic human gesture recognition and human joint angle estimation as applied for human-robot interaction in the context of an actual challenging task of assistive living for real-life elderly subjects. Our contributions include state-of-the-art approaches for both low- and mid-level vision, as well as for higher level action and gesture recognition. The first direction investigates a deep learning based framework for the challenging task of human joint angle estimation on noisy real world RGB-D images. The second direction includes the employment of dense trajectory features for online processing of videos for automatic gesture recognition with real-time performance. 
Our approaches are evaluated both qualitatively and quantitatively on a newly acquired dataset that is constructed on a challenging real-life scenario on assistive living for elderly subjects.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We explore new directions for automatic human gesture recognition and human joint angle estimation as applied for human-robot interaction in the context of an actual challenging task of assistive living for real-life elderly subjects. Our contributions include state-of-the-art approaches for both low- and mid-level vision, as well as for higher level action and gesture recognition. The first direction investigates a deep learning based framework for the challenging task of human joint angle estimation on noisy real world RGB-D images. The second direction includes the employment of dense trajectory features for online processing of videos for automatic gesture recognition with real-time performance. Our approaches are evaluated both qualitatively and quantitatively on a newly acquired dataset that is constructed on a challenging real-life scenario on assistive living for elderly subjects. |
N Kardaris, V Pitsikalis, E Mavroudi, P Maragos Introducing Temporal Order of Dominant Visual Word Sub-Sequences for Human Action Recognition Conference Proc. of IEEE Int'l Conf. on Image Processing (ICIP-2016), Phoenix, AZ, USA, 2016. Abstract | BibTeX | Links: [PDF] @conference{acm_kardaris_2016b, title = {Introducing Temporal Order of Dominant Visual Word Sub-Sequences for Human Action Recognition}, author = {N Kardaris and V Pitsikalis and E Mavroudi and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/KRPAM_BuildingMultimodalInterfaces_ACM-MM2016.pdf}, doi = {10.1109/ICIP.2016.7532922}, year = {2016}, date = {2016-09-01}, booktitle = {Proc. of IEEE Int'l Conf. on Image Processing (ICIP-2016)}, address = {Phoenix, AZ, USA}, abstract = {We present a novel video representation for human action recognition by considering temporal sequences of visual words. Based on state-of-the-art dense trajectories, we introduce temporal bundles of dominant, that is most frequent, visual words. These are employed to construct a complementary action representation of ordered dominant visual word sequences, that additionally incorporates fine-grained temporal information. We exploit the introduced temporal information by applying local sub-sequence alignment that quantifies the similarity between sequences. This facilitates the fusion of our representation with the bag-of-visual-words (BoVW) representation. Our approach incorporates sequential temporal structure and results in a low-dimensional representation compared to the BoVW, while still yielding a descent result when combined with it. Experiments on the KTH, Hollywood2 and the challenging HMDB51 datasets show that the proposed framework is complementary to the BoVW representation, which discards temporal order}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We present a novel video representation for human action recognition by considering temporal sequences of visual words. 
Based on state-of-the-art dense trajectories, we introduce temporal bundles of dominant, that is most frequent, visual words. These are employed to construct a complementary action representation of ordered dominant visual word sequences, that additionally incorporates fine-grained temporal information. We exploit the introduced temporal information by applying local sub-sequence alignment that quantifies the similarity between sequences. This facilitates the fusion of our representation with the bag-of-visual-words (BoVW) representation. Our approach incorporates sequential temporal structure and results in a low-dimensional representation compared to the BoVW, while still yielding a decent result when combined with it. Experiments on the KTH, Hollywood2 and the challenging HMDB51 datasets show that the proposed framework is complementary to the BoVW representation, which discards temporal order. |
G Panagiotaropoulou, P Koutras, A Katsamanis, P Maragos, A Zlatintsi, A Protopapas, E Karavasilis, N Smyrnis fMRI-based Perceptual Validation of a computational Model for Visual and Auditory Saliency in Videos Conference Proc. IEEE Int'l Conf. on Image Processing (ICIP-2016), Phoenix, AZ, USA, 2016. Abstract | BibTeX | Links: [PDF] @conference{PKK+16, title = {fMRI-based Perceptual Validation of a computational Model for Visual and Auditory Saliency in Videos}, author = {G Panagiotaropoulou and P Koutras and A Katsamanis and P Maragos and A Zlatintsi and A Protopapas and E Karavasilis and N Smyrnis}, url = {http://robotics.ntua.gr/wp-content/publications/PanagiotaropoulouEtAl_fMRI-Validation-CompAVsaliencyVideos_ICIP2016.pdf}, year = {2016}, date = {2016-09-01}, booktitle = {Proc. IEEE Int'l Conf. on Image Processing (ICIP-2016)}, address = {Phoenix, AZ, USA}, abstract = {In this study, we make use of brain activation data to investigate the perceptual plausibility of a visual and an auditory model for visual and auditory saliency in video processing. These models have already been successfully employed in a number of applications. In addition, we experiment with parameters, modifications and suitable fusion schemes. As part of this work, fMRI data from complex video stimuli were collected, on which we base our analysis and results. The core part of the analysis involves the use of well-established methods for the manipulation of fMRI data and the examination of variability across brain responses of different individuals. Our results indicate a success in confirming the value of these saliency models in terms of perceptual plausibility.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this study, we make use of brain activation data to investigate the perceptual plausibility of a visual and an auditory model for visual and auditory saliency in video processing. 
These models have already been successfully employed in a number of applications. In addition, we experiment with parameters, modifications and suitable fusion schemes. As part of this work, fMRI data from complex video stimuli were collected, on which we base our analysis and results. The core part of the analysis involves the use of well-established methods for the manipulation of fMRI data and the examination of variability across brain responses of different individuals. Our results indicate a success in confirming the value of these saliency models in terms of perceptual plausibility. |
G Chalvatzaki, X S Papageorgiou, C Werner, K Hauer, C S Tzafestas, P Maragos Experimental comparison of human gait tracking algorithms: Towards a context-aware mobility assistance robotic walker Conference Mediterranean Conference on Control and Automation (MED), 2016. Abstract | BibTeX | Links: [PDF] @conference{CPWHTM16, title = {Experimental comparison of human gait tracking algorithms: Towards a context-aware mobility assistance robotic walker}, author = {G Chalvatzaki and X S Papageorgiou and C Werner and K Hauer and C S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/CPWHTM16.pdf}, doi = {10.1109/MED.2016.7535962}, year = {2016}, date = {2016-06-01}, booktitle = {Mediterranean Conference on Control and Automation (MED)}, pages = {719-724}, abstract = {Towards a mobility assistance robot for the elderly, it is essential to develop a robust and accurate gait tracking system. Various pathologies cause mobility inabilities to the aged population, leading to different gait patterns and walking speed. In this work, we present the experimental comparison of two user leg tracking systems of a robotic assistance walker, using data collected by a laser range sensor. The first one is a Kalman Filter tracking system, while the second one proposes the use of Particle Filters. The tracking systems provide the positions and velocities of the user's legs, which are used as observations into an HMM-based gait phases recognition system. The spatiotemporal results of the HMM framework are employed for computing parameters that characterize the human motion, which subsequently can be used to assess and distinguish between possible motion disabilities. For the experimental comparison, we are using real data collected from an ensemble of different elderly persons with a number of pathologies, and ground truth data from a GaitRite System. 
The results presented in this work demonstrate the applicability of the tracking systems in real test cases.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Towards a mobility assistance robot for the elderly, it is essential to develop a robust and accurate gait tracking system. Various pathologies cause mobility inabilities to the aged population, leading to different gait patterns and walking speed. In this work, we present the experimental comparison of two user leg tracking systems of a robotic assistance walker, using data collected by a laser range sensor. The first one is a Kalman Filter tracking system, while the second one proposes the use of Particle Filters. The tracking systems provide the positions and velocities of the user's legs, which are used as observations into an HMM-based gait phases recognition system. The spatiotemporal results of the HMM framework are employed for computing parameters that characterize the human motion, which subsequently can be used to assess and distinguish between possible motion disabilities. For the experimental comparison, we are using real data collected from an ensemble of different elderly persons with a number of pathologies, and ground truth data from a GaitRite System. The results presented in this work demonstrate the applicability of the tracking systems in real test cases. |
A C Dometios, X S Papageorgiou, C S Tzafestas, P Vartholomeos Towards ICT-supported Bath Robots: Control Architecture Description and Localized Perception of User for Robot Motion Planning Conference Mediterranean Conference on Control and Automation (MED), Athens, Greece, 2016. Abstract | BibTeX | Links: [PDF] @conference{DPTV16, title = {Towards ICT-supported Bath Robots: Control Architecture Description and Localized Perception of User for Robot Motion Planning}, author = {A C Dometios and X S Papageorgiou and C S Tzafestas and P Vartholomeos}, url = {http://robotics.ntua.gr/wp-content/publications/Dometios16_Control_Architecture_Description_Motion_Planning.pdf}, doi = {10.1109/MED.2016.7535954}, year = {2016}, date = {2016-06-01}, booktitle = {Mediterranean Conference on Control and Automation (MED)}, pages = {713-718}, address = {Athens, Greece}, abstract = {Τhis paper describes the general control architecture and the basic implementation concepts of a bath service robotic system. The goal of this system is to support and enhance elderly’s mobility, manipulation and force exertion abilities and assist them in successfully, safely and independently completing the entire sequence of showering and drying tasks, such as properly washing their back and lower limbs. This service robotic system is based on soft-robotic arms which,together with advanced human-robot force/compliance control will form the basis for a safe physical human-robot interaction that complies with the most up-to-date safety standards. In this paper an overview of the bath robotic system components is presented, and the basic modules that contribute to the overall control architecture of the system are described. Moreover, this paper proposed an algorithm that performs efficient processing of feedback data provided by a depth sensor. 
This algorithm supports local shape perception and geometric characterization of user body parts and will form the basis for further implementation of surface reconstruction and robot motion planning algorithms.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper describes the general control architecture and the basic implementation concepts of a bath service robotic system. The goal of this system is to support and enhance elderly’s mobility, manipulation and force exertion abilities and assist them in successfully, safely and independently completing the entire sequence of showering and drying tasks, such as properly washing their back and lower limbs. This service robotic system is based on soft-robotic arms which, together with advanced human-robot force/compliance control will form the basis for a safe physical human-robot interaction that complies with the most up-to-date safety standards. In this paper an overview of the bath robotic system components is presented, and the basic modules that contribute to the overall control architecture of the system are described. Moreover, this paper proposes an algorithm that performs efficient processing of feedback data provided by a depth sensor. This algorithm supports local shape perception and geometric characterization of user body parts and will form the basis for further implementation of surface reconstruction and robot motion planning algorithms. |
X S Papageorgiou, G Chalvatzaki, K N Lianos, C Werner, K Hauer, C S Tzafestas, P Maragos Experimental validation of human pathological gait analysis for an assisted living intelligent robotic walker Conference Proc. IEEE RAS/EMBS Int'l Conf. on Biomedical Robotics and Biomechatronics (BioRob-2016), 2016. Abstract | BibTeX | Links: [PDF] @conference{BIOROB2016, title = {Experimental validation of human pathological gait analysis for an assisted living intelligent robotic walker}, author = {X S Papageorgiou and G Chalvatzaki and K N Lianos and C Werner and K Hauer and C S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/BIOROB2016.pdf}, doi = {10.1109/BIOROB.2016.7523776}, year = {2016}, date = {2016-06-01}, booktitle = {Proc. IEEE RAS/EMBS Int'l Conf. on Biomedical Robotics and Biomechatronics (BioRob-2016)}, pages = {1086-1091}, abstract = {A robust and effective gait analysis functionality is an essential characteristic for an assistance mobility robot dealing with elderly persons. The aforementioned functionality is crucial for dealing with mobility disabilities which are widespread in these parts of the population. In this work we present experimental validation of our in house developed system. We are using real data, collected from an ensemble of different elderly persons with a number of pathologies, and we present a validation study by using a GaitRite System. Our system, following the standard literature conventions, characterizes the human motion with a set of parameters which subsequently can be used to assess and distinguish between possible motion disabilities, using a laser range finder as its main sensor. The initial results, presented in this work, demonstrate the applicability of our framework in real test cases. Regarding such frameworks, a crucial technical question is the necessary complexity of the overall tracking system. To answer this question, we compare two approaches with different complexity levels. 
The first is a static rule based system acting on filtered laser data, while the second system utilizes a Hidden Markov Model for gait cycle estimation, and extraction of the gait parameters. The results demonstrate that the added complexity of the HMM system is necessary for improving the accuracy and efficacy of the system.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } A robust and effective gait analysis functionality is an essential characteristic for an assistance mobility robot dealing with elderly persons. The aforementioned functionality is crucial for dealing with mobility disabilities which are widespread in these parts of the population. In this work we present experimental validation of our in house developed system. We are using real data, collected from an ensemble of different elderly persons with a number of pathologies, and we present a validation study by using a GaitRite System. Our system, following the standard literature conventions, characterizes the human motion with a set of parameters which subsequently can be used to assess and distinguish between possible motion disabilities, using a laser range finder as its main sensor. The initial results, presented in this work, demonstrate the applicability of our framework in real test cases. Regarding such frameworks, a crucial technical question is the necessary complexity of the overall tracking system. To answer this question, we compare two approaches with different complexity levels. The first is a static rule based system acting on filtered laser data, while the second system utilizes a Hidden Markov Model for gait cycle estimation, and extraction of the gait parameters. The results demonstrate that the added complexity of the HMM system is necessary for improving the accuracy and efficacy of the system. |
A Tsiami, A Katsamanis, P Maragos, A Vatakis Towards a behaviorally-validated computational audiovisual saliency model Conference Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing, Shanghai, China, 2016. Abstract | BibTeX | Links: [PDF] @conference{7472197, title = {Towards a behaviorally-validated computational audiovisual saliency model}, author = {A Tsiami and A Katsamanis and P Maragos and A Vatakis}, url = {http://robotics.ntua.gr/wp-content/publications/TKMV_BehaviorComputAVSaliencyModel_ICASSP2016.pdf}, doi = {10.1109/ICASSP.2016.7472197}, year = {2016}, date = {2016-03-01}, booktitle = {Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing}, pages = {2847-2851}, address = {Shanghai, China}, abstract = {Computational saliency models aim at predicting, in a bottom-up fashion, where human attention is drawn in the presented (visual, auditory or audiovisual) scene and have been proven useful in applications like robotic navigation, image compression and movie summarization. Despite the fact that well-established auditory and visual saliency models have been validated in behavioral experiments, e.g., by means of eye-tracking, there is no established computational audiovisual saliency model validated in the same way. In this work, building on biologically-inspired models of visual and auditory saliency, we present a joint audiovisual saliency model and introduce the validation approach we follow to show that it is compatible with recent findings of psychology and neuroscience regarding multimodal integration and attention. 
In this direction, we initially focus on the "pip and pop" effect which has been observed in behavioral experiments and indicates that visual search in sequences of cluttered images can be significantly aided by properly timed non-spatial auditory signals presented alongside the target visual stimuli.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Computational saliency models aim at predicting, in a bottom-up fashion, where human attention is drawn in the presented (visual, auditory or audiovisual) scene and have been proven useful in applications like robotic navigation, image compression and movie summarization. Despite the fact that well-established auditory and visual saliency models have been validated in behavioral experiments, e.g., by means of eye-tracking, there is no established computational audiovisual saliency model validated in the same way. In this work, building on biologically-inspired models of visual and auditory saliency, we present a joint audiovisual saliency model and introduce the validation approach we follow to show that it is compatible with recent findings of psychology and neuroscience regarding multimodal integration and attention. In this direction, we initially focus on the "pip and pop" effect which has been observed in behavioral experiments and indicates that visual search in sequences of cluttered images can be significantly aided by properly timed non-spatial auditory signals presented alongside the target visual stimuli. |
G Karamanolakis, E Iosif, A Zlatintsi, A Pikrakis, A Potamianos Audio-Based Distributional Representations of Meaning Using a Fusion of Feature Encodings Conference 2016. Abstract | BibTeX | Links: [Webpage] @conference{KIZ+16, title = {Audio-Based Distributional Representations of Meaning Using a Fusion of Feature Encodings}, author = {G Karamanolakis and E Iosif and A Zlatintsi and A Pikrakis and A Potamianos}, url = {http://robotics.ntua.gr/wp-content/publications/Karamanolakis+_Interspeech16.PDF}, year = {2016}, date = {2016-01-01}, abstract = {Recently a “Bag-of-Audio-Words” approach was proposed [1] for the combination of lexical features with audio clips in a multimodal semantic representation, i.e., an Audio Distributional Semantic Model (ADSM). An important step towards the creation of ADSMs is the estimation of the semantic distance between clips in the acoustic space, which is especially challenging given the diversity of audio collections. In this work, we investigate the use of different feature encodings in order to address this challenge following a two-step approach. First, an audio clip is categorized with respect to three classes, namely, music, speech and other. Next, the feature encodings are fused according to the posterior probabilities estimated in the previous step. Using a collection of audio clips annotated with tags we derive a mapping between words and audio clips. Based on this mapping and the proposed audio semantic distance, we construct an ADSM model in order to compute the distance between words (lexical semantic similarity task). 
The proposed model is shown to significantly outperform (23.6% relative improvement in correlation coefficient) the state-of-the-art results reported in the literature.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Recently a “Bag-of-Audio-Words” approach was proposed [1] for the combination of lexical features with audio clips in a multimodal semantic representation, i.e., an Audio Distributional Semantic Model (ADSM). An important step towards the creation of ADSMs is the estimation of the semantic distance between clips in the acoustic space, which is especially challenging given the diversity of audio collections. In this work, we investigate the use of different feature encodings in order to address this challenge following a two-step approach. First, an audio clip is categorized with respect to three classes, namely, music, speech and other. Next, the feature encodings are fused according to the posterior probabilities estimated in the previous step. Using a collection of audio clips annotated with tags we derive a mapping between words and audio clips. Based on this mapping and the proposed audio semantic distance, we construct an ADSM model in order to compute the distance between words (lexical semantic similarity task). The proposed model is shown to significantly outperform (23.6% relative improvement in correlation coefficient) the state-of-the-art results reported in the literature. |
Petros Maragos, Vassilis Pitsikalis, Athanasios Katsamanis, George Pavlakos, Stavros Theodorakis On Shape Recognition and Language Conference Perspectives in Shape Analysis, Springer International Publishing, Cham, 2016, ISBN: 978-3-319-24726-7. @conference{10.1007/978-3-319-24726-7_15, title = {On Shape Recognition and Language}, author = {Petros Maragos and Vassilis Pitsikalis and Athanasios Katsamanis and George Pavlakos and Stavros Theodorakis}, editor = {Michael Breuß and Alfred Bruckstein and Petros Maragos and Stefanie Wuhrer}, isbn = {978-3-319-24726-7}, year = {2016}, date = {2016-01-01}, booktitle = {Perspectives in Shape Analysis}, pages = {321--344}, publisher = {Springer International Publishing}, address = {Cham}, abstract = {Shapes shapeconvey meaning. Language is efficient in expressing and structuring meaning. The main thesis of this chapter is that by integrating shape with linguistic information shape recognition can be improved in performance. It broadens the concept of shape to visual shapes that include both geometric and optical information and explores ways that additional linguistic information may help with shape recognition. Towards this goal, it briefly describes some shape categories which have the potential of better recognition via language, with emphasis on gestures and moving shapes of sign language, as well as on cross-modal relations between vision and language in videos. It also draws inspiration from psychological studies that explore connections between gestures and human languages. Afterwards, it focuses on the broad class of multimodal gestures that combine spatio-temporal visual shapes with audio information. 
In this area, an approach is reviewed that significantly improves multimodal gesture recognition by fusing 3D shape information from motion-position of gesturing hands/arms and spatio-temporal handshapes in color and depth visual channels with audio information in the form of acoustically recognized sequences of gesture words.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Shapes convey meaning. Language is efficient in expressing and structuring meaning. The main thesis of this chapter is that by integrating shape with linguistic information shape recognition can be improved in performance. It broadens the concept of shape to visual shapes that include both geometric and optical information and explores ways that additional linguistic information may help with shape recognition. Towards this goal, it briefly describes some shape categories which have the potential of better recognition via language, with emphasis on gestures and moving shapes of sign language, as well as on cross-modal relations between vision and language in videos. It also draws inspiration from psychological studies that explore connections between gestures and human languages. Afterwards, it focuses on the broad class of multimodal gestures that combine spatio-temporal visual shapes with audio information. In this area, an approach is reviewed that significantly improves multimodal gesture recognition by fusing 3D shape information from motion-position of gesturing hands/arms and spatio-temporal handshapes in color and depth visual channels with audio information in the form of acoustically recognized sequences of gesture words. |
Christos G. Bampis, Petros Maragos, Alan C. Bovik Projective non-negative matrix factorization for unsupervised graph clustering Conference Proceedings - International Conference on Image Processing, ICIP, 2016-August , 2016, ISSN: 15224880. @conference{328, title = {Projective non-negative matrix factorization for unsupervised graph clustering}, author = { Christos G. Bampis and Petros Maragos and Alan C. Bovik}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/BampisMaragosBovik_GRPNMF-UnsupervisGraphCluster_ICIP2016.pdf}, doi = {10.1109/ICIP.2016.7532559}, issn = {15224880}, year = {2016}, date = {2016-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, volume = {2016-August}, pages = {1255--1258}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Georgia Panagiotaropoulou, Petros Koutras, Athanasios Katsamanis, Petros Maragos, Athanasia Zlatintsi, Athanassios Protopapas, Efstratios Karavasilis, Nikolaos Smyrnis FMRI-based perceptual validation of a computational model for visual and auditory saliency in videos Conference Proceedings - International Conference on Image Processing, ICIP, 2016-August , 2016, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{332, title = {FMRI-based perceptual validation of a computational model for visual and auditory saliency in videos}, author = { Georgia Panagiotaropoulou and Petros Koutras and Athanasios Katsamanis and Petros Maragos and Athanasia Zlatintsi and Athanassios Protopapas and Efstratios Karavasilis and Nikolaos Smyrnis}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PanagiotaropoulouEtAl_fMRI-Validation-CompAVsaliencyVideos_ICIP2016.pdf}, doi = {10.1109/ICIP.2016.7532447}, issn = {15224880}, year = {2016}, date = {2016-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, volume = {2016-August}, pages = {699--703}, abstract = {textcopyright 2016 IEEE.In this study, we make use of brain activation data to investigate the perceptual plausibility of a visual and an auditory model for visual and auditory saliency in video processing. These models have already been successfully employed in a number of applications. In addition, we experiment with parameters, modifications and suitable fusion schemes. As part of this work, fMRI data from complex video stimuli were collected, on which we base our analysis and results. The core part of the analysis involves the use of well-established methods for the manipulation of fMRI data and the examination of variability across brain responses of different individuals. 
Our results indicate a success in confirming the value of these saliency models in terms of perceptual plausibility.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } © 2016 IEEE. In this study, we make use of brain activation data to investigate the perceptual plausibility of a visual and an auditory model for visual and auditory saliency in video processing. These models have already been successfully employed in a number of applications. In addition, we experiment with parameters, modifications and suitable fusion schemes. As part of this work, fMRI data from complex video stimuli were collected, on which we base our analysis and results. The core part of the analysis involves the use of well-established methods for the manipulation of fMRI data and the examination of variability across brain responses of different individuals. Our results indicate a success in confirming the value of these saliency models in terms of perceptual plausibility. |
I. Rodomagoulakis, N. Kardaris, V. Pitsikalis, A. Arvanitakis, P. Maragos A multimedia gesture dataset for human robot communication: Acquisition, tools and recognition results Conference Proceedings - International Conference on Image Processing, ICIP, 2016-August , 2016, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{334, title = {A multimedia gesture dataset for human robot communication: Acquisition, tools and recognition results}, author = { I. Rodomagoulakis and N. Kardaris and V. Pitsikalis and A. Arvanitakis and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/RKPAM_MultimedaGestureDataset-HRI_ICIP2016.pdf}, doi = {10.1109/ICIP.2016.7532923}, issn = {15224880}, year = {2016}, date = {2016-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, volume = {2016-August}, pages = {3066--3070}, abstract = {Motivated by the recent advances in human-robot interaction we present a new dataset, a suite of tools to handle it and state-of-the-art work on visual gestures and audio commands recognition. The dataset has been collected with an integrated annotation and acquisition web-interface that facilitates on-the-way temporal ground-truths for fast acquisition. The dataset includes gesture instances in which the subjects are not in strict setup positions, and contains multiple scenarios, not restricted to a single static configuration. We accompany it by a valuable suite of tools as the practical interface to acquire audio-visual data in the robotic operating system, a state-of-the-art learning pipeline to train visual gesture and audio command models, and an online gesture recognition system. 
Finally, we include a rich evaluation of the dataset providing rich and insightful experimental recognition results.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Motivated by the recent advances in human-robot interaction we present a new dataset, a suite of tools to handle it and state-of-the-art work on visual gestures and audio commands recognition. The dataset has been collected with an integrated annotation and acquisition web-interface that facilitates on-the-way temporal ground-truths for fast acquisition. The dataset includes gesture instances in which the subjects are not in strict setup positions, and contains multiple scenarios, not restricted to a single static configuration. We accompany it by a valuable suite of tools as the practical interface to acquire audio-visual data in the robotic operating system, a state-of-the-art learning pipeline to train visual gesture and audio command models, and an online gesture recognition system. Finally, we include a rich evaluation of the dataset providing rich and insightful experimental recognition results. |
P Maragos, V Pitsikalis, A Katsamanis, G Pavlakos, S Theodorakis On Shape Recognition and Language Incollection Breuss, M; Bruckstein, A; Maragos, P; Wuhrer, S (Ed.): Perspectives in Shape Analysis, pp. 321-344, Springer, Cham, 2016. @incollection{Mar+16, title = {On Shape Recognition and Language}, author = {P Maragos and V Pitsikalis and A Katsamanis and G Pavlakos and S Theodorakis}, editor = {M Breuss and A Bruckstein and P Maragos and S Wuhrer}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2016_MaragosEtAl_ShapeRecognitionAndLanguage_PerspectivesInShapeAnalysis_Springer.pdf}, doi = {https://doi.org/10.1007/978-3-319-24726-7_15}, year = {2016}, date = {2016-01-01}, booktitle = {Perspectives in Shape Analysis}, pages = {321-344}, publisher = {Springer, Cham}, keywords = {}, pubstate = {published}, tppubtype = {incollection} } |
2015 |
Stamatios Lefkimmiatis, Anastasios Roussos, Petros Maragos, Michael Unser Structure Tensor Total Variation Journal Article SIAM Journal on Imaging Sciences, 8 (2), pp. 1090–1122, 2015, ISSN: 1936-4954. Abstract | BibTeX | Links: [Webpage] @article{305, title = {Structure Tensor Total Variation}, author = {Stamatios Lefkimmiatis and Anastasios Roussos and Petros Maragos and Michael Unser}, url = {http://epubs.siam.org/doi/10.1137/14098154X}, doi = {10.1137/14098154X}, issn = {1936-4954}, year = {2015}, date = {2015-01-01}, journal = {SIAM Journal on Imaging Sciences}, volume = {8}, number = {2}, pages = {1090--1122}, abstract = {We introduce a novel generic energy functional that we employ to solve inverse imaging problems within a variational framework. The proposed regularization family, termed as structure tensor total variation (STV), penalizes the eigenvalues of the structure tensor and is suitable for both grayscale and vector-valued images. It generalizes several existing variational penalties, including the total variation seminorm and vectorial extensions of it. Meanwhile, thanks to the structure tensor's ability to capture first-order information around a local neighborhood, the STV functionals can provide more robust measures of image variation. Further, we prove that the STV regularizers are convex while they also satisfy several invariance properties w.r.t. image transformations. These properties qualify them as ideal candidates for imaging applications. In addition, for the discrete version of the STV functionals we derive an equivalent definition that is based on the patch-based Jacobian operator, a novel linear op...}, keywords = {}, pubstate = {published}, tppubtype = {article} } We introduce a novel generic energy functional that we employ to solve inverse imaging problems within a variational framework. 
The proposed regularization family, termed as structure tensor total variation (STV), penalizes the eigenvalues of the structure tensor and is suitable for both grayscale and vector-valued images. It generalizes several existing variational penalties, including the total variation seminorm and vectorial extensions of it. Meanwhile, thanks to the structure tensor's ability to capture first-order information around a local neighborhood, the STV functionals can provide more robust measures of image variation. Further, we prove that the STV regularizers are convex while they also satisfy several invariance properties w.r.t. image transformations. These properties qualify them as ideal candidates for imaging applications. In addition, for the discrete version of the STV functionals we derive an equivalent definition that is based on the patch-based Jacobian operator, a novel linear op... |
Vassilis Pitsikalis, Athanasios Katsamanis, Stavros Theodorakis, Petros Maragos Multimodal Gesture Recognition via Multiple Hypotheses Rescoring Journal Article Journal of Machine Learning Research, 16 (1), pp. 255-284, 2015. @article{144, title = {Multimodal Gesture Recognition via Multiple Hypotheses Rescoring}, author = {Vassilis Pitsikalis and Athanasios Katsamanis and Stavros Theodorakis and Petros Maragos}, url = {http://link.springer.com/10.1007/978-3-319-57021-1_16}, doi = {10.1007/978-3-319-57021-1_16}, year = {2015}, date = {2015-01-01}, journal = {Journal of Machine Learning Research}, volume = {16}, number = {1}, pages = {255-284}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Theodora Chaspari, Constantin Soldatos, Petros Maragos The development of the Athens Emotional States Inventory (AESI): collection, validation and automatic processing of emotionally loaded sentences Journal Article The World Journal of Biological Psychiatry, 16 (5), pp. 312–322, 2015. @article{chaspari2015development, title = {The development of the Athens Emotional States Inventory (AESI): collection, validation and automatic processing of emotionally loaded sentences}, author = {Theodora Chaspari and Constantin Soldatos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/ChaspariSoldatosMaragos_AESI_WJBP2015_postprint.pdf}, doi = {10.3109/15622975.2015.1012228}, year = {2015}, date = {2015-01-01}, journal = {The World Journal of Biological Psychiatry}, volume = {16}, number = {5}, pages = {312--322}, publisher = {Taylor & Francis}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
X S Papageorgiou, C S Tzafestas, C Vartholomeos Laschi, R Lopez ICT-Supported Bath Robots: Design Concepts Conference C_ICSR, 2015. Abstract | BibTeX | Links: [PDF] @conference{ICSR2015_1, title = {ICT-Supported Bath Robots: Design Concepts}, author = {X S Papageorgiou and C S Tzafestas and C Vartholomeos Laschi and R Lopez}, url = {http://robotics.ntua.gr/wp-content/publications/ICSR2015_1.pdf}, year = {2015}, date = {2015-10-01}, booktitle = {C_ICSR}, abstract = {This paper presents the concept and the architecture of the I-SUPPORT service robotics system. The goal of the I-SUPPORT system is to support and enhance older adults mobility, manipulation and force exertion abilities and assist them in successfully, safely and independently completing the entire sequence of showering tasks, such as properly washing their back, their upper parts, their lower limbs, their buttocks and groin, and to effectively use the towel for drying purposes. Adaptation and integration of state-of-the-art, cost-effective, soft-robotic arms will provide the hardware constituents, which, together with advanced human-robot force/compliance control will form the basis for a safe physical human-robot interaction that complies with the most up-to-date safety standards. Human behavioural, sociological, safety, ethical and acceptability aspects, as well as financial factors related to the proposed service robotics system will be thoroughly investigated and evaluated so that the I-SUPPORT end result is a close-to-market prototype, applicable to realistic living settings.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper presents the concept and the architecture of the I-SUPPORT service robotics system. 
The goal of the I-SUPPORT system is to support and enhance older adults mobility, manipulation and force exertion abilities and assist them in successfully, safely and independently completing the entire sequence of showering tasks, such as properly washing their back, their upper parts, their lower limbs, their buttocks and groin, and to effectively use the towel for drying purposes. Adaptation and integration of state-of-the-art, cost-effective, soft-robotic arms will provide the hardware constituents, which, together with advanced human-robot force/compliance control will form the basis for a safe physical human-robot interaction that complies with the most up-to-date safety standards. Human behavioural, sociological, safety, ethical and acceptability aspects, as well as financial factors related to the proposed service robotics system will be thoroughly investigated and evaluated so that the I-SUPPORT end result is a close-to-market prototype, applicable to realistic living settings. |
G Papageorgiou X.S. Moustris, G Pitsikalis V. Chalvatzaki, A Dometios, N Kardaris, C S Tzafestas, P Maragos User-Oriented Cognitive Interaction and Control for an Intelligent Robotic Walker Conference 17th International Conference on Social Robotics (ICSR 2015), 2015. Abstract | BibTeX | Links: [PDF] @conference{ICSR2015_2, title = {User-Oriented Cognitive Interaction and Control for an Intelligent Robotic Walker}, author = {G Papageorgiou X.S. Moustris and G Pitsikalis V. Chalvatzaki and A Dometios and N Kardaris and C S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/ICSR2015_2.pdf}, year = {2015}, date = {2015-10-01}, booktitle = {17th International Conference on Social Robotics (ICSR 2015)}, abstract = {Mobility impairments are prevalent in the elderly population and constitute one of the main causes related to difficulties in performing Activities of Daily Living (ADLs) and consequent reduction of quality of life. This paper reports current research work related to the control of an intelligent robotic rollator aiming to provide user-adaptive and context-aware walking assistance. To achieve such targets, a large spectrum of multimodal sensory processing and interactive control modules need to be developed and seamlessly integrated, that can, on one side track and analyse human motions and actions, in order to detect pathological situations and estimate user needs, while predicting at the same time the user (short-term or long-range) intentions in order to adapt robot control actions and supportive behaviours accordingly. 
User-oriented human-robot interaction and control refers to the functionalities that couple the motions, the actions and, in more general terms, the behaviours of the assistive robotic device to the user in a non-physical interaction context.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Mobility impairments are prevalent in the elderly population and constitute one of the main causes related to difficulties in performing Activities of Daily Living (ADLs) and consequent reduction of quality of life. This paper reports current research work related to the control of an intelligent robotic rollator aiming to provide user-adaptive and context-aware walking assistance. To achieve such targets, a large spectrum of multimodal sensory processing and interactive control modules need to be developed and seamlessly integrated, that can, on one side track and analyse human motions and actions, in order to detect pathological situations and estimate user needs, while predicting at the same time the user (short-term or long-range) intentions in order to adapt robot control actions and supportive behaviours accordingly. User-oriented human-robot interaction and control refers to the functionalities that couple the motions, the actions and, in more general terms, the behaviours of the assistive robotic device to the user in a non-physical interaction context. |
G Chalvatzaki, X S Papageorgiou, C S Tzafestas Gait Modelling for a Context-Aware User-Adaptive Robotic Assistant Platform Conference 2015, ISSN: 978-88-97999-63-8. Abstract | BibTeX | Links: [PDF] @conference{CPT15, title = {Gait Modelling for a Context-Aware User-Adaptive Robotic Assistant Platform}, author = {G Chalvatzaki and X S Papageorgiou and C S Tzafestas}, url = {http://robotics.ntua.gr/wp-content/publications/CPT15.pdf}, issn = {978-88-97999-63-8}, year = {2015}, date = {2015-09-01}, pages = {132-141}, abstract = {For a context-aware robotic assistant platform that follows patients with moderate mobility impairment and adapts its motion to the patient's needs, the development of an efficient leg tracker and the recognition of pathological gait are very important. In this work, we present the basic concept for the robot control architecture and analyse three essential parts of the Adaptive Context-Aware Robot Control scheme; the detection and tracking of the subject's legs, the gait modelling and classification and the computation of gait parameters for the impairment level assessment. We initially process raw laser data and estimate the legs' position and velocity with a Kalman Filter and then use this information as input for a Hidden Markov Model-based framework that detects specific gait patterns and classifies human gait into normal or pathological. We then compute gait parameters commonly used for medical diagnosis. 
The recognised gait patterns along with the gait parameters will be used for the impairment level assessment, which will activate certain control assistive actions regarding the pathological state of the patient.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } For a context-aware robotic assistant platform that follows patients with moderate mobility impairment and adapts its motion to the patient's needs, the development of an efficient leg tracker and the recognition of pathological gait are very important. In this work, we present the basic concept for the robot control architecture and analyse three essential parts of the Adaptive Context-Aware Robot Control scheme; the detection and tracking of the subject's legs, the gait modelling and classification and the computation of gait parameters for the impairment level assessment. We initially process raw laser data and estimate the legs' position and velocity with a Kalman Filter and then use this information as input for a Hidden Markov Model-based framework that detects specific gait patterns and classifies human gait into normal or pathological. We then compute gait parameters commonly used for medical diagnosis. The recognised gait patterns along with the gait parameters will be used for the impairment level assessment, which will activate certain control assistive actions regarding the pathological state of the patient. |
X S Papageorgiou, G Chalvatzaki, C S Tzafestas, P Maragos Hidden markov modeling of human pathological gait using laser range finder for an assisted living intelligent robotic walker Conference IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), 2015. Abstract | BibTeX | Links: [PDF] @conference{IROS2015, title = {Hidden markov modeling of human pathological gait using laser range finder for an assisted living intelligent robotic walker}, author = {X S Papageorgiou and G Chalvatzaki and C S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/IROS2015.pdf}, doi = {10.1109/IROS.2015.7354283}, year = {2015}, date = {2015-09-01}, booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, pages = {6342-6347}, abstract = {The precise analysis of a patient's or an elderly person's walking pattern is very important for an effective intelligent active mobility assistance robot. This walking pattern can be described by a cyclic motion, which can be modeled using the consecutive gait phases. In this paper, we present a completely non-invasive framework for analyzing and recognizing a pathological human walking gait pattern. Our framework utilizes a laser range finder sensor to detect and track the human legs, and an appropriately synthesized Hidden Markov Model (HMM) for state estimation, and recognition of the gait patterns. We demonstrate the applicability of this setup using real data, collected from an ensemble of different elderly persons with a number of pathologies. The results presented in this paper demonstrate that the proposed human data analysis scheme has the potential to provide the necessary methodological (modeling, inference, and learning) framework for a cognitive behavior-based robot control system. 
More specifically, the proposed framework has the potential to be used for the classification of specific walking pathologies, which is needed for the development of a context-aware robot mobility assistant.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } The precise analysis of a patient's or an elderly person's walking pattern is very important for an effective intelligent active mobility assistance robot. This walking pattern can be described by a cyclic motion, which can be modeled using the consecutive gait phases. In this paper, we present a completely non-invasive framework for analyzing and recognizing a pathological human walking gait pattern. Our framework utilizes a laser range finder sensor to detect and track the human legs, and an appropriately synthesized Hidden Markov Model (HMM) for state estimation, and recognition of the gait patterns. We demonstrate the applicability of this setup using real data, collected from an ensemble of different elderly persons with a number of pathologies. The results presented in this paper demonstrate that the proposed human data analysis scheme has the potential to provide the necessary methodological (modeling, inference, and learning) framework for a cognitive behavior-based robot control system. More specifically, the proposed framework has the potential to be used for the classification of specific walking pathologies, which is needed for the development of a context-aware robot mobility assistant. |
P Koutras, A Zlatintsi, E. Iosif, A Katsamanis, P Maragos, A Potamianos Predicting Audio-Visual Salient Events Based on Visual, Audio and Text Modalities for Movie Summarization Conference Proc. {IEEE} Int'l Conf. Acous., Speech, and Signal Processing, Quebec, Canada, 2015. Abstract | BibTeX | Links: [PDF] @conference{KZI+15, title = {Predicting Audio-Visual Salient Events Based on Visual, Audio and Text Modalities for Movie Summarization}, author = {P Koutras and A Zlatintsi and E. Iosif and A Katsamanis and P Maragos and A Potamianos}, url = {http://robotics.ntua.gr/wp-content/publications/KZIKMP_MovieSum2_ICIP-2015.pdf}, year = {2015}, date = {2015-09-01}, booktitle = {Proc. {IEEE} Int'l Conf. Acous., Speech, and Signal Processing}, address = {Quebec, Canada}, abstract = {In this paper, we present a new and improved synergistic approach to the problem of audio-visual salient event detection and movie summarization based on visual, audio and text modalities. Spatio-temporal visual saliency is estimated through a perceptually inspired frontend based on 3D (space, time) Gabor filters and frame-wise features are extracted from the saliency volumes. For the auditory salient event detection we extract features based on Teager-Kaiser Energy Operator, while text analysis incorporates part-of-speech tagging and affective modeling of single words on the movie subtitles. For the evaluation of the proposed system, we employ an elementary and non-parametric classification technique like KNN. Detection results are reported on the MovSum database, using objective evaluations against ground-truth denoting the perceptually salient events, and human evaluations of the movie summaries. Our evaluation verifies the appropriateness of the proposed methods compared to our baseline system. 
Finally, our newly proposed summarization algorithm produces summaries that consist of salient and meaningful events, also improving the comprehension of the semantics.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, we present a new and improved synergistic approach to the problem of audio-visual salient event detection and movie summarization based on visual, audio and text modalities. Spatio-temporal visual saliency is estimated through a perceptually inspired frontend based on 3D (space, time) Gabor filters and frame-wise features are extracted from the saliency volumes. For the auditory salient event detection we extract features based on Teager-Kaiser Energy Operator, while text analysis incorporates part-of-speech tagging and affective modeling of single words on the movie subtitles. For the evaluation of the proposed system, we employ an elementary and non-parametric classification technique like KNN. Detection results are reported on the MovSum database, using objective evaluations against ground-truth denoting the perceptually salient events, and human evaluations of the movie summaries. Our evaluation verifies the appropriateness of the proposed methods compared to our baseline system. Finally, our newly proposed summarization algorithm produces summaries that consist of salient and meaningful events, also improving the comprehension of the semantics. |
A Zlatintsi, E.Iosif, P Maragos, A Potamianos Audio Salient Event Detection and Summarization using Audio and Text Modalities Conference Nice, France, 2015. Abstract | BibTeX | Links: [PDF] @conference{ZIM+15, title = {Audio Salient Event Detection and Summarization using Audio and Text Modalities}, author = {A Zlatintsi and E.Iosif and P Maragos and A Potamianos}, url = {http://robotics.ntua.gr/wp-content/publications/ZlatintsiEtAl_AudioTextSum-EUSIPCO-2015.pdf}, year = {2015}, date = {2015-09-01}, address = {Nice, France}, abstract = {This paper investigates the problem of audio event detection and summarization, building on previous work [1, 2] on the detection of perceptually important audio events based on saliency models. We take a synergistic approach to audio summarization where saliency computation of audio streams is assisted by using the text modality as well. Auditory saliency is assessed by auditory and perceptual cues such as Teager energy, loudness and roughness; all known to correlate with attention and human hearing. Text analysis incorporates part-of-speech tagging and affective modeling. A computational method for the automatic correction of the boundaries of the selected audio events is applied creating summaries that consist not only of salient but also meaningful and semantically coherent events. A non-parametric classification technique is employed and results are reported on the MovSum movie database using objective evaluations against ground-truth designating the auditory and semantically salient events.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper investigates the problem of audio event detection and summarization, building on previous work [1, 2] on the detection of perceptually important audio events based on saliency models. We take a synergistic approach to audio summarization where saliency computation of audio streams is assisted by using the text modality as well. 
Auditory saliency is assessed by auditory and perceptual cues such as Teager energy, loudness and roughness; all known to correlate with attention and human hearing. Text analysis incorporates part-of-speech tagging and affective modeling. A computational method for the automatic correction of the boundaries of the selected audio events is applied creating summaries that consist not only of salient but also meaningful and semantically coherent events. A non-parametric classification technique is employed and results are reported on the MovSum movie database using objective evaluations against ground-truth designating the auditory and semantically salient events. |
A Zlatintsi, P Koutras, N Efthymiou, P Maragos, A Potamianos, K Pastra Quality Evaluation of Computational Models for Movie Summarization Conference Costa Navarino, Messinia, Greece, 2015. Abstract | BibTeX | Links: [PDF] @conference{ZKE+15, title = {Quality Evaluation of Computational Models for Movie Summarization}, author = {A Zlatintsi and P Koutras and N Efthymiou and P Maragos and A Potamianos and K Pastra}, url = {http://robotics.ntua.gr/wp-content/publications/ZlatintsiEtAl_MovieSumEval-QoMEX2015.pdf}, year = {2015}, date = {2015-05-01}, address = {Costa Navarino, Messinia, Greece}, abstract = {In this paper we present a movie summarization system and we investigate what composes high quality movie summaries in terms of user experience evaluation. We propose state-of-the-art audio, visual and text techniques for the detection of perceptually salient events from movies. The evaluation of such computational models is usually based on the comparison of the similarity between the system-detected events and some ground-truth data. For this reason, we have developed the MovSum movie database, which includes sensory and semantic saliency annotation as well as cross-media relations, for objective evaluations. The automatically produced movie summaries were qualitatively evaluated, in an extensive human evaluation, in terms of informativeness and enjoyability accomplishing very high ratings up to 80% and 90%, respectively, which verifies the appropriateness of the proposed methods.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper we present a movie summarization system and we investigate what composes high quality movie summaries in terms of user experience evaluation. We propose state-of-the-art audio, visual and text techniques for the detection of perceptually salient events from movies. 
The evaluation of such computational models is usually based on the comparison of the similarity between the system-detected events and some ground-truth data. For this reason, we have developed the MovSum movie database, which includes sensory and semantic saliency annotation as well as cross-media relations, for objective evaluations. The automatically produced movie summaries were qualitatively evaluated, in an extensive human evaluation, in terms of informativeness and enjoyability accomplishing very high ratings up to 80% and 90%, respectively, which verifies the appropriateness of the proposed methods. |
Christos G Bampis, Petros Maragos UNIFYING THE RANDOM WALKER ALGORITHM AND THE SIR MODEL FOR GRAPH CLUSTERING AND IMAGE SEGMENTATION Conference Icip 2015, 2 (3), 2015, ISBN: 9781479983391. @conference{319, title = {UNIFYING THE RANDOM WALKER ALGORITHM AND THE SIR MODEL FOR GRAPH CLUSTERING AND IMAGE SEGMENTATION}, author = { Christos G Bampis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/BampisMaragos_NormalizedRandomWalk-GraphCluster-ImageSegment_ICIP2015_crf.pdf}, isbn = {9781479983391}, year = {2015}, date = {2015-01-01}, booktitle = {Icip 2015}, volume = {2}, number = {3}, pages = {2265--2269}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Panagiotis Giannoulis, Alessio Brutti, Marco Matassoni, Alberto Abad, Athanasios Katsamanis, Miguel Matos, Gerasimos Potamianos, Petros Maragos, Fondazione Bruno Kessler MULTI-ROOM SPEECH ACTIVITY DETECTION USING A DISTRIBUTED MICROPHONE NETWORK IN DOMESTIC ENVIRONMENTS Conference Proc. European Signal Processing Conf. (EUSIPCO-2015), Nice, France, Sep. 2015, 2015, ISBN: 9780992862633. @conference{306, title = {MULTI-ROOM SPEECH ACTIVITY DETECTION USING A DISTRIBUTED MICROPHONE NETWORK IN DOMESTIC ENVIRONMENTS}, author = { Panagiotis Giannoulis and Alessio Brutti and Marco Matassoni and Alberto Abad and Athanasios Katsamanis and Miguel Matos and Gerasimos Potamianos and Petros Maragos and Fondazione Bruno Kessler}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/GiannoulisEtAl_MultiRoomSpeechActivityDetection_EUSIPCO2015_crf.pdf}, isbn = {9780992862633}, year = {2015}, date = {2015-01-01}, booktitle = {Proc. European Signal Processing Conf. (EUSIPCO-2015), Nice, France, Sep. 2015}, pages = {1281--1285}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P. Koutras, A. Zlatintsi, E. Iosif, A. Katsamanis, P. Maragos, A. Potamianos Predicting audio-visual salient events based on visual, audio and text modalities for movie summarization Conference Proceedings - International Conference on Image Processing, ICIP, 2015-December , 2015, ISSN: 15224880. @conference{307, title = {Predicting audio-visual salient events based on visual, audio and text modalities for movie summarization}, author = { P. Koutras and A. Zlatintsi and E. Iosif and A. Katsamanis and P. Maragos and A. Potamianos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KZIKMP_MovieSum2_ICIP-2015.pdf}, doi = {10.1109/ICIP.2015.7351630}, issn = {15224880}, year = {2015}, date = {2015-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, volume = {2015-December}, pages = {4361--4365}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Petros Koutras, Petros Maragos Estimation of eye gaze direction angles based on active appearance models Conference 2015 IEEE International Conference on Image Processing (ICIP), 2015, ISBN: 978-1-4799-8339-1. @conference{308, title = {Estimation of eye gaze direction angles based on active appearance models}, author = { Petros Koutras and Petros Maragos}, url = {http://ieeexplore.ieee.org/document/7351237/}, doi = {10.1109/ICIP.2015.7351237}, isbn = {978-1-4799-8339-1}, year = {2015}, date = {2015-01-01}, booktitle = {2015 IEEE International Conference on Image Processing (ICIP)}, pages = {2424--2428}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Maragos, V Pitsikalis, A Katsamanis, N Kardaris, E Mavroudi, I Rodomagoulakis, A Tsiami Multimodal Sensory Processing for Human Action Recognition in Mobility Assistive Robotics Conference Proc. IROS-2015 Workshop on Cognitive Mobility Assistance Robots, Hamburg, Germany, Sep. 2015, 2015. @conference{320, title = {Multimodal Sensory Processing for Human Action Recognition in Mobility Assistive Robotics}, author = { P Maragos and V Pitsikalis and A Katsamanis and N Kardaris and E Mavroudi and I Rodomagoulakis and A Tsiami}, url = {MaragosEtAl_MultiSensoryHumanActionRecogn-Robotics_IROS2015-Workshop.pdf}, year = {2015}, date = {2015-01-01}, booktitle = {Proc. IROS-2015 Workshop on Cognitive Mobility Assistance Robots, Hamburg, Germany, Sep. 2015}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Z I Skordilis, A Tsiami, P Maragos, G Potamianos, L Spelgatti, R Sannino Multichannel Speech Enhancement Using Mems Microphones Conference IEEE International Conference on Acoustics, Speech and Signal Processing, 2015, ISBN: 978-1-4673-6997-8. @conference{163, title = {Multichannel Speech Enhancement Using Mems Microphones}, author = { Z I Skordilis and A Tsiami and P Maragos and G Potamianos and L Spelgatti and R Sannino}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/SkorTsiamMarPotSpelSan_MEMS-MCSE_ICASSP2015.pdf}, doi = {10.1109/ICASSP.2015.7178467}, isbn = {978-1-4673-6997-8}, year = {2015}, date = {2015-01-01}, booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing}, pages = {2--6}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Costas S Tzafestas, Xanthi S Papageorgiou, George P Moustris User-Oriented Human-Robot Interaction for an Intelligent Walking Assistant Robotic Device Conference Workshop IEEE/RSJ Int'l Conf. on Intelligent Robots and Systems, Invited Session: "Cognitive Mobility Assistance Robots: Scientific Advances and Perspectives", Hamburg, Germany, Sept. 28 - Oct. 02, 2015, 2015. @conference{310, title = {User-Oriented Human-Robot Interaction for an Intelligent Walking Assistant Robotic Device}, author = { Costas S Tzafestas and Xanthi S Papageorgiou and George P Moustris}, year = {2015}, date = {2015-01-01}, booktitle = {Workshop IEEE/RSJ Int'l Conf. on Intelligent Robots and Systems, Invited Session: "Cognitive Mobility Assistance Robots: Scientific Advances and Perspectives", Hamburg, Germany, Sept. 28 - Oct. 02, 2015}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
2014 |
Sheraz Khan, Athanasios Dometios, Chris Verginis, Costas Tzafestas, Dirk Wollherr, Martin Buss RMAP: A rectangular cuboid approximation framework for 3D environment mapping Journal Article Autonomous Robots, 37 (3), pp. 261–277, 2014, ISSN: 09295593. @article{23n, title = {RMAP: A rectangular cuboid approximation framework for 3D environment mapping}, author = {Sheraz Khan and Athanasios Dometios and Chris Verginis and Costas Tzafestas and Dirk Wollherr and Martin Buss}, doi = {10.1007/s10514-014-9387-y}, issn = {09295593}, year = {2014}, date = {2014-01-01}, journal = {Autonomous Robots}, volume = {37}, number = {3}, pages = {261--277}, abstract = {This paper presents a rectangular cuboid approximation framework (RMAP) for 3D mapping. The goal of RMAP is to provide computational and memory efficient environment representations for 3D robotic mapping using axis aligned rectangular cuboids (RC). This paper focuses on two aspects of the RMAP framework: (i) An occupancy grid approach and (ii) A RC approximation of 3D environments based on point cloud density. The RMAP occupancy grid is based on the Rtree data structure which is composed of a hierarchy of RC. The proposed approach is capable of generating probabilistic 3D representations with multiresolution capabilities. It reduces the memory complexity in large scale 3D occupancy grids by avoiding explicit modelling of free space. In contrast to point cloud and fixed resolution cell representations based on beam end point observations, an approximation approach using point cloud density is presented. The proposed approach generates variable sized RC approximations that are memory efficient for axis aligned surfaces. Evaluation of the RMAP occupancy grid and approximation approach based on computational and memory complexity on different datasets shows the effectiveness of this framework for 3D mapping. 
\textcopyright{} 2014 The Author(s).}, keywords = {}, pubstate = {published}, tppubtype = {article} } This paper presents a rectangular cuboid approximation framework (RMAP) for 3D mapping. The goal of RMAP is to provide computational and memory efficient environment representations for 3D robotic mapping using axis aligned rectangular cuboids (RC). This paper focuses on two aspects of the RMAP framework: (i) An occupancy grid approach and (ii) A RC approximation of 3D environments based on point cloud density. The RMAP occupancy grid is based on the Rtree data structure which is composed of a hierarchy of RC. The proposed approach is capable of generating probabilistic 3D representations with multiresolution capabilities. It reduces the memory complexity in large scale 3D occupancy grids by avoiding explicit modelling of free space. In contrast to point cloud and fixed resolution cell representations based on beam end point observations, an approximation approach using point cloud density is presented. The proposed approach generates variable sized RC approximations that are memory efficient for axis aligned surfaces. Evaluation of the RMAP occupancy grid and approximation approach based on computational and memory complexity on different datasets shows the effectiveness of this framework for 3D mapping. © 2014 The Author(s). |
Epameinondas Antonakos, Vassilis Pitsikalis, Petros Maragos Classification of extreme facial events in sign language videos Journal Article Eurasip Journal on Image and Video Processing, 2014 , 2014, ISSN: 16875281. @article{143, title = {Classification of extreme facial events in sign language videos}, author = {Epameinondas Antonakos and Vassilis Pitsikalis and Petros Maragos}, doi = {10.1186/1687-5281-2014-14}, issn = {16875281}, year = {2014}, date = {2014-01-01}, journal = {Eurasip Journal on Image and Video Processing}, volume = {2014}, abstract = {We propose a new approach for Extreme States Classification (ESC) on feature spaces of facial cues in sign language (SL) videos. The method is built upon Active Appearance Model (AAM) face tracking and feature extraction of global and local AAMs. ESC is applied on various facial cues-as, for instance, pose rotations, head movements and eye blinking-leading to the detection of extreme states such as left/right, up/down and open/closed. Given the importance of such facial events in SL analysis, we apply ESC to detect visual events on SL videos, including both American (ASL) and Greek (GSL) corpora, yielding promising qualitative and quantitative results. Further, we show the potential of ESC for assistive annotation tools and demonstrate a link of the detections with indicative higher-level linguistic events. Given the lack of facial annotated data and the fact that manual annotations are highly time-consuming, ESC results indicate that the framework can have significant impact on SL processing and analysis. textcopyright 2014 Antonakos et al.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We propose a new approach for Extreme States Classification (ESC) on feature spaces of facial cues in sign language (SL) videos. The method is built upon Active Appearance Model (AAM) face tracking and feature extraction of global and local AAMs. 
ESC is applied on various facial cues-as, for instance, pose rotations, head movements and eye blinking-leading to the detection of extreme states such as left/right, up/down and open/closed. Given the importance of such facial events in SL analysis, we apply ESC to detect visual events on SL videos, including both American (ASL) and Greek (GSL) corpora, yielding promising qualitative and quantitative results. Further, we show the potential of ESC for assistive annotation tools and demonstrate a link of the detections with indicative higher-level linguistic events. Given the lack of facial annotated data and the fact that manual annotations are highly time-consuming, ESC results indicate that the framework can have significant impact on SL processing and analysis. © 2014 Antonakos et al. |
S Khan, A Dometios, C Verginis, C Tzafestas, D Wollherr, M Buss RMAP: a Rectangular Cuboid Approximation Framework for 3D Environment Mapping Journal Article 37 (3), pp. 261–277, 2014, ISSN: 1573-7527. Abstract | BibTeX | Links: [PDF] @article{KDVTWB14, title = {RMAP: a Rectangular Cuboid Approximation Framework for 3D Environment Mapping}, author = {S Khan and A Dometios and C Verginis and C Tzafestas and D Wollherr and M Buss}, url = {http://robotics.ntua.gr/wp-content/publications/Khan14_RMAP_Rectangular_Cuboid_Approxim.pdf}, doi = {10.1007/s10514-014-9387-y}, issn = {1573-7527}, year = {2014}, date = {2014-01-01}, volume = {37}, number = {3}, pages = {261--277}, publisher = {Springer}, abstract = {This paper presents a rectangular cuboid approximation framework (RMAP) for 3D mapping. The goal of RMAP is to provide computational and memory efficient environment representations for 3D robotic mapping using axis aligned rectangular cuboids (RC). This paper focuses on two aspects of the RMAP framework: (i) An occupancy grid approach and (ii) A RC approximation of 3D environments based on point cloud density. The RMAP occupancy grid is based on the Rtree data structure which is composed of a hierarchy of RC. The proposed approach is capable of generating probabilistic 3D representations with multiresolution capabilities. It reduces the memory complexity in large scale 3D occupancy grids by avoiding explicit modelling of free space. In contrast to point cloud and fixed resolution cell representations based on beam end point observations, an approximation approach using point cloud density is presented. The proposed approach generates variable sized RC approximations that are memory efficient for axis aligned surfaces. 
Evaluation of the RMAP occupancy grid and approximation approach based on computational and memory complexity on different datasets shows the effectiveness of this framework for 3D mapping.}, keywords = {}, pubstate = {published}, tppubtype = {article} } This paper presents a rectangular cuboid approximation framework (RMAP) for 3D mapping. The goal of RMAP is to provide computational and memory efficient environment representations for 3D robotic mapping using axis aligned rectangular cuboids (RC). This paper focuses on two aspects of the RMAP framework: (i) An occupancy grid approach and (ii) A RC approximation of 3D environments based on point cloud density. The RMAP occupancy grid is based on the Rtree data structure which is composed of a hierarchy of RC. The proposed approach is capable of generating probabilistic 3D representations with multiresolution capabilities. It reduces the memory complexity in large scale 3D occupancy grids by avoiding explicit modelling of free space. In contrast to point cloud and fixed resolution cell representations based on beam end point observations, an approximation approach using point cloud density is presented. The proposed approach generates variable sized RC approximations that are memory efficient for axis aligned surfaces. Evaluation of the RMAP occupancy grid and approximation approach based on computational and memory complexity on different datasets shows the effectiveness of this framework for 3D mapping. |
A Zlatintsi, P Maragos Comparison of Different Representations Based on Nonlinear Features for Music Genre Classification Conference Proc. European Signal Processing Conference, Lisbon, Portugal, 2014. Abstract | BibTeX | Links: [PDF] @conference{ZlMa14, title = {Comparison of Different Representations Based on Nonlinear Features for Music Genre Classification}, author = {A Zlatintsi and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/ZlatintsiMaragos_MGC_EUSIPCO14_Lisbon_proc.pdf}, year = {2014}, date = {2014-09-01}, booktitle = {Proc. European Signal Processing Conference}, address = {Lisbon, Portugal}, abstract = {In this paper, we examine the descriptiveness and recognition properties of different feature representations for the analysis of musical signals, aiming in the exploration of their micro- and macro-structures, for the task of music genre classification. We explore nonlinear methods, such as the AM-FM model and ideas from fractal theory, so as to model the time-varying harmonic structure of musical signals and the geometrical complexity of the music waveform. The different feature representations’ efficacy is compared regarding their recognition properties for the specific task. The proposed features are evaluated against and in combination with Mel frequency cepstral coefficients (MFCC), using both static and dynamic classifiers, accomplishing an error reduction of 28%, illustrating that they can capture important aspects of music.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, we examine the descriptiveness and recognition properties of different feature representations for the analysis of musical signals, aiming in the exploration of their micro- and macro-structures, for the task of music genre classification. 
We explore nonlinear methods, such as the AM-FM model and ideas from fractal theory, so as to model the time-varying harmonic structure of musical signals and the geometrical complexity of the music waveform. The different feature representations’ efficacy is compared regarding their recognition properties for the specific task. The proposed features are evaluated against and in combination with Mel frequency cepstral coefficients (MFCC), using both static and dynamic classifiers, accomplishing an error reduction of 28%, illustrating that they can capture important aspects of music. |
A Tsiami, A Katsamanis, P Maragos, G Potamianos Experiments in acoustic source localization using sparse arrays in adverse indoors environments Conference Proc. European Signal Processing Conference, Lisbon, Portugal, 2014. Abstract | BibTeX | Links: [PDF] @conference{tsiami2014localization, title = {Experiments in acoustic source localization using sparse arrays in adverse indoors environments}, author = {A Tsiami and A Katsamanis and P Maragos and G Potamianos}, url = {http://robotics.ntua.gr/wp-content/publications/Tsiami+_AcousticSourceLocalization_EUSIPCO2014.pdf}, year = {2014}, date = {2014-09-01}, booktitle = {Proc. European Signal Processing Conference}, pages = {2390-2394}, address = {Lisbon, Portugal}, abstract = {In this paper we experiment with 2-D source localization in smart homes under adverse conditions using sparse distributed microphone arrays. We propose some improvements to deal with problems due to high reverberation, noise and use of a limited number of microphones. These consist of a pre-filtering stage for dereverberation and an iterative procedure that aims to increase accuracy. Experiments carried out in relatively large databases with both simulated and real recordings of sources in various positions indicate that the proposed method exhibits a better performance compared to others under challenging conditions while also being computationally efficient. It is demonstrated that although reverberation degrades localization performance, this degradation can be compensated by identifying the reliable microphone pairs and disposing of the outliers.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper we experiment with 2-D source localization in smart homes under adverse conditions using sparse distributed microphone arrays. We propose some improvements to deal with problems due to high reverberation, noise and use of a limited number of microphones. 
These consist of a pre-filtering stage for dereverberation and an iterative procedure that aims to increase accuracy. Experiments carried out in relatively large databases with both simulated and real recordings of sources in various positions indicate that the proposed method exhibits a better performance compared to others under challenging conditions while also being computationally efficient. It is demonstrated that although reverberation degrades localization performance, this degradation can be compensated by identifying the reliable microphone pairs and disposing of the outliers. |
Antigoni Tsiami, Isidoros Rodomagoulakis, Panagiotis Giannoulis, Athanasios Katsamanis, Gerasimos Potamianos, Petros Maragos ATHENA: A Greek Multi-Sensory Database for Home Automation Control Conference Proc. Int'l Conf. on Speech Communication and Technology (INTERSPEECH), Singapore, 2014. Abstract | BibTeX | Links: [PDF] @conference{tsiami2014athena, title = {ATHENA: A Greek Multi-Sensory Database for Home Automation Control}, author = {Antigoni Tsiami and Isidoros Rodomagoulakis and Panagiotis Giannoulis and Athanasios Katsamanis and Gerasimos Potamianos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/Tsiami+_AthenaDatabase_INTERSPEECH2014.pdf}, year = {2014}, date = {2014-09-01}, booktitle = {Proc. Int'l Conf. on Speech Communication and Technology (INTERSPEECH)}, pages = {1608--1612}, address = {Singapore}, abstract = {In this paper we present a Greek speech database with real multi-modal data in a smart home two-room environment. In total, 20 speakers were recorded in 240 one-minute long sessions. The recordings include utterances of activation keywords and commands for home automation control, but also phonetically rich sentences and conversational speech. Audio, speaker movements and gestures were captured by 20 condenser microphones installed on the walls and ceiling, 6 MEMS microphones, 2 close-talk microphones and one Kinect camera. The new publicly available database exhibits adverse noise conditions because of background noises and acoustic events performed during the recordings to better approximate a realistic everyday home scenario. Thus, it is suitable for experimentation on voice activity and event detection, source localization, speech enhancement and far-field speech recognition. 
We present the details of the corpus as well as baseline results on multi-channel voice activity detection and spoken command recognition.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper we present a Greek speech database with real multi-modal data in a smart home two-room environment. In total, 20 speakers were recorded in 240 one-minute long sessions. The recordings include utterances of activation keywords and commands for home automation control, but also phonetically rich sentences and conversational speech. Audio, speaker movements and gestures were captured by 20 condenser microphones installed on the walls and ceiling, 6 MEMS microphones, 2 close-talk microphones and one Kinect camera. The new publicly available database exhibits adverse noise conditions because of background noises and acoustic events performed during the recordings to better approximate a realistic everyday home scenario. Thus, it is suitable for experimentation on voice activity and event detection, source localization, speech enhancement and far-field speech recognition. We present the details of the corpus as well as baseline results on multi-channel voice activity detection and spoken command recognition. |
X S Papageorgiou, G Chalvatzaki, C S Tzafestas, P Maragos Hidden Markov modeling of human normal gait using laser range finder for a mobility assistance robot Conference IEEE International Conference on Robotics and Automation (ICRA), 2014, ISSN: 1050-4729. Abstract | BibTeX | Links: [PDF] @conference{ICRA2014, title = {Hidden Markov modeling of human normal gait using laser range finder for a mobility assistance robot}, author = {X S Papageorgiou and G Chalvatzaki and C S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/ICRA2014.pdf}, doi = {10.1109/ICRA.2014.6906899}, issn = {1050-4729}, year = {2014}, date = {2014-05-01}, booktitle = {IEEE International Conference on Robotics and Automation (ICRA)}, pages = {482-487}, abstract = {For an effective intelligent active mobility assistance robot, the walking pattern of a patient or an elderly person has to be analyzed precisely. A well-known fact is that the walking patterns are gaits, that is, cyclic patterns with several consecutive phases. These cyclic motions can be modeled using the consecutive gait phases. In this paper, we present a completely non-invasive framework for analyzing a normal human walking gait pattern. Our framework utilizes a laser range finder sensor to collect the data, a combination of filters to preprocess these data, and an appropriately synthesized Hidden Markov Model (HMM) for state estimation, and recognition of the gait data. We demonstrate the applicability of this setup using real data, collected from an ensemble of different persons. The results presented in this paper demonstrate that the proposed human data analysis scheme has the potential to provide the necessary methodological (modeling, inference, and learning) framework for a cognitive behavior-based robot control system. 
More specifically, the proposed framework has the potential to be used for the recognition of abnormal gait patterns and the subsequent classification of specific walking pathologies, which is needed for the development of a context-aware robot mobility assistant.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } For an effective intelligent active mobility assistance robot, the walking pattern of a patient or an elderly person has to be analyzed precisely. A well-known fact is that the walking patterns are gaits, that is, cyclic patterns with several consecutive phases. These cyclic motions can be modeled using the consecutive gait phases. In this paper, we present a completely non-invasive framework for analyzing a normal human walking gait pattern. Our framework utilizes a laser range finder sensor to collect the data, a combination of filters to preprocess these data, and an appropriately synthesized Hidden Markov Model (HMM) for state estimation, and recognition of the gait data. We demonstrate the applicability of this setup using real data, collected from an ensemble of different persons. The results presented in this paper demonstrate that the proposed human data analysis scheme has the potential to provide the necessary methodological (modeling, inference, and learning) framework for a cognitive behavior-based robot control system. More specifically, the proposed framework has the potential to be used for the recognition of abnormal gait patterns and the subsequent classification of specific walking pathologies, which is needed for the development of a context-aware robot mobility assistant. |
X S Papageorgiou, C S Tzafestas, P Maragos, G Pavlakos, G Chalvatzaki, G Moustris, I Kokkinos, A Peer, B Stanczyk, E -S Fotinea, E Efthimiou Advances in Intelligent Mobility Assistance Robot Integrating Multimodal Sensory Processing Conference J_HCII, Springer International Publishing, Cham, 2014, ISBN: 978-3-319-07446-7. Abstract | BibTeX | Links: [PDF] @conference{HCII2014, title = {Advances in Intelligent Mobility Assistance Robot Integrating Multimodal Sensory Processing}, author = {X S Papageorgiou and C S Tzafestas and P Maragos and G Pavlakos and G Chalvatzaki and G Moustris and I Kokkinos and A Peer and B Stanczyk and E -S Fotinea and E Efthimiou}, editor = {C Stephanidis and M Antona}, url = {http://robotics.ntua.gr/wp-content/publications/HCII2014.pdf}, doi = {https://doi.org/10.1007/978-3-319-07446-7_66}, isbn = {978-3-319-07446-7}, year = {2014}, date = {2014-01-01}, booktitle = {J_HCII}, pages = {692--703}, publisher = {Springer International Publishing}, address = {Cham}, abstract = {Mobility disabilities are prevalent in our ageing society and impede activities important for the independent living of elderly people and their quality of life. The goal of this work is to support human mobility and thus enforce fitness and vitality by developing intelligent robotic platforms designed to provide user-centred and natural support for ambulating in indoor environments. We envision the design of cognitive mobile robotic systems that can monitor and understand specific forms of human activity, in order to deduce what the human needs are, in terms of mobility. 
The goal is to provide user and context adaptive active support and ambulation assistance to elderly users, and generally to individuals with specific forms of moderate to mild walking impairment.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Mobility disabilities are prevalent in our ageing society and impede activities important for the independent living of elderly people and their quality of life. The goal of this work is to support human mobility and thus enforce fitness and vitality by developing intelligent robotic platforms designed to provide user-centred and natural support for ambulating in indoor environments. We envision the design of cognitive mobile robotic systems that can monitor and understand specific forms of human activity, in order to deduce what the human needs are, in terms of mobility. The goal is to provide user and context adaptive active support and ambulation assistance to elderly users, and generally to individuals with specific forms of moderate to mild walking impairment. |
Theodora Chaspari, Dimitrios Dimitriadis, Petros Maragos Emotion classification of speech using modulation features Conference European Signal Processing Conference, 2014, ISSN: 22195491. Abstract | BibTeX | Links: [PDF] @conference{170, title = {Emotion classification of speech using modulation features}, author = { Theodora Chaspari and Dimitrios Dimitriadis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/ChaspariDimitriadisMaragos_EmotionRecognitionSpeech_EUSIPCO2014_cr.pdf}, issn = {22195491}, year = {2014}, date = {2014-01-01}, booktitle = {European Signal Processing Conference}, pages = {1552--1556}, abstract = {Automatic classification of a speaker's affective state is one of the major challenges in signal processing community, since it can improve Human-Computer interaction and give insights into the nature of emotions from psychology perspective. The amplitude and frequency control of sound production influences strongly the affective voice content. In this paper, we take advantage of the inherent speech modulations and propose the use of instant amplitude- and frequency-derived features for efficient emotion recognition. Our results indicate that these features can further increase the performance of the widely-used spectral-prosodic information, achieving improvements on two emotional databases, the Berlin Database of Emotional Speech and the recently collected Athens Emotional States Inventory.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Automatic classification of a speaker's affective state is one of the major challenges in signal processing community, since it can improve Human-Computer interaction and give insights into the nature of emotions from psychology perspective. The amplitude and frequency control of sound production influences strongly the affective voice content. 
In this paper, we take advantage of the inherent speech modulations and propose the use of instant amplitude- and frequency-derived features for efficient emotion recognition. Our results indicate that these features can further increase the performance of the widely-used spectral-prosodic information, achieving improvements on two emotional databases, the Berlin Database of Emotional Speech and the recently collected Athens Emotional States Inventory. |
Panagiotis Giannoulis, Gerasimos Potamianos, Athanasios Katsamanis, Petros Maragos Multi-microphone fusion for detection of speech and acoustic events in smart spaces Conference European Signal Processing Conference, 2014, ISSN: 22195491. Abstract | BibTeX | Links: [PDF] @conference{168, title = {Multi-microphone fusion for detection of speech and acoustic events in smart spaces}, author = { Panagiotis Giannoulis and Gerasimos Potamianos and Athanasios Katsamanis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/GiannoulisEtAl_MultimicrFusionDetectionSpeechEventsSmartspaces_EUSIPCO2014.pdf}, issn = {22195491}, year = {2014}, date = {2014-01-01}, booktitle = {European Signal Processing Conference}, pages = {2375--2379}, abstract = {In this paper, we examine the challenging problem of de- tecting acoustic events and voice activity in smart indoors environments, equipped with multiple microphones. In par- ticular, we focus on channel combination strategies, aiming to take advantage of the multiple microphones installed in the smart space, capturing the potentially noisy acoustic scene from the far-field. We propose various such approaches that can be formulated as fusion at the signal, feature, or at the decision level, as well as combinations of the above, also including multi-channel training. We apply our methods on two multi-microphone databases: (a) one recorded inside a small meeting room, containing twelve classes of isolated acoustic events; and (b) a speech corpus containing inter- fering noise sources, simulated inside a smart home with multiple rooms. 
Our multi-channel approaches demonstrate significant improvements, reaching relative error reductions over a single-channel baseline of 9.3% and 44.8% in the two datasets, respectively.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, we examine the challenging problem of detecting acoustic events and voice activity in smart indoors environments, equipped with multiple microphones. In particular, we focus on channel combination strategies, aiming to take advantage of the multiple microphones installed in the smart space, capturing the potentially noisy acoustic scene from the far-field. We propose various such approaches that can be formulated as fusion at the signal, feature, or at the decision level, as well as combinations of the above, also including multi-channel training. We apply our methods on two multi-microphone databases: (a) one recorded inside a small meeting room, containing twelve classes of isolated acoustic events; and (b) a speech corpus containing interfering noise sources, simulated inside a smart home with multiple rooms. Our multi-channel approaches demonstrate significant improvements, reaching relative error reductions over a single-channel baseline of 9.3% and 44.8% in the two datasets, respectively. |
A. Katsamanis, I. Rodomagoulakis, G. Potamianos, P. Maragos, A. Tsiami Robust far-field spoken command recognition for home automation combining adaptation and multichannel processing Conference ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings, 2014, ISSN: 15206149. Abstract | BibTeX | Links: [PDF] @conference{171, title = {Robust far-field spoken command recognition for home automation combining adaptation and multichannel processing}, author = { A. Katsamanis and I. Rodomagoulakis and G. Potamianos and P. Maragos and A. Tsiami}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KatsamanisEtAl_MultichannelASR_DIRHA_icassp2014.pdf}, doi = {10.1109/ICASSP.2014.6854664}, issn = {15206149}, year = {2014}, date = {2014-01-01}, booktitle = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings}, pages = {5547--5551}, abstract = {The paper presents our approach to speech-controlled home automa- tion. We are focusing on the detection and recognition of spoken commands preceded by a key-phrase as recorded in a voice-enabled apartment by a set of multiple microphones installed in the rooms. For both problems we investigate robust modeling, environmental adaptation and multichannel processing to cope with a) insufficient training data and b) the far-field effects and noise in the apartment. The proposed integrated scheme is evaluated in a challenging and highly realistic corpus of simulated audio recordings and achieves F-measure close to 0.70 for key-phrase spotting and word accuracy close to 98% for the command recognition task.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } The paper presents our approach to speech-controlled home automa- tion. We are focusing on the detection and recognition of spoken commands preceded by a key-phrase as recorded in a voice-enabled apartment by a set of multiple microphones installed in the rooms. 
For both problems we investigate robust modeling, environmental adaptation and multichannel processing to cope with a) insufficient training data and b) the far-field effects and noise in the apartment. The proposed integrated scheme is evaluated in a challenging and highly realistic corpus of simulated audio recordings and achieves F-measure close to 0.70 for key-phrase spotting and word accuracy close to 98% for the command recognition task. |
Kevis Maninis, Petros Koutras, Petros Maragos ADVANCES ON ACTION RECOGNITION IN VIDEOS USING AN INTEREST POINT DETECTOR BASED ON MULTIBAND SPATIO-TEMPORAL ENERGIES Conference ICIP, 2014, ISBN: 9781479957514. @conference{164, title = {ADVANCES ON ACTION RECOGNITION IN VIDEOS USING AN INTEREST POINT DETECTOR BASED ON MULTIBAND SPATIO-TEMPORAL ENERGIES }, author = { Kevis Maninis and Petros Koutras and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/ManinisKoutrasMaragos_Action_ICIP2014.pdf}, isbn = {9781479957514}, year = {2014}, date = {2014-01-01}, booktitle = {ICIP}, pages = {1490--1494}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Georgios Pavlakos, Stavros Theodorakis, Vassilis Pitsikalis, Athanasios Katsamanis, Petros Maragos Kinect-based multimodal gesture recognition using a two-pass fusion scheme Conference 2014 IEEE International Conference on Image Processing, ICIP 2014, 2014, ISBN: 9781479957514. Abstract | BibTeX | Links: [PDF] @conference{165, title = {Kinect-based multimodal gesture recognition using a two-pass fusion scheme}, author = { Georgios Pavlakos and Stavros Theodorakis and Vassilis Pitsikalis and Athanasios Katsamanis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PTPΚΜ_MultimodalGestureRecogn2PassFusion_ICIP2014.pdf}, doi = {10.1109/ICIP.2014.7025299}, isbn = {9781479957514}, year = {2014}, date = {2014-01-01}, booktitle = {2014 IEEE International Conference on Image Processing, ICIP 2014}, pages = {1495--1499}, abstract = {We present a new framework for multimodal gesture recognition that is based on a two-pass fusion scheme. In this, we deal with a demanding Kinect-based multimodal dataset, which was introduced in a recent gesture recognition challenge. We employ multiple modalities, i.e., visual cues, such as colour and depth images, as well as audio, and we specifically extract feature descriptors of the hands' movement, handshape, and audio spectral properties. Based on these features, we statistically train separate unimodal gesture-word models, namely hidden Markov models, explicitly accounting for the dynamics of each modality. Multimodal recognition of unknown gesture sequences is achieved by combining these models in a late, two-pass fusion scheme that exploits a set of unimodally generated n-best recognition hypotheses. 
The proposed scheme achieves 88.2% gesture recognition accuracy in the Kinect-based multimodal dataset, outperforming all recently published approaches on the same challenging multimodal gesture recognition task.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We present a new framework for multimodal gesture recognition that is based on a two-pass fusion scheme. In this, we deal with a demanding Kinect-based multimodal dataset, which was introduced in a recent gesture recognition challenge. We employ multiple modalities, i.e., visual cues, such as colour and depth images, as well as audio, and we specifically extract feature descriptors of the hands' movement, handshape, and audio spectral properties. Based on these features, we statistically train separate unimodal gesture-word models, namely hidden Markov models, explicitly accounting for the dynamics of each modality. Multimodal recognition of unknown gesture sequences is achieved by combining these models in a late, two-pass fusion scheme that exploits a set of unimodally generated n-best recognition hypotheses. The proposed scheme achieves 88.2% gesture recognition accuracy in the Kinect-based multimodal dataset, outperforming all recently published approaches on the same challenging multimodal gesture recognition task. |
2013 |
Anastasios Roussos, Stavros Theodorakis, Vassilis Pitsikalis, Petros Maragos Dynamic Affine-Invariant Shape-Appearance Handshape Features and Classification in Sign Language Videos Journal Article Journal of Machine Learning Research, 14 , pp. 1627-1663, 2013. @article{142, title = {Dynamic Affine-Invariant Shape-Appearance Handshape Features and Classification in Sign Language Videos}, author = {Anastasios Roussos and Stavros Theodorakis and Vassilis Pitsikalis and Petros Maragos}, url = {http://jmlr.org/papers/v14/roussos13a.html}, year = {2013}, date = {2013-06-01}, journal = {Journal of Machine Learning Research}, volume = {14}, pages = {1627--1663}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
A Zlatintsi, P Maragos Multiscale Fractal Analysis of Musical Instrument Signals with Application to Recognition Journal Article IEEE Transactions on Audio, Speech, and Language Processing, 21 (4), pp. 737–748, 2013. Abstract | BibTeX | Links: [PDF] @article{ZlMa13, title = {Multiscale Fractal Analysis of Musical Instrument Signals with Application to Recognition}, author = {A Zlatintsi and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/ZlatintsiMaragos_MultiscaleFractalAnalMusicInstrumSignalsApplicRecogn_ieeetASLP2013.pdf}, year = {2013}, date = {2013-04-01}, journal = {IEEE Transactions on Audio, Speech, and Language Processing}, volume = {21}, number = {4}, pages = {737--748}, abstract = {In this paper, we explore nonlinear methods, inspired by the fractal theory for the analysis of the structure of music signals at multiple time scales, which is of importance both for their modeling and for their automatic computer-based recognition. We propose the multiscale fractal dimension (MFD) profile as a short-time descriptor, useful to quantify the multiscale complexity and fragmentation of the different states of the music waveform. We have experimentally found that this descriptor can discriminate several aspects among different music instruments, which is verified by further analysis on synthesized sinusoidal signals. We compare the descriptiveness of our features against that of Mel frequency cepstral coefficients (MFCCs), using both static and dynamic classifiers such as Gaussian mixture models (GMMs) and hidden Markov models (HMMs). The method and features proposed in this paper appear to be promising for music signal analysis, due to their capability for multiscale analysis of the signals and their applicability in recognition, as they accomplish an error reduction of up to 32%. 
These results are quite interesting and render the descriptor of direct applicability in large-scale music classification tasks.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this paper, we explore nonlinear methods, inspired by the fractal theory for the analysis of the structure of music signals at multiple time scales, which is of importance both for their modeling and for their automatic computer-based recognition. We propose the multiscale fractal dimension (MFD) profile as a short-time descriptor, useful to quantify the multiscale complexity and fragmentation of the different states of the music waveform. We have experimentally found that this descriptor can discriminate several aspects among different music instruments, which is verified by further analysis on synthesized sinusoidal signals. We compare the descriptiveness of our features against that of Mel frequency cepstral coefficients (MFCCs), using both static and dynamic classifiers such as Gaussian mixture models (GMMs) and hidden Markov models (HMMs). The method and features proposed in this paper appear to be promising for music signal analysis, due to their capability for multiscale analysis of the signals and their applicability in recognition, as they accomplish an error reduction of up to 32%. These results are quite interesting and render the descriptor of direct applicability in large-scale music classification tasks. |
Costas S Tzafestas, Spyros Velanas Telehaptic perception of delayed stiffness using adaptive impedance control: Experimental psychophysical analysis Journal Article Presence: Teleoperators and Virtual Environments, 22 (4), pp. 323–344, 2013, ISSN: 15313263. @article{2_135, title = {Telehaptic perception of delayed stiffness using adaptive impedance control: Experimental psychophysical analysis}, author = {Costas S Tzafestas and Spyros Velanas}, doi = {10.1162/PRES_a_00162}, issn = {15313263}, year = {2013}, date = {2013-01-01}, journal = {Presence: Teleoperators and Virtual Environments}, volume = {22}, number = {4}, pages = {323--344}, abstract = {Abstract Telehaptics is the science of transmitting touch-related sensations over computer networks. With respect to robot teleoperation, telehaptics emphasizes more on reliably reproducing physical properties of a remote environment, as mediated over a network through the use of appropriate haptic interfacing technologies. One of the main factors that can cause degradation of the quality of a telehaptic system is the presence of time delays. Inspired by concepts such as impedance-reflection and model-mediated telemanipulation, an adaptive impedance control scheme has been proposed aiming to mitigate some of the problems caused by network delays in a telehaptic system. This paper presents an experimental analysis, which has been conducted to assess the actual performance of the proposed telehaptic scheme in terms of both control and human perception objectives. Firstly, a set of comparative numerical experiments is presented aiming to analyze stability and characterize transparency of the telehaptic system under large time delays. The results show the superior performance of the proposed adaptive impedance scheme as compared to direct force-reflecting teleoperation. 
Then, a series of psychophysical experiments is described, to evaluate the performance of the telehaptic system with respect to human perception of remote (delayed) stiffness. An analysis of the obtained results shows that the proposed adaptive scheme significantly improves telehaptic perception of linear stiffness in the presence of network delays, maintaining perceptual thresholds close to the ones obtained in the case of direct, nondelayed stimuli. A comparative experimental evaluation of psychometric transparency confirms the superior robustness with regard to time delay of the adaptive impedance telehaptic scheme as compared to state-of-the-art position/force transparentizing methods.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Abstract Telehaptics is the science of transmitting touch-related sensations over computer networks. With respect to robot teleoperation, telehaptics emphasizes more on reliably reproducing physical properties of a remote environment, as mediated over a network through the use of appropriate haptic interfacing technologies. One of the main factors that can cause degradation of the quality of a telehaptic system is the presence of time delays. Inspired by concepts such as impedance-reflection and model-mediated telemanipulation, an adaptive impedance control scheme has been proposed aiming to mitigate some of the problems caused by network delays in a telehaptic system. This paper presents an experimental analysis, which has been conducted to assess the actual performance of the proposed telehaptic scheme in terms of both control and human perception objectives. Firstly, a set of comparative numerical experiments is presented aiming to analyze stability and characterize transparency of the telehaptic system under large time delays. The results show the superior performance of the proposed adaptive impedance scheme as compared to direct force-reflecting teleoperation. 
Then, a series of psychophysical experiments is described, to evaluate the performance of the telehaptic system with respect to human perception of remote (delayed) stiffness. An analysis of the obtained results shows that the proposed adaptive scheme significantly improves telehaptic perception of linear stiffness in the presence of network delays, maintaining perceptual thresholds close to the ones obtained in the case of direct, nondelayed stimuli. A comparative experimental evaluation of psychometric transparency confirms the superior robustness with regard to time delay of the adaptive impedance telehaptic scheme as compared to state-of-the-art position/force transparentizing methods. |
Georgios Evangelopoulos, Athanasia Zlatintsi, Alexandros Potamianos, Petros Maragos, Konstantinos Rapantzikos, Georgios Skoumas, Yannis Avrithis Multimodal saliency and fusion for movie summarization based on aural, visual, and textual attention Journal Article IEEE Transactions on Multimedia, 15 (7), pp. 1553–1568, 2013, ISSN: 15209210. Abstract | BibTeX | Links: [PDF] @article{141, title = {Multimodal saliency and fusion for movie summarization based on aural, visual, and textual attention}, author = {Georgios Evangelopoulos and Athanasia Zlatintsi and Alexandros Potamianos and Petros Maragos and Konstantinos Rapantzikos and Georgios Skoumas and Yannis Avrithis}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/EZPMRSA_MultimodalSaliencyFusionMovieSumAVTattention_ieeetMM13.pdf}, doi = {10.1109/TMM.2013.2267205}, issn = {15209210}, year = {2013}, date = {2013-01-01}, journal = {IEEE Transactions on Multimedia}, volume = {15}, number = {7}, pages = {1553--1568}, abstract = {Multimodal streams of sensory information are naturally parsed and integrated by humans using signal-level feature extraction and higher level cognitive processes. Detection of attention-invoking audiovisual segments is formulated in this work on the basis of saliency models for the audio, visual, and textual information conveyed in a video stream. Aural or auditory saliency is assessed by cues that quantify multifrequency waveform modulations, extracted through nonlinear operators and energy tracking. Visual saliency is measured through a spatiotemporal attention model driven by intensity, color, and orientation. Textual or linguistic saliency is extracted from part-of-speech tagging on the subtitles information available with most movie distributions. 
The individual saliency streams, obtained from modality-depended cues, are integrated in a multimodal saliency curve, modeling the time-varying perceptual importance of the composite video stream and signifying prevailing sensory events. The multimodal saliency representation forms the basis of a generic, bottom-up video summarization algorithm. Different fusion schemes are evaluated on a movie database of multimodal saliency annotations with comparative results provided across modalities. The produced summaries, based on low-level features and content-independent fusion and selection, are of subjectively high aesthetic and informative quality.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Multimodal streams of sensory information are naturally parsed and integrated by humans using signal-level feature extraction and higher level cognitive processes. Detection of attention-invoking audiovisual segments is formulated in this work on the basis of saliency models for the audio, visual, and textual information conveyed in a video stream. Aural or auditory saliency is assessed by cues that quantify multifrequency waveform modulations, extracted through nonlinear operators and energy tracking. Visual saliency is measured through a spatiotemporal attention model driven by intensity, color, and orientation. Textual or linguistic saliency is extracted from part-of-speech tagging on the subtitles information available with most movie distributions. The individual saliency streams, obtained from modality-depended cues, are integrated in a multimodal saliency curve, modeling the time-varying perceptual importance of the composite video stream and signifying prevailing sensory events. The multimodal saliency representation forms the basis of a generic, bottom-up video summarization algorithm. Different fusion schemes are evaluated on a movie database of multimodal saliency annotations with comparative results provided across modalities. 
The produced summaries, based on low-level features and content-independent fusion and selection, are of subjectively high aesthetic and informative quality. |
Nikolaos Karianakis, Petros Maragos An integrated system for digital restoration of prehistoric Theran wall paintings Conference 2013 18th International Conference on Digital Signal Processing, DSP 2013, 2013, ISBN: 9781467358057. @conference{174, title = {An integrated system for digital restoration of prehistoric {Theran} wall paintings}, author = {Nikolaos Karianakis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KarianakisMaragos_IntegratedSystemDigitalRestorationPrehistoricTheranWallpaintings_DSP2013.pdf}, doi = {10.1109/ICDSP.2013.6622838}, isbn = {9781467358057}, year = {2013}, date = {2013-01-01}, booktitle = {2013 18th International Conference on Digital Signal Processing, DSP 2013}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Stamatios Lefkimmiatis, Anastasios Roussos, Michael Unser, Petros Maragos Supplementary Material for the SSVM-2013 paper entitled “ Convex Generalizations of Total Variation based on the Structure Tensor with Applications to Inverse Problems ” Conference Scale Space and Variational Methods in Computer Vision (SSVM-2013), pp.48-60, 2013, 2013. @conference{172, title = {Supplementary Material for the SSVM-2013 paper entitled “ Convex Generalizations of Total Variation based on the Structure Tensor with Applications to Inverse Problems ”}, author = { Stamatios Lefkimmiatis and Anastasios Roussos and Michael Unser and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/LRUM_ConvexGeneralizationsTotalVariationStructureTensorInverseProblems_SSVM2013.pdf}, year = {2013}, date = {2013-01-01}, booktitle = {Scale Space and Variational Methods in Computer Vision (SSVM-2013), pp.48-60, 2013}, pages = {1--4}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
G. P. Moustris, A. I. Mantelos, C. S. Tzafestas Enhancing surgical accuracy using virtual fixtures and motion compensation in robotic beating heart surgery Conference 2013 21st Mediterranean Conference on Control and Automation, MED 2013 - Conference Proceedings, 2013, ISSN: 1050-4729. @conference{28l, title = {Enhancing surgical accuracy using virtual fixtures and motion compensation in robotic beating heart surgery}, author = { G. P. Moustris and A. I. Mantelos and C. S. Tzafestas}, doi = {10.1109/MED.2013.6608880}, issn = {1050-4729}, year = {2013}, date = {2013-01-01}, booktitle = {2013 21st Mediterranean Conference on Control and Automation, MED 2013 - Conference Proceedings}, pages = {1254--1260}, abstract = {This paper proposes a novel technique for applying virtual fixtures in a changing environment. The main targeted application is robotic beating heart surgery, which enables the surgeon to operate directly on a beating heart. Using a motion compensation framework, the motion of the heart surface is stabilized in a virtual space, which is presented to the surgeon to operate in. Consequently, the fixture is implemented in this static space, bypassing problems of dynamic fixtures such as position update, placement and force transients. Randomized experiments were performed using a trained surgeon comparing our approach to simple motion compensation and no compensation at all. The positive effect of the fixture in surgical accuracy for a tracking task is also discussed. textcopyright 2013 IEEE.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper proposes a novel technique for applying virtual fixtures in a changing environment. The main targeted application is robotic beating heart surgery, which enables the surgeon to operate directly on a beating heart. Using a motion compensation framework, the motion of the heart surface is stabilized in a virtual space, which is presented to the surgeon to operate in. 
Consequently, the fixture is implemented in this static space, bypassing problems of dynamic fixtures such as position update, placement and force transients. Randomized experiments were performed using a trained surgeon comparing our approach to simple motion compensation and no compensation at all. The positive effect of the fixture in surgical accuracy for a tracking task is also discussed. textcopyright 2013 IEEE. |
G.P. Moustris, A.I. Mantelos, C.S. Tzafestas Active motion compensation in robotic cardiac surgery Conference 2013 European Control Conference, ECC 2013, 2013, ISBN: 9783033039629. @conference{27l, title = {Active motion compensation in robotic cardiac surgery}, author = {G.P. Moustris and A.I. Mantelos and C.S. Tzafestas}, isbn = {9783033039629}, year = {2013}, date = {2013-01-01}, booktitle = {2013 European Control Conference, ECC 2013}, abstract = {Motion compensation is a prominent application in robotic beating heart surgery, with significant potential benefits for both surgeons and patients. In this paper we investigate an active assistance control scheme on a simple tracking task, which helps the surgeon guide the robot on a predefined reference. The control is implemented on top of a shared control system, which serves as a basis for implementing higher level controllers. Experiments with a trained surgeon are also presented, which show the positive effect of the approach. {\textcopyright} 2013 EUCA.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Motion compensation is a prominent application in robotic beating heart surgery, with significant potential benefits for both surgeons and patients. In this paper we investigate an active assistance control scheme on a simple tracking task, which helps the surgeon guide the robot on a predefined reference. The control is implemented on top of a shared control system, which serves as a basis for implementing higher level controllers. Experiments with a trained surgeon are also presented, which show the positive effect of the approach. © 2013 EUCA. |
I. Rodomagoulakis, P. Giannoulis, Z. I. Skordilis, P. Maragos, G. Potamianos Experiments on far-field multichannel speech processing in smart homes Conference 2013 18th International Conference on Digital Signal Processing, DSP 2013, 2013, ISBN: 9781467358057. @conference{175, title = {Experiments on far-field multichannel speech processing in smart homes}, author = { I. Rodomagoulakis and P. Giannoulis and Z. I. Skordilis and P. Maragos and G. Potamianos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/RGSMP_ExperimsFarfieldMultichannelSpeechProcessSmartHomes_DSP2013.pdf}, doi = {10.1109/ICDSP.2013.6622707}, isbn = {9781467358057}, year = {2013}, date = {2013-01-01}, booktitle = {2013 18th International Conference on Digital Signal Processing, DSP 2013}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Petros Maragos, Kimon Drakopoulos Segmentation and Skeletonization on Arbitrary Graphs Using Multiscale Morphology and Active Contours Book Chapter Breuß, Michael; Bruckstein, Alfred; Maragos, Petros (Ed.): Innovations for Shape Analysis: Models and Algorithms, pp. 53–75, Springer Berlin Heidelberg, Berlin, Heidelberg, 2013, ISBN: 978-3-642-34141-0. Abstract | BibTeX | Links: [Webpage] @inbook{Maragos2013, title = {Segmentation and Skeletonization on Arbitrary Graphs Using Multiscale Morphology and Active Contours}, author = {Petros Maragos and Kimon Drakopoulos}, editor = {Michael Breuß and Alfred Bruckstein and Petros Maragos}, url = {https://doi.org/10.1007/978-3-642-34141-0_3}, doi = {10.1007/978-3-642-34141-0_3}, isbn = {978-3-642-34141-0}, year = {2013}, date = {2013-01-01}, booktitle = {Innovations for Shape Analysis: Models and Algorithms}, pages = {53--75}, publisher = {Springer Berlin Heidelberg}, address = {Berlin, Heidelberg}, abstract = {In this chapter we focus on formulating and implementing on abstract domains such as arbitrary graphs popular methods and techniques developed for image analysis, in particular multiscale morphology and active contours. To this goal we extend existing work on graph morphology to multiscale dilation and erosion and implement them recursively using level sets of functions defined on the graph's nodes. We propose approximations to the calculation of the gradient and the divergence of vector functions defined on graphs and use these approximations to apply the technique of geodesic active contours for object detection on graphs via segmentation. 
Finally, using these novel ideas, we propose a method for multiscale shape skeletonization on arbitrary graphs.}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } In this chapter we focus on formulating and implementing on abstract domains such as arbitrary graphs popular methods and techniques developed for image analysis, in particular multiscale morphology and active contours. To this goal we extend existing work on graph morphology to multiscale dilation and erosion and implement them recursively using level sets of functions defined on the graph's nodes. We propose approximations to the calculation of the gradient and the divergence of vector functions defined on graphs and use these approximations to apply the technique of geodesic active contours for object detection on graphs via segmentation. Finally, using these novel ideas, we propose a method for multiscale shape skeletonization on arbitrary graphs. |
Petros Maragos Chapter Two - Representations for Morphological Image Operators and Analogies with Linear Operators Book Chapter Hawkes, Peter W (Ed.): Advances in Imaging and Electron Physics, 177 , pp. 45 - 187, Elsevier, 2013, ISSN: 1076-5670. Abstract | BibTeX | Links: [Webpage] @inbook{MARAGOS201345, title = {Chapter Two - Representations for Morphological Image Operators and Analogies with Linear Operators}, author = {Petros Maragos}, editor = {Peter W Hawkes}, url = {http://www.sciencedirect.com/science/article/pii/B9780124077027000024}, doi = {https://doi.org/10.1016/B978-0-12-407702-7.00002-4}, issn = {1076-5670}, year = {2013}, date = {2013-01-01}, booktitle = {Advances in Imaging and Electron Physics}, volume = {177}, pages = {45 - 187}, publisher = {Elsevier}, abstract = {This chapter deals with representation theoretical issues of nonlinear image operators, mainly based on the methodology of mathematical morphology, and more generally operators on lattices. After a brief overview of developments in morphological image operators both chronologically and thematically, the chapter provides a survey of some main concepts and results in the theory of lattices and morphological operators, especially of the monotone type. It also provides comparisons with linear operator theory. Then, it introduces a nonlinear signal space called complete weighted lattice, which generalizes both mathematical morphology and minimax algebra. Afterwards, it focuses on the representation of translation-invariant and/or increasing operators either on Euclidean spaces (or their discretized versions) or on complete weighted lattices by using a nonlinear basis. The results are operator representations as a supremum or infimum of nonlinear convolutions that are either of the max-plus type or their generalizations in weighted lattices. 
These representations have several potential applications in computation, imaging and vision, and nonlinear functional analysis.}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } This chapter deals with representation theoretical issues of nonlinear image operators, mainly based on the methodology of mathematical morphology, and more generally operators on lattices. After a brief overview of developments in morphological image operators both chronologically and thematically, the chapter provides a survey of some main concepts and results in the theory of lattices and morphological operators, especially of the monotone type. It also provides comparisons with linear operator theory. Then, it introduces a nonlinear signal space called complete weighted lattice, which generalizes both mathematical morphology and minimax algebra. Afterwards, it focuses on the representation of translation-invariant and/or increasing operators either on Euclidean spaces (or their discretized versions) or on complete weighted lattices by using a nonlinear basis. The results are operator representations as a supremum or infimum of nonlinear convolutions that are either of the max-plus type or their generalizations in weighted lattices. These representations have several potential applications in computation, imaging and vision, and nonlinear functional analysis. |
A Zlatintsi Music Signal Processing and Applications in Recognition PhD Thesis School of ECE, NTUA, 2013. Abstract | BibTeX | Links: [PDF] @phdthesis{Zlatintsi13, title = {Music Signal Processing and Applications in Recognition}, author = {A Zlatintsi}, url = {http://robotics.ntua.gr/wp-content/publications/Zlatintsi_PhDThesis_Dec2013_EMP.pdf}, year = {2013}, date = {2013-12-01}, school = {School of ECE, NTUA}, abstract = {This thesis lies in the area of signal processing and analysis of music signals using computational methods for the extraction of effective representations for automatic recognition. We explore and develop efficient algorithms using nonlinear methods for the analysis of the structure of music signals, which is of importance for their modeling. Our main research directions deal with the analysis of the structure and the characteristics of musical instruments in order to gain insight about their function and properties. We study the characteristics of the different genres of music. Finally, we evaluate the effectiveness of the proposed nonlinear models for the detection of perceptually important music and audio events. The approach we follow contributes to state-of-the-art technologies related to automatic computer-based recognition of musical signals and audio summarization, which nowadays are essential in everyday life. Because of the vast amount of music, audio and multimedia data in the web and our personal computers, the use of this study could be shown in applications such as automatic genre classification, automatic recognition of music’s basic structures, such as musical instruments, and audio content analysis for music and audio summarization. The above mentioned applications require robust solutions to information processing problems. Toward this goal, the development of efficient digital signal processing methods and the extraction of relevant features is of importance. 
In this thesis we propose such methods and algorithms for feature extraction with interesting results that render the descriptors of direct applicability. The proposed methods are applied on classification experiments illustrating that they can capture important aspects of music, such as the micro-variations of their structure. Descriptors based on macro-structures may reduce the complexity of the classification system, since satisfactory results can be achieved using simpler statistical models. Finally, the introduction of a ‘‘music’’ filterbank appears to be promising for automatic genre classification.}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } This thesis lies in the area of signal processing and analysis of music signals using computational methods for the extraction of effective representations for automatic recognition. We explore and develop efficient algorithms using nonlinear methods for the analysis of the structure of music signals, which is of importance for their modeling. Our main research directions deal with the analysis of the structure and the characteristics of musical instruments in order to gain insight about their function and properties. We study the characteristics of the different genres of music. Finally, we evaluate the effectiveness of the proposed nonlinear models for the detection of perceptually important music and audio events. The approach we follow contributes to state-of-the-art technologies related to automatic computer-based recognition of musical signals and audio summarization, which nowadays are essential in everyday life. Because of the vast amount of music, audio and multimedia data in the web and our personal computers, the use of this study could be shown in applications such as automatic genre classification, automatic recognition of music’s basic structures, such as musical instruments, and audio content analysis for music and audio summarization. 
The above mentioned applications require robust solutions to information processing problems. Toward this goal, the development of efficient digital signal processing methods and the extraction of relevant features is of importance. In this thesis we propose such methods and algorithms for feature extraction with interesting results that render the descriptors of direct applicability. The proposed methods are applied on classification experiments illustrating that they can capture important aspects of music, such as the micro-variations of their structure. Descriptors based on macro-structures may reduce the complexity of the classification system, since satisfactory results can be achieved using simpler statistical models. Finally, the introduction of a ‘‘music’’ filterbank appears to be promising for automatic genre classification. |
2012 |
Argiro Vatakis, Petros Maragos, Isidoros Rodomagoulakis, Charles Spence Assessing the effect of physical differences in the articulation of consonants and vowels on audiovisual temporal perception Journal Article Frontiers in Integrative Neuroscience, 6 , 2012, ISSN: 1662-5145. Abstract | BibTeX | Links: [PDF] [Webpage] @article{138, title = {Assessing the effect of physical differences in the articulation of consonants and vowels on audiovisual temporal perception}, author = {Argiro Vatakis and Petros Maragos and Isidoros Rodomagoulakis and Charles Spence}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/VMRS_ArticulConsonVowelAudioVisualTimePerception_fiin2012.pdf http://journal.frontiersin.org/article/10.3389/fnint.2012.00071/abstract}, doi = {10.3389/fnint.2012.00071}, issn = {1662-5145}, year = {2012}, date = {2012-01-01}, journal = {Frontiers in Integrative Neuroscience}, volume = {6}, abstract = {We investigated how the physical differences associated with the articulation of speech affect the temporal aspects of audiovisual speech perception. Video clips of consonants and vowels uttered by three different speakers were presented. The video clips were analyzed using an auditory-visual signal saliency model in order to compare signal saliency and behavioral data. Participants made temporal order judgments (TOJs) regarding which speech-stream (auditory or visual) had been presented first. The sensitivity of participants' TOJs and the point of subjective simultaneity (PSS) were analyzed as a function of the place, manner of articulation, and voicing for consonants, and the height/backness of the tongue and lip-roundedness for vowels. We expected that in the case of the place of articulation and roundedness, where the visual-speech signal is more salient, temporal perception of speech would be modulated by the visual-speech signal. No such effect was expected for the manner of articulation or height. 
The results demonstrate that for place and manner of articulation, participants' temporal percept was affected (although not always significantly) by highly-salient speech-signals with the visual-signals requiring smaller visual-leads at the PSS. This was not the case when height was evaluated. These findings suggest that in the case of audiovisual speech perception, a highly salient visual-speech signal may lead to higher probabilities regarding the identity of the auditory-signal that modulate the temporal window of multisensory integration of the speech-stimulus.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We investigated how the physical differences associated with the articulation of speech affect the temporal aspects of audiovisual speech perception. Video clips of consonants and vowels uttered by three different speakers were presented. The video clips were analyzed using an auditory-visual signal saliency model in order to compare signal saliency and behavioral data. Participants made temporal order judgments (TOJs) regarding which speech-stream (auditory or visual) had been presented first. The sensitivity of participants' TOJs and the point of subjective simultaneity (PSS) were analyzed as a function of the place, manner of articulation, and voicing for consonants, and the height/backness of the tongue and lip-roundedness for vowels. We expected that in the case of the place of articulation and roundedness, where the visual-speech signal is more salient, temporal perception of speech would be modulated by the visual-speech signal. No such effect was expected for the manner of articulation or height. The results demonstrate that for place and manner of articulation, participants' temporal percept was affected (although not always significantly) by highly-salient speech-signals with the visual-signals requiring smaller visual-leads at the PSS. This was not the case when height was evaluated. 
These findings suggest that in the case of audiovisual speech perception, a highly salient visual-speech signal may lead to higher probabilities regarding the identity of the auditory-signal that modulate the temporal window of multisensory integration of the speech-stimulus. |
Kimon Drakopoulos, Petros Maragos Active contours on graphs: Multiscale morphology and graphcuts Journal Article IEEE Journal on Selected Topics in Signal Processing, 6 (7), pp. 780–794, 2012, ISSN: 19324553. @article{139, title = {Active contours on graphs: Multiscale morphology and graphcuts}, author = {Kimon Drakopoulos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/DrakopoulosMaragos_ACs-on-Graphs-MultiscaleMorf-Graphcuts_ieeejSTSP2012.pdf}, doi = {10.1109/JSTSP.2012.2213675}, issn = {19324553}, year = {2012}, date = {2012-01-01}, journal = {IEEE Journal on Selected Topics in Signal Processing}, volume = {6}, number = {7}, pages = {780--794}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
A Zlatintsi, P Maragos AM-FM Modulation Features for Music Instrument Signal Analysis and Recognition Conference Proc. European Signal Processing Conference, Bucharest, Romania, 2012. Abstract | BibTeX | Links: [PDF] @conference{ZlMa12, title = {AM-FM Modulation Features for Music Instrument Signal Analysis and Recognition}, author = {A Zlatintsi and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/ZlatintsiMaragos_MusicalInstrumentsAMFM_EUSIPCO2012.pdf}, year = {2012}, date = {2012-08-01}, booktitle = {Proc. European Signal Processing Conference}, address = {Bucharest, Romania}, abstract = {In this paper, we explore a nonlinear AM-FM model to extract alternative features for music instrument recognition tasks. Amplitude and frequency micro-modulations are measured in musical signals and are employed to model the existing information. The features used are the multiband mean instantaneous amplitude (mean-IAM) and mean instantaneous frequency (mean-IFM) modulation. The instantaneous features are estimated using the multiband Gabor Energy Separation Algorithm (Gabor-ESA). An alternative method, the iterative-ESA is also explored; and initial experimentation shows that it could be used to estimate the harmonic content of a tone. The Gabor-ESA is evaluated against and in combination with Mel frequency cepstrum coefficients (MFCCs) using both static and dynamic classifiers. The method used in this paper has proven to be able to extract the fine-structured modulations of music signals; further, it has shown to be promising for recognition tasks accomplishing an error rate reduction up to 60% for the best recognition case combined with MFCCs.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, we explore a nonlinear AM-FM model to extract alternative features for music instrument recognition tasks. Amplitude and frequency micro-modulations are measured in musical signals and are employed to model the existing information. 
The features used are the multiband mean instantaneous amplitude (mean-IAM) and mean instantaneous frequency (mean-IFM) modulation. The instantaneous features are estimated using the multiband Gabor Energy Separation Algorithm (Gabor-ESA). An alternative method, the iterative-ESA is also explored; and initial experimentation shows that it could be used to estimate the harmonic content of a tone. The Gabor-ESA is evaluated against and in combination with Mel frequency cepstrum coefficients (MFCCs) using both static and dynamic classifiers. The method used in this paper has proven to be able to extract the fine-structured modulations of music signals; further, it has shown to be promising for recognition tasks accomplishing an error rate reduction up to 60% for the best recognition case combined with MFCCs. |
A Zlatintsi, P Maragos, A Potamianos, G Evangelopoulos A Saliency-Based Approach to Audio Event Detection and Summarization Conference Proc. European Signal Processing Conference, Bucharest, Romania, 2012. Abstract | BibTeX | Links: [PDF] @conference{ZMP+12, title = {A Saliency-Based Approach to Audio Event Detection and Summarization}, author = {A Zlatintsi and P Maragos and A Potamianos and G Evangelopoulos}, url = {http://robotics.ntua.gr/wp-content/publications/ZlatintsiMaragos+_SaliencyBasedAudioSummarization_EUSIPCO2012.pdf}, year = {2012}, date = {2012-08-01}, booktitle = {Proc. European Signal Processing Conference}, address = {Bucharest, Romania}, abstract = {In this paper, we approach the problem of audio summarization by saliency computation of audio streams, exploring the potential of a modulation model for the detection of perceptually important audio events based on saliency models, along with various fusion schemes for their combination. The fusion schemes include linear, adaptive and nonlinear methods. A machine learning approach, where training of the features is performed, was also applied for the purpose of comparison with the proposed technique. For the evaluation of the algorithm we use audio data taken from movies and we show that nonlinear fusion schemes perform best. The results are reported on the MovSum database, using objective evaluations (against ground-truth denoting the perceptually important audio events). 
Analysis of the selected audio segments is also performed against a labeled database in respect to audio categories, while a method for fine-tuning of the selected audio events is proposed.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, we approach the problem of audio summarization by saliency computation of audio streams, exploring the potential of a modulation model for the detection of perceptually important audio events based on saliency models, along with various fusion schemes for their combination. The fusion schemes include linear, adaptive and nonlinear methods. A machine learning approach, where training of the features is performed, was also applied for the purpose of comparison with the proposed technique. For the evaluation of the algorithm we use audio data taken from movies and we show that nonlinear fusion schemes perform best. The results are reported on the MovSum database, using objective evaluations (against ground-truth denoting the perceptually important audio events). Analysis of the selected audio segments is also performed against a labeled database in respect to audio categories, while a method for fine-tuning of the selected audio events is proposed. |
Epameinondas Antonakos, Vassilis Pitsikalis, Isidoros Rodomagoulakis, Petros Maragos Unsupervised classification of extreme facial events using active appearance models tracking for sign language videos Conference Proceedings - International Conference on Image Processing, ICIP, 2012, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{178, title = {Unsupervised classification of extreme facial events using active appearance models tracking for sign language videos}, author = { Epameinondas Antonakos and Vassilis Pitsikalis and Isidoros Rodomagoulakis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/APRM_UnsupervisClassifExtremeFacialEventsAAM-SignLangVideos_ICIP2012.pdf}, doi = {10.1109/ICIP.2012.6467133}, issn = {15224880}, year = {2012}, date = {2012-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {1409--1412}, abstract = {We propose an Unsupervised method for Extreme States Classification (UnESC) on feature spaces of facial cues of interest. The method is built upon Active Appearance Models (AAM) face tracking and on feature extraction of Global and Local AAMs. UnESC is applied primarily on facial pose, but is shown to be extendable for the case of local models on the eyes and mouth. Given the importance of facial events in Sign Languages we apply the UnESC on videos from two sign language corpora, both American (ASL) and Greek (GSL) yielding promising qualitative and quantitative results. Apart from the detection of extreme facial states, the proposed Un-ESC also has impact for SL corpora lacking any facial annotations.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We propose an Unsupervised method for Extreme States Classification (UnESC) on feature spaces of facial cues of interest. The method is built upon Active Appearance Models (AAM) face tracking and on feature extraction of Global and Local AAMs. 
UnESC is applied primarily on facial pose, but is shown to be extendable for the case of local models on the eyes and mouth. Given the importance of facial events in Sign Languages we apply the UnESC on videos from two sign language corpora, both American (ASL) and Greek (GSL) yielding promising qualitative and quantitative results. Apart from the detection of extreme facial states, the proposed UnESC also has impact for SL corpora lacking any facial annotations. |
Christos Georgakis, Petros Maragos, Georgios Evangelopoulos, Dimitrios Dimitriadis Dominant spatio-temporal modulations and energy tracking in videos: Application to interest point detection for action recognition Conference Proceedings - International Conference on Image Processing, ICIP, 2012, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{176, title = {Dominant spatio-temporal modulations and energy tracking in videos: Application to interest point detection for action recognition}, author = { Christos Georgakis and Petros Maragos and Georgios Evangelopoulos and Dimitrios Dimitriadis}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/GMED_SpatioTemporModulationsEnergyTrackVideos-InterestPointDetectActionRecogn_ICIP2012.pdf}, doi = {10.1109/ICIP.2012.6466966}, issn = {15224880}, year = {2012}, date = {2012-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {741--744}, abstract = {The presence of multiband amplitude and frequency modulations (AM-FM) in wideband signals, such as textured images or speech, has led to the development of efficient multicomponent modulation models for low-level image and sound analysis. Moreover, compact yet descriptive representations have emerged by tracking, through non-linear energy operators, the dominant model components across time, space or frequency.In this paper, we propose a generalization of such approaches in the 3D spatio-temporal domain and explore the benefits of incorporating the Dominant Component Analysis scheme for interest point detection in videos for action recognition. Within this framework, actions are implicitly considered as manifestations of spatio-temporal oscillations in the dynamic visual stream. Multiband filtering and energy operators are applied to track the source energy in both spatial and temporal frequency bands. 
A new measure for extracting keypoint locations is formulated as the temporal dominant energy computed over the locally dominant modulation components, in terms of spatial modulation energy, of the input video frames. Theoretical formulation is supported by evaluation and comparisons in human action classification, which demonstrate the potential of the proposed detector.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } The presence of multiband amplitude and frequency modulations (AM-FM) in wideband signals, such as textured images or speech, has led to the development of efficient multicomponent modulation models for low-level image and sound analysis. Moreover, compact yet descriptive representations have emerged by tracking, through non-linear energy operators, the dominant model components across time, space or frequency. In this paper, we propose a generalization of such approaches in the 3D spatio-temporal domain and explore the benefits of incorporating the Dominant Component Analysis scheme for interest point detection in videos for action recognition. Within this framework, actions are implicitly considered as manifestations of spatio-temporal oscillations in the dynamic visual stream. Multiband filtering and energy operators are applied to track the source energy in both spatial and temporal frequency bands. A new measure for extracting keypoint locations is formulated as the temporal dominant energy computed over the locally dominant modulation components, in terms of spatial modulation energy, of the input video frames. Theoretical formulation is supported by evaluation and comparisons in human action classification, which demonstrate the potential of the proposed detector. |
John N. Karigiannis, Costas S. Tzafestas Robustness and generalization of model-free learning for robot kinematic control using a nested-hierarchical multi-agent topology Conference Proceedings of the IEEE RAS and EMBS International Conference on Biomedical Robotics and Biomechatronics, 2012, ISSN: 21551774. @conference{32c, title = {Robustness and generalization of model-free learning for robot kinematic control using a nested-hierarchical multi-agent topology}, author = { John N. Karigiannis and Costas S. Tzafestas}, doi = {10.1109/BioRob.2012.6290276}, issn = {21551774}, year = {2012}, date = {2012-01-01}, booktitle = {Proceedings of the IEEE RAS and EMBS International Conference on Biomedical Robotics and Biomechatronics}, pages = {1140--1147}, abstract = {This paper focuses on evaluating the robustness and knowledge generalization properties of a model-free learning mechanism, applied for the kinematic control of robot manipulation chains based on a nested-hierarchical multi-agent architecture. In the proposed topology, the agents correspond to independent degrees-of-freedom (DOF) of the system, managing to gain experience over the task that they collaboratively perform by continuously exploring and exploiting their state-to-action mapping space. Each agent forms a local (partial) view of the global system state and task progress, through a recursive learning process. By organizing the agents in a nested topology, the goal is to facilitate modular scaling to more complex kinematic topologies, with loose control coupling among the agents. Reinforcement learning is applied within each agent, to evolve a local state-to-action mapping in a continuous domain, thus leading to a system that exhibits developmental properties. This work addresses problem settings in the domain of kinematic control of dexterous-redundant robot manipulation systems. 
The numerical experiments performed consider the case of a single-linkage open kinematic chain, presenting kinematic redundancies given the desired task-goal. The focal issue in these experiments is to assess the capacity of the proposed multi-agent system to progressively and autonomously acquire cooperative sensorimotor skills through a self-learning process, that is, without the use of any explicit model-based planning strategy. In this paper, generalization and robustness properties of the overall multi-agent system are explored. Furthermore, the proposed framework is evaluated in constrained motion tasks, both in static and non-static environments. The computational cost of the proposed multi-agent architecture is also assessed.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper focuses on evaluating the robustness and knowledge generalization properties of a model-free learning mechanism, applied for the kinematic control of robot manipulation chains based on a nested-hierarchical multi-agent architecture. In the proposed topology, the agents correspond to independent degrees-of-freedom (DOF) of the system, managing to gain experience over the task that they collaboratively perform by continuously exploring and exploiting their state-to-action mapping space. Each agent forms a local (partial) view of the global system state and task progress, through a recursive learning process. By organizing the agents in a nested topology, the goal is to facilitate modular scaling to more complex kinematic topologies, with loose control coupling among the agents. Reinforcement learning is applied within each agent, to evolve a local state-to-action mapping in a continuous domain, thus leading to a system that exhibits developmental properties. This work addresses problem settings in the domain of kinematic control of dexterous-redundant robot manipulation systems. 
The numerical experiments performed consider the case of a single-linkage open kinematic chain, presenting kinematic redundancies given the desired task-goal. The focal issue in these experiments is to assess the capacity of the proposed multi-agent system to progressively and autonomously acquire cooperative sensorimotor skills through a self-learning process, that is, without the use of any explicit model-based planning strategy. In this paper, generalization and robustness properties of the overall multi-agent system are explored. Furthermore, the proposed framework is evaluated in constrained motion tasks, both in static and non-static environments. The computational cost of the proposed multi-agent architecture is also assessed. |
Sheraz Khan, Nikos Mitsou, Dirk Wollherr, Costas Tzafestas An optimization approach for 3D environment mapping using normal vector uncertainty Conference 2012 12th International Conference on Control Automation Robotics & Vision (ICARCV), 2012 (December), 2012, ISBN: 978-1-4673-1872-3. Abstract | BibTeX | Links: [Webpage] @conference{29l, title = {An optimization approach for 3D environment mapping using normal vector uncertainty}, author = { Sheraz Khan and Nikos Mitsou and Dirk Wollherr and Costas Tzafestas}, url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=6485267}, doi = {10.1109/ICARCV.2012.6485267}, isbn = {978-1-4673-1872-3}, year = {2012}, date = {2012-01-01}, booktitle = {2012 12th International Conference on Control Automation Robotics & Vision (ICARCV)}, volume = {2012}, number = {December}, pages = {841--846}, abstract = {In this paper a novel approach for 3D environment mapping using registered robot poses is presented. The proposed algorithm focuses on improving the quality of robot generated 3D maps by incorporating the uncertainty of 3D points and propagating it into the normal vectors of surfaces. The uncertainty of normal vectors is an indicator of the quality of the detected surface. A controlled random search algorithm is applied to optimize a non-convex function of uncertain normal vectors and number of clusters in order to find the optimal threshold parameter for the segmentation process. This approach leads to an improved cluster coherence and thus better maps.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper a novel approach for 3D environment mapping using registered robot poses is presented. The proposed algorithm focuses on improving the quality of robot generated 3D maps by incorporating the uncertainty of 3D points and propagating it into the normal vectors of surfaces. The uncertainty of normal vectors is an indicator of the quality of the detected surface. 
A controlled random search algorithm is applied to optimize a non-convex function of uncertain normal vectors and number of clusters in order to find the optimal threshold parameter for the segmentation process. This approach leads to an improved cluster coherence and thus better maps. |
Nikos Mitsou, Roderick de Nijs, David Lenz, Johannes Frimberger, Dirk Wollherr, Kolja Kühnlenz, Costas Tzafestas Gesture and Sign Language in Human-Computer Interaction and Embodied Communication Conference Spatial Cognition VIII, 7206 , 2012, ISSN: 03029743. Abstract | BibTeX | Links: [Webpage] @conference{31b, title = {Gesture and Sign Language in Human-Computer Interaction and Embodied Communication}, author = { Nikos Mitsou and Roderick de Nijs and David Lenz and Johannes Frimberger and Dirk Wollherr and Kolja Kühnlenz and Costas Tzafestas}, url = {http://link.springer.com/10.1007/978-3-642-34182-3}, doi = {10.1007/978-3-642-34182-3}, issn = {03029743}, year = {2012}, date = {2012-01-01}, booktitle = {Spatial Cognition VIII}, volume = {7206}, pages = {54--73}, abstract = {We extend and develop an existing virtual agent system to generate communicative gestures for different embodiments (i.e. virtual or physical agents). This paper presents our ongoing work on an implementation of this system for the NAO humanoid robot. From a specification of multi-modal behaviors encoded with the behavior markup language, BML, the system synchronizes and realizes the verbal and nonverbal behaviors on the robot.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We extend and develop an existing virtual agent system to generate communicative gestures for different embodiments (i.e. virtual or physical agents). This paper presents our ongoing work on an implementation of this system for the NAO humanoid robot. From a specification of multi-modal behaviors encoded with the behavior markup language, BML, the system synchronizes and realizes the verbal and nonverbal behaviors on the robot. |
Anastasios Roussos, Stavros Theodorakis, Vassilis Pitsikalis, Petros Maragos Hand tracking and affine shape-appearance handshape sub-units in continuous sign language recognition Conference Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), 6553 LNCS (PART 1), 2012, ISSN: 03029743. Abstract | BibTeX | Links: [PDF] @conference{189, title = {Hand tracking and affine shape-appearance handshape sub-units in continuous sign language recognition}, author = { Anastasios Roussos and Stavros Theodorakis and Vassilis Pitsikalis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/RoussosTheodorakisPitsikalisMaragos_HandTrackingAffineSAHandshapeSUsCSLR_ECCV10-SGA.pdf}, doi = {10.1007/978-3-642-35749-7_20}, issn = {03029743}, year = {2012}, date = {2012-01-01}, booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {6553 LNCS}, number = {PART 1}, pages = {258--272}, abstract = {We propose and investigate a framework that utilizes novel aspects concerning probabilistic and morphological visual processing for the segmentation, tracking and handshape modeling of the hands, which is used as front-end for sign language video analysis. Our ultimate goal is to explore the automatic Handshape Sub-Unit (HSU) construction and moreover the exploitation of the overall system in automatic sign language recognition (ASLR). We employ probabilistic skin color detection followed by the proposed morphological algorithms and related shape filtering for fast and reliable segmentation of hands and head. This is then fed to our hand tracking system which emphasizes robust handling of occlusions based on forward-backward prediction and incorporation of probabilistic constraints. 
The tracking is exploited by an Affine-invariant Modeling of hand Shape-Appearance images, offering a compact and descriptive representation of the hand configurations. We further propose that the handshape features extracted via the fitting of this model are utilized to construct in an unsupervised way basic HSUs. We first provide intuitive results on the HSU to sign mapping and further quantitatively evaluate the integrated system and the constructed HSUs on ASLR experiments at the sub-unit and sign level. These are conducted on continuous SL data from the BU400 corpus and investigate the effect of the involved parameters. The experiments indicate the effectiveness of the overall approach and especially for the modeling of handshapes when incorporated in the HSU-based framework showing promising results}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We propose and investigate a framework that utilizes novel aspects concerning probabilistic and morphological visual processing for the segmentation, tracking and handshape modeling of the hands, which is used as front-end for sign language video analysis. Our ultimate goal is to explore the automatic Handshape Sub-Unit (HSU) construction and moreover the exploitation of the overall system in automatic sign language recognition (ASLR). We employ probabilistic skin color detection followed by the proposed morphological algorithms and related shape filtering for fast and reliable segmentation of hands and head. This is then fed to our hand tracking system which emphasizes robust handling of occlusions based on forward-backward prediction and incorporation of probabilistic constraints. The tracking is exploited by an Affine-invariant Modeling of hand Shape-Appearance images, offering a compact and descriptive representation of the hand configurations. We further propose that the handshape features extracted via the fitting of this model are utilized to construct in an unsupervised way basic HSUs. 
We first provide intuitive results on the HSU to sign mapping and further quantitatively evaluate the integrated system and the constructed HSUs on ASLR experiments at the sub-unit and sign level. These are conducted on continuous SL data from the BU400 corpus and investigate the effect of the involved parameters. The experiments indicate the effectiveness of the overall approach and especially for the modeling of handshapes when incorporated in the HSU-based framework showing promising results. |
Sotirios Stasinopoulos, Petros Maragos Human action recognition using Histographic methods and hidden Markov models for visual martial arts applications Conference Image Processing (ICIP), 2012 19th IEEE International Conference on, 2012. @conference{179, title = {Human action recognition using Histographic methods and hidden Markov models for visual martial arts applications}, author = { Sotirios Stasinopoulos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/StasinopoulosMaragos_ActionRecognHistogrHMMsVisualMartialArts_icip2012.pdf}, year = {2012}, date = {2012-01-01}, booktitle = {Image Processing (ICIP), 2012 19th IEEE International Conference on}, pages = {745--748}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Stavros Theodorakis, Vassilis Pitsikalis, Isidoros Rodomagoulakis, Petros Maragos RECOGNITION WITH RAW CANONICAL PHONETIC MOVEMENT AND HANDSHAPE SUBUNITS ON VIDEOS OF CONTINUOUS SIGN LANGUAGE Conference Proc. IEEE Int'l Conf. on Image Processing, (Icip), 2012, ISBN: 9781467325332. @conference{177, title = {RECOGNITION WITH RAW CANONICAL PHONETIC MOVEMENT AND HANDSHAPE SUBUNITS ON VIDEOS OF CONTINUOUS SIGN LANGUAGE}, author = { Stavros Theodorakis and Vassilis Pitsikalis and Isidoros Rodomagoulakis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/TPM_ContSignLangRecognRawCanonPhonSubunits_ICIP2012.pdf}, isbn = {9781467325332}, year = {2012}, date = {2012-01-01}, booktitle = {Proc. IEEE Int'l Conf. on Image Processing}, number = {Icip}, pages = {1413--1416}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Spyros V. Velanas, Costas S. Tzafestas Model-mediated telehaptic perception of delayed curvature Conference Proceedings - IEEE International Workshop on Robot and Human Interactive Communication, 2012, ISBN: 9781467346054. @conference{30g, title = {Model-mediated telehaptic perception of delayed curvature}, author = { Spyros V. Velanas and Costas S. Tzafestas}, doi = {10.1109/ROMAN.2012.6343871}, isbn = {9781467346054}, year = {2012}, date = {2012-01-01}, booktitle = {Proceedings - IEEE International Workshop on Robot and Human Interactive Communication}, pages = {941--947}, abstract = {This paper proposes a model-mediated telemanipulation scheme, focusing on the kinaesthetic perception of specific geometric properties of the remote environment in the presence of time delay. The basic idea is inspired from previous work on impedance-reflection teleoperation, which is here extended to incorporate the construction of a two-dimensional local geometric model. This model incorporates the local curvature of the remote environment, estimated online using a recursive least squares (RLS) method, which is then used to reconstruct a virtual surface model at the master site for haptic display. A series of experiments has been conducted, where each subject manipulated the haptic master to kinaes-thetically explore the surface of a remote (virtual) environment. The analysis of the obtained experimental results, in terms of telehaptic discrimination of curvature, shows the effectiveness of the proposed model-mediated scheme at mitigating some of the adverse effects of time delay in the communication loop. textcopyright 2012 IEEE.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper proposes a model-mediated telemanipulation scheme, focusing on the kinaesthetic perception of specific geometric properties of the remote environment in the presence of time delay. 
The basic idea is inspired from previous work on impedance-reflection teleoperation, which is here extended to incorporate the construction of a two-dimensional local geometric model. This model incorporates the local curvature of the remote environment, estimated online using a recursive least squares (RLS) method, which is then used to reconstruct a virtual surface model at the master site for haptic display. A series of experiments has been conducted, where each subject manipulated the haptic master to kinaesthetically explore the surface of a remote (virtual) environment. The analysis of the obtained experimental results, in terms of telehaptic discrimination of curvature, shows the effectiveness of the proposed model-mediated scheme at mitigating some of the adverse effects of time delay in the communication loop. © 2012 IEEE. |
2011 |
Dimitrios Dimitriadis, Petros Maragos, Alexandros Potamianos On the effects of filterbank design and energy computation on robust speech recognition Journal Article IEEE Transactions on Audio, Speech and Language Processing, 19 (6), pp. 1504–1516, 2011, ISSN: 15587916. Abstract | BibTeX | Links: [PDF] @article{137, title = {On the effects of filterbank design and energy computation on robust speech recognition}, author = {Dimitrios Dimitriadis and Petros Maragos and Alexandros Potamianos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/DimitriadisMaragosPotamianos_Effects-Filterbank-Design-Energy-Computation-Robust-Speech-Recognition_ieeeTASLP_aug11.pdf}, doi = {10.1109/TASL.2010.2092766}, issn = {15587916}, year = {2011}, date = {2011-01-01}, journal = {IEEE Transactions on Audio, Speech and Language Processing}, volume = {19}, number = {6}, pages = {1504--1516}, abstract = {In this paper, we examine how energy computation and filterbank design contribute to the overall front-end robustness, especially when the investigated features are applied to noisy speech signals, in mismatched training-testing conditions. In prior work (“Auditory Teager energy cepstrum coefficients for robust speech recognition,” D. Dimitriadis, P. Maragos, and A. Potamianos, in Proc. Eurospeech'05, Sep. 2005), a novel feature set called “Teager energy cepstrum coefficients” (TECCs) has been proposed, employing a dense, smooth filterbank and alternative energy computation schemes. TECCs were shown to be more robust to noise and exhibit improved performance compared to the widely used Mel frequency cepstral coefficients (MFCCs). In this paper, we attempt to interpret these results using a combined theoretical and experimental analysis framework. 
Specifically, we investigate in detail the connection between the filterbank design, i.e., the filter shape and bandwidth, the energy estimation scheme and the automatic speech recognition (ASR) performance under a variety of additive and/or convolutional noise conditions. For this purpose: 1) the performance of filterbanks using triangular, Gabor, and Gammatone filters with various bandwidths and filter positions are examined under different noisy speech recognition tasks, and 2) the squared amplitude and Teager–Kaiser energy operators are compared as two alternative approaches of computing the signal energy. Our end-goal is to understand how to select the most efficient filterbank and energy computation scheme that are maximally robust under both clean and noisy recording conditions. Theoretical and experimental results show that: 1) the filter bandwidth is one of the most important factors affecting speech recognition performance in noise, while the shape of the filter is of secondary importance, and 2) the Teager–Kaiser operator outperforms (on the average and for most noise types) the squared amplitude energy computation scheme for speech recognition in noisy conditions, especially, for large filter bandwidths. Experimental results show that selecting the appropriate filterbank and energy computation scheme can lead to significant error rate reduction over both MFCC and perceptual linear prediction (PLP) features for a variety of speech recognition tasks. 
A relative error rate reduction of up to $\sim$30\% for MFCCs and $\sim$39\% for PLPs is shown for the Aurora-3 Spanish Task.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this paper, we examine how energy computation and filterbank design contribute to the overall front-end robustness, especially when the investigated features are applied to noisy speech signals, in mismatched training-testing conditions. In prior work (“Auditory Teager energy cepstrum coefficients for robust speech recognition,” D. Dimitriadis, P. Maragos, and A. Potamianos, in Proc. Eurospeech'05, Sep. 2005), a novel feature set called “Teager energy cepstrum coefficients” (TECCs) has been proposed, employing a dense, smooth filterbank and alternative energy computation schemes. TECCs were shown to be more robust to noise and exhibit improved performance compared to the widely used Mel frequency cepstral coefficients (MFCCs). In this paper, we attempt to interpret these results using a combined theoretical and experimental analysis framework. Specifically, we investigate in detail the connection between the filterbank design, i.e., the filter shape and bandwidth, the energy estimation scheme and the automatic speech recognition (ASR) performance under a variety of additive and/or convolutional noise conditions. 
For this purpose: 1) the performance of filterbanks using triangular, Gabor, and Gammatone filters with various bandwidths and filter positions are examined under different noisy speech recognition tasks, and 2) the squared amplitude and Teager–Kaiser energy operators are compared as two alternative approaches of computing the signal energy. Our end-goal is to understand how to select the most efficient filterbank and energy computation scheme that are maximally robust under both clean and noisy recording conditions. Theoretical and experimental results show that: 1) the filter bandwidth is one of the most important factors affecting speech recognition performance in noise, while the shape of the filter is of secondary importance, and 2) the Teager–Kaiser operator outperforms (on the average and for most noise types) the squared amplitude energy computation scheme for speech recognition in noisy conditions, especially, for large filter bandwidths. Experimental results show that selecting the appropriate filterbank and energy computation scheme can lead to significant error rate reduction over both MFCC and perceptual linear prediction (PLP) features for a variety of speech recognition tasks. A relative error rate reduction of up to ~30% for MFCCs and ~39% for PLPs is shown for the Aurora-3 Spanish Task. |
A Zlatintsi, P Maragos Musical Instruments Signal Analysis and Recognition Using Fractal Features Conference Proc. European Signal Processing Conference, Barcelona, Spain, 2011. Abstract | BibTeX | Links: [PDF] @conference{ZlMa11, title = {Musical Instruments Signal Analysis and Recognition Using Fractal Features}, author = {A Zlatintsi and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/ZlatintsiMaragos_MusicalInstrumentsMFD_EUSIPCO2011.pdf}, year = {2011}, date = {2011-08-01}, booktitle = {Proc. European Signal Processing Conference}, address = {Barcelona, Spain}, abstract = {Analyzing the structure of music signals at multiple time scales is of importance both for modeling music signals and their automatic computer-based recognition. In this paper we propose the multiscale fractal dimension profile as a descriptor useful to quantify the multiscale complexity of the music waveform. We have experimentally found that this descriptor can discriminate several aspects among different music instruments. We compare the descriptiveness of our features against that of Mel frequency cepstral coefficients (MFCCs) using both static and dynamic classifiers, such as Gaussian mixture models (GMMs) and hidden Markov models (HMMs). The methods and features proposed in this paper are promising for music signal analysis and of direct applicability in large-scale music classification tasks.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Analyzing the structure of music signals at multiple time scales is of importance both for modeling music signals and their automatic computer-based recognition. In this paper we propose the multiscale fractal dimension profile as a descriptor useful to quantify the multiscale complexity of the music waveform. We have experimentally found that this descriptor can discriminate several aspects among different music instruments. 
We compare the descriptiveness of our features against that of Mel frequency cepstral coefficients (MFCCs) using both static and dynamic classifiers, such as Gaussian mixture models (GMMs) and hidden Markov models (HMMs). The methods and features proposed in this paper are promising for music signal analysis and of direct applicability in large-scale music classification tasks. |
N Malandrakis, A Potamianos, G Evangelopoulos, A Zlatintsi A Supervised Approach to Movie Emotion Tracking Conference Proc. {IEEE} Int'l Conf. Acous., Speech, and Signal Processing, Prague, Czech Republic, 2011. Abstract | BibTeX | Links: [PDF] @conference{MPE+11, title = {A Supervised Approach to Movie Emotion Tracking}, author = {N Malandrakis and A Potamianos and G Evangelopoulos and A Zlatintsi}, url = {http://robotics.ntua.gr/wp-content/publications/Malandrakis+_movie_emotion_ICASSP11.pdf}, year = {2011}, date = {2011-05-01}, booktitle = {Proc. {IEEE} Int'l Conf. Acous., Speech, and Signal Processing}, address = {Prague, Czech Republic}, abstract = {In this paper, we present experiments on continuous time, continuous scale affective movie content recognition (emotion tracking). A major obstacle for emotion research has been the lack of appropriately annotated databases, limiting the potential for supervised algorithms. To that end we develop and present a database of movie affect, annotated in continuous time, on a continuous valence-arousal scale. Supervised learning methods are proposed to model the continuous affective response using hidden Markov Models (independent) in each dimension. These models classify each video frame into one of seven discrete categories (in each dimension); the discrete-valued curves are then converted to continuous values via spline interpolation. A variety of audio-visual features are investigated and an optimal feature set is selected. The potential of the method is experimentally verified on twelve 30-minute movie clips with good precision at a macroscopic level.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, we present experiments on continuous time, continuous scale affective movie content recognition (emotion tracking). A major obstacle for emotion research has been the lack of appropriately annotated databases, limiting the potential for supervised algorithms. 
To that end we develop and present a database of movie affect, annotated in continuous time, on a continuous valence-arousal scale. Supervised learning methods are proposed to model the continuous affective response using hidden Markov Models (independent) in each dimension. These models classify each video frame into one of seven discrete categories (in each dimension); the discrete-valued curves are then converted to continuous values via spline interpolation. A variety of audio-visual features are investigated and an optimal feature set is selected. The potential of the method is experimentally verified on twelve 30-minute movie clips with good precision at a macroscopic level. |
Athanasia Zlatintsi, Petros Maragos MUSICAL INSTRUMENTS SIGNAL ANALYSIS AND RECOGNITION USING FRACTAL FEATURES Conference Proc. 19th European Signal Processing Conference (EUSIPCO-2011), (Eusipco), 2011. @conference{182, title = {MUSICAL INSTRUMENTS SIGNAL ANALYSIS AND RECOGNITION USING FRACTAL FEATURES}, author = { Athanasia Zlatintsi and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/ZlatintsiMaragos_MusicalInstrumentsMFD_EUSIPCO2011.pdf}, year = {2011}, date = {2011-01-01}, booktitle = {Proc. 19th European Signal Processing Conference (EUSIPCO-2011)}, number = {Eusipco}, pages = {684--688}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Nikos Mitsou, Irene Ntoutsi, Dirk Wollherr, Costas Tzafestas, Hans Peter Kriegel Revealing cluster formation over huge volatile robotic data Conference Proceedings - IEEE International Conference on Data Mining, ICDM, 2011, ISSN: 15504786. @conference{33b, title = {Revealing cluster formation over huge volatile robotic data}, author = { Nikos Mitsou and Irene Ntoutsi and Dirk Wollherr and Costas Tzafestas and Hans Peter Kriegel}, doi = {10.1109/ICDMW.2011.147}, issn = {15504786}, year = {2011}, date = {2011-01-01}, booktitle = {Proceedings - IEEE International Conference on Data Mining, ICDM}, pages = {450--457}, abstract = {In this paper, we propose a driven by the robotics field method for revealing global clusters over a fast, huge and volatile stream of robotic data. The stream comes from a mobile robot which autonomously navigates in an unknown environment perceiving it through its sensors. The sensor data arrives fast, is huge and evolves quickly over time as the robot explores the environment and observes new objects or new parts of already observed objects. To deal with the nature of data, we propose a grid-based algorithm that updates the grid structure and adjusts the so far built clusters online. Our method is capable of detecting object formations over time based on the partial observations of the robot at each time point. Experiments on real data verify the usefulness and efficiency of our method.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, we propose a driven by the robotics field method for revealing global clusters over a fast, huge and volatile stream of robotic data. The stream comes from a mobile robot which autonomously navigates in an unknown environment perceiving it through its sensors. The sensor data arrives fast, is huge and evolves quickly over time as the robot explores the environment and observes new objects or new parts of already observed objects. 
To deal with the nature of data, we propose a grid-based algorithm that updates the grid structure and adjusts the so far built clusters online. Our method is capable of detecting object formations over time based on the partial observations of the robot at each time point. Experiments on real data verify the usefulness and efficiency of our method. |
Vassilis Pitsikalis, Stavros Theodorakis, Christian Vogler, Petros Maragos Advances in phonetics-based sub-unit modeling for transcription alignment and sign language recognition Conference IEEE Computer Society Conference on Computer Vision and Pattern Recognition Workshops, 2011, ISSN: 21607508. Abstract | BibTeX | Links: [PDF] @conference{183, title = {Advances in phonetics-based sub-unit modeling for transcription alignment and sign language recognition}, author = { Vassilis Pitsikalis and Stavros Theodorakis and Christian Vogler and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PitsikalisTheodorakisVoglerMaragos_Advances-Phonetics-based-SubUnit-Modeling-Transcription-Alignmentand-SignLanguage-Recognition_CVPR2011Workshop.pdf}, doi = {10.1109/CVPRW.2011.5981681}, issn = {21607508}, year = {2011}, date = {2011-01-01}, booktitle = {IEEE Computer Society Conference on Computer Vision and Pattern Recognition Workshops}, abstract = {We explore novel directions for incorporating phonetic transcriptions into sub-unit based statistical models for sign language recognition. First, we employ a new symbolic processing approach for converting sign language annotations, based on HamNoSys symbols, into structured sequences of labels according to the Posture-Detention-Transition-Steady Shift phonetic model. Next, we exploit these labels, and their correspondence with visual features to construct phonetics-based statistical sub-unit models. We also align these sequences, via the statistical sub-unit construction and decoding, to the visual data to extract time boundary information that they would lack otherwise. The resulting phonetic sub-units offer new perspectives for sign language analysis, phonetic modeling, and automatic recognition. 
We evaluate this approach via sign language recognition experiments on an extended Lemmas Corpus of Greek Sign Language, which results not only in improved performance compared to pure data-driven approaches, but also in meaningful phonetic sub-unit models that can be further exploited in interdisciplinary sign language analysis.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We explore novel directions for incorporating phonetic transcriptions into sub-unit based statistical models for sign language recognition. First, we employ a new symbolic processing approach for converting sign language annotations, based on HamNoSys symbols, into structured sequences of labels according to the Posture-Detention-Transition-Steady Shift phonetic model. Next, we exploit these labels, and their correspondence with visual features to construct phonetics-based statistical sub-unit models. We also align these sequences, via the statistical sub-unit construction and decoding, to the visual data to extract time boundary information that they would lack otherwise. The resulting phonetic sub-units offer new perspectives for sign language analysis, phonetic modeling, and automatic recognition. We evaluate this approach via sign language recognition experiments on an extended Lemmas Corpus of Greek Sign Language, which results not only in improved performance compared to pure data-driven approaches, but also in meaningful phonetic sub-unit models that can be further exploited in interdisciplinary sign language analysis. |
I Rodomagoulakis, S Theodorakis Experiments on global and local active appearance models for analysis of sign language facial expressions Conference 9th International Gesture Workshop, 2011. @conference{184, title = {Experiments on global and local active appearance models for analysis of sign language facial expressions}, author = { I Rodomagoulakis and S Theodorakis}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.414.7847&rep=rep1&type=pdf}, year = {2011}, date = {2011-01-01}, booktitle = {9th International Gesture Workshop}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
S Theodorakis, V Pitsikalis, P Maragos Advances in Dynamic-Static Integration of Manual Cues for Sign Language Recognition Conference ... Gesture Workshop (GW 2011): Gesture in ..., 2011. @conference{185, title = {Advances in Dynamic-Static Integration of Manual Cues for Sign Language Recognition}, author = { S Theodorakis and V Pitsikalis and P Maragos}, url = {http://scholar.google.com.tr/scholar?start=250&q=%22sign+language+recognition%22&hl=en&as_sdt=0,5&as_ylo=2011#0}, year = {2011}, date = {2011-01-01}, booktitle = {{\ldots} Gesture Workshop (GW 2011): Gesture in {\ldots}}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
2010 |
Ioannis Gkioulekas, Georgios Evangelopoulos, Petros Maragos Spatial Bayesian surprise for image saliency and quality assessment Conference Proceedings - International Conference on Image Processing, ICIP, 2010, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{187, title = {Spatial Bayesian surprise for image saliency and quality assessment}, author = { Ioannis Gkioulekas and Georgios Evangelopoulos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/GkioulekasEvangMaragos_SpatialBayesianSurpriseImageSaliency_ICIP10.pdf}, doi = {10.1109/ICIP.2010.5650991}, issn = {15224880}, year = {2010}, date = {2010-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {1081--1084}, abstract = {We propose an alternative interpretation of Bayesian surprise in the spatial domain, to account for saliency arising from contrast in image context. Our saliency formulation is integrated in three different application scenaria, with considerable improvements in performance: 1) visual attention prediction, validated using eye- and mouse-tracking data, 2) region of interest detection, to improve scale selection and localization, 3) image quality assessment to achieve better agreement with subjective human evaluations.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We propose an alternative interpretation of Bayesian surprise in the spatial domain, to account for saliency arising from contrast in image context. Our saliency formulation is integrated in three different application scenaria, with considerable improvements in performance: 1) visual attention prediction, validated using eye- and mouse-tracking data, 2) region of interest detection, to improve scale selection and localization, 3) image quality assessment to achieve better agreement with subjective human evaluations. |
John N Karigiannis, Theodoros I Rekatsinas, Costas S Tzafestas Hierarchical Multi-Agent Architecture employing TD($\lambda$) Learning with Function Approximators for Robot Skill Acquisition Conference Architecture, 2010. @conference{36b, title = {Hierarchical Multi-Agent Architecture employing TD($\lambda$) Learning with Function Approximators for Robot Skill Acquisition}, author = { John N Karigiannis and Theodoros I Rekatsinas and Costas S Tzafestas}, year = {2010}, date = {2010-01-01}, booktitle = {Architecture}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
John N. Karigiannis, Theodoros I. Rekatsinas, Costas S. Tzafestas Fuzzy rule based neuro-dynamic programming for mobile robot skill acquisition on the basis of a nested multi-agent architecture Conference 2010 IEEE International Conference on Robotics and Biomimetics, ROBIO 2010, 2010, ISBN: 9781424493173. @conference{34b, title = {Fuzzy rule based neuro-dynamic programming for mobile robot skill acquisition on the basis of a nested multi-agent architecture}, author = { John N. Karigiannis and Theodoros I. Rekatsinas and Costas S. Tzafestas}, doi = {10.1109/ROBIO.2010.5723346}, isbn = {9781424493173}, year = {2010}, date = {2010-01-01}, booktitle = {2010 IEEE International Conference on Robotics and Biomimetics, ROBIO 2010}, pages = {312--319}, abstract = {Biologically inspired architectures that mimic the organizational structure of living organisms and in general frameworks that will improve the design of intelligent robots attract significant attention from the research community. Self-organization problems, intrinsic behaviors as well as effective learning and skill transfer processes in the context of robotic systems have been significantly investigated by researchers. Our work presents a new framework of developmental skill learning process by introducing a hierarchical nested multi-agent architecture. A neuro-dynamic learning mechanism employing function approximators in a fuzzified state-space is utilized, leading to a collaborative control scheme among the distributed agents engaged in a continuous space, which enables the multi-agent system to learn, over a period of time, how to perform sequences of continuous actions in a cooperative manner without any prior task model. The agents comprising the system manage to gain experience over the task that they collaboratively perform by continuously exploring and exploiting their state-to-action mapping space. 
For the specific problem setting, the proposed theoretical framework is employed in the case of two simulated e-Puck robots performing a collaborative box-pushing task. This task involves active cooperation between the robots in order to jointly push an object on a plane to a specified goal location. We should note that 1) there are no contact points specified for the two e-Pucks and 2) the shape of the object is indifferent. The actuated wheels of the mobile robots are considered as the independent agents that have to build up cooperative skills over time, in order for the robot to demonstrate intelligent behavior. Our goal in this experimental study is to evaluate both the proposed hierarchical multi-agent architecture, as well as the methodological control framework. Such a hierarchical multi-agent approach is envisioned to be highly scalable for the control of complex biologically inspired robot locomotion systems.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Biologically inspired architectures that mimic the organizational structure of living organisms and in general frameworks that will improve the design of intelligent robots attract significant attention from the research community. Self-organization problems, intrinsic behaviors as well as effective learning and skill transfer processes in the context of robotic systems have been significantly investigated by researchers. Our work presents a new framework of developmental skill learning process by introducing a hierarchical nested multi-agent architecture. A neuro-dynamic learning mechanism employing function approximators in a fuzzified state-space is utilized, leading to a collaborative control scheme among the distributed agents engaged in a continuous space, which enables the multi-agent system to learn, over a period of time, how to perform sequences of continuous actions in a cooperative manner without any prior task model. 
The agents comprising the system manage to gain experience over the task that they collaboratively perform by continuously exploring and exploiting their state-to-action mapping space. For the specific problem setting, the proposed theoretical framework is employed in the case of two simulated e-Puck robots performing a collaborative box-pushing task. This task involves active cooperation between the robots in order to jointly push an object on a plane to a specified goal location. We should note that 1) there are no contact points specified for the two e-Pucks and 2) the shape of the object is indifferent. The actuated wheels of the mobile robots are considered as the independent agents that have to build up cooperative skills over time, in order for the robot to demonstrate intelligent behavior. Our goal in this experimental study is to evaluate both the proposed hierarchical multi-agent architecture, as well as the methodological control framework. Such a hierarchical multi-agent approach is envisioned to be highly scalable for the control of complex biologically inspired robot locomotion systems. |
Vassilis Pitsikalis, Stavros Theodorakis, Petros Maragos Data-Driven Sub-Units and Modeling Structure for Continuous Sign Language Recognition with Multiple Cues Conference Proceedings of the Language Resources and Evaluation Conference Workshop on the Representation and Processing of Sign Languages : Corpora and Sign Languages Technologies, 2010. @conference{190, title = {Data-Driven Sub-Units and Modeling Structure for Continuous Sign Language Recognition with Multiple Cues}, author = { Vassilis Pitsikalis and Stavros Theodorakis and Petros Maragos}, year = {2010}, date = {2010-01-01}, booktitle = {Proceedings of the Language Resources and Evaluation Conference Workshop on the Representation and Processing of Sign Languages : Corpora and Sign Languages Technologies}, pages = {196--203}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Anastasios Roussos, Petros Maragos Tensor-based image diffusions derived from generalizations of the total variation and beltrami functionals Conference Proceedings - International Conference on Image Processing, ICIP, 2010, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{188, title = {Tensor-based image diffusions derived from generalizations of the total variation and beltrami functionals}, author = { Anastasios Roussos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/RoussosMaragos_TensorBasedImageDiffusions_ICIP10.pdf}, doi = {10.1109/ICIP.2010.5653241}, issn = {15224880}, year = {2010}, date = {2010-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {4141--4144}, abstract = {We introduce a novel functional for vector-valued images that generalizes several variational methods, such as the Total Variation and Beltrami Functionals. This functional is based on the structure tensor that describes the geometry of image structures within the neighborhood of each point. We first generalize the Beltrami functional based on the image patches and using embeddings in high dimensional spaces. Proceeding to the most general form of the proposed functional, we prove that its minimization leads to a nonlinear anisotropic diffusion that is regularized, in the sense that its diffusion tensor contains convolutions with a kernel. Using this result we propose two novel diffusion methods, the Generalized Beltrami Flow and the Tensor Total Variation. These methods combine the advantages of the variational approaches with those of the tensor-based diffusion approaches.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We introduce a novel functional for vector-valued images that generalizes several variational methods, such as the Total Variation and Beltrami Functionals. 
This functional is based on the structure tensor that describes the geometry of image structures within the neighborhood of each point. We first generalize the Beltrami functional based on the image patches and using embeddings in high dimensional spaces. Proceeding to the most general form of the proposed functional, we prove that its minimization leads to a nonlinear anisotropic diffusion that is regularized, in the sense that its diffusion tensor contains convolutions with a kernel. Using this result we propose two novel diffusion methods, the Generalized Beltrami Flow and the Tensor Total Variation. These methods combine the advantages of the variational approaches with those of the tensor-based diffusion approaches. |
Anastasios Roussos, Stavros Theodorakis, Vassilis Pitsikalis, Petros Maragos Affine-invariant modeling of shape-appearance images applied on sign language handshape classification Conference Proceedings - International Conference on Image Processing, ICIP, 2010, ISSN: 15224880. @conference{186, title = {Affine-invariant modeling of shape-appearance images applied on sign language handshape classification}, author = { Anastasios Roussos and Stavros Theodorakis and Vassilis Pitsikalis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/Roussos+_AffineInvariantModelingSLHandshapeClassification_ICIP10.pdf}, doi = {10.1109/ICIP.2010.5651358}, issn = {15224880}, year = {2010}, date = {2010-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {1417--1420}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Stavros Theodorakis, Vassilis Pitsikalis, Petros Maragos Model-level data-driven sub-units for signs in videos of continuous sign language Conference ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings, 2010, ISSN: 15206149. Abstract | BibTeX | Links: [PDF] @conference{191, title = {Model-level data-driven sub-units for signs in videos of continuous sign language}, author = { Stavros Theodorakis and Vassilis Pitsikalis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/TheodorakisPitsikalisMaragos_ModelDatadrivenSubunitsSignsVideoContSignLang_ICASSP2010.pdf}, doi = {10.1109/ICASSP.2010.5495875}, issn = {15206149}, year = {2010}, date = {2010-01-01}, booktitle = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings}, pages = {2262--2265}, abstract = {We investigate the issue of sign language automatic phonetic sub-unit modeling, that is completely data driven and without any prior phonetic information. A first step of visual processing leads to simple and effective region-based visual features. Prior to the sub-unit modeling we propose to employ a pronunciation clustering step with respect to each sign. Afterwards, for each sign and pronunciation group we find the time segmentation at the hidden Markov model (HMM) level. The models employed refer to movements as a sequence of dominant hand positions. The constructed segments are exploited explicitly at the model level via hierarchical clustering of HMMs and lead to the data-driven movement sub-unit construction. The constructed movement sub-units are evaluated in qualitative analysis experiments on data from the Boston University (BU)-400 American Sign Language corpus showing promising results. 
\textcopyright{} 2010 IEEE.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We investigate the issue of sign language automatic phonetic sub-unit modeling, that is completely data driven and without any prior phonetic information. A first step of visual processing leads to simple and effective region-based visual features. Prior to the sub-unit modeling we propose to employ a pronunciation clustering step with respect to each sign. Afterwards, for each sign and pronunciation group we find the time segmentation at the hidden Markov model (HMM) level. The models employed refer to movements as a sequence of dominant hand positions. The constructed segments are exploited explicitly at the model level via hierarchical clustering of HMMs and lead to the data-driven movement sub-unit construction. The constructed movement sub-units are evaluated in qualitative analysis experiments on data from the Boston University (BU)-400 American Sign Language corpus showing promising results. © 2010 IEEE. |
Spyros V Velanas, Costas S Tzafestas Human Telehaptic Perception of Stiffness using an Adaptive Impedance Reflection Bilateral Teleoperation Control Scheme Conference In: Proceedings of the IEEE International Workshop on Robot and Human Interactive Communication, ROMAN-2010, 2010, ISBN: 9781424479894. @conference{35b, title = {Human Telehaptic Perception of Stiffness using an Adaptive Impedance Reflection Bilateral Teleoperation Control Scheme}, author = { Spyros V Velanas and Costas S Tzafestas}, isbn = {9781424479894}, year = {2010}, date = {2010-01-01}, booktitle = {In: Proceedings of the IEEE International Workshop on Robot and Human Interactive Communication, ROMAN-2010}, pages = {21--26}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
2009 |
Iasonas Kokkinos, Petros Maragos Synergy between object recognition and image segmentation using the expectation-maximization algorithm Journal Article IEEE Transactions on Pattern Analysis and Machine Intelligence, 31 (8), pp. 1486–1501, 2009, ISSN: 01628828. Abstract | BibTeX | Links: [PDF] @article{135, title = {Synergy between object recognition and image segmentation using the expectation-maximization algorithm}, author = {Iasonas Kokkinos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KokkinosMaragos_SynergyBetweenObjectRecognitionAndImageSegmentation_ieeetPAMI09.pdf}, doi = {10.1109/TPAMI.2008.158}, issn = {01628828}, year = {2009}, date = {2009-01-01}, journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, volume = {31}, number = {8}, pages = {1486--1501}, abstract = {In this work, we formulate the interaction between image segmentation and object recognition in the framework of the Expectation-Maximization (EM) algorithm. We consider segmentation as the assignment of image observations to object hypotheses and phrase it as the E-step, while the M-step amounts to fitting the object models to the observations. These two tasks are performed iteratively, thereby simultaneously segmenting an image and reconstructing it in terms of objects. We model objects using Active Appearance Models (AAMs) as they capture both shape and appearance variation. During the E-step, the fidelity of the AAM predictions to the image is used to decide about assigning observations to the object. For this, we propose two top-down segmentation algorithms. The first starts with an oversegmentation of the image and then softly assigns image segments to objects, as in the common setting of EM. The second uses curve evolution to minimize a criterion derived from the variational interpretation of EM and introduces AAMs as shape priors. 
For the M-step, we derive AAM fitting equations that accommodate segmentation information, thereby allowing for the automated treatment of occlusions. Apart from top-down segmentation results, we provide systematic experiments on object detection that validate the merits of our joint segmentation and recognition approach.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this work, we formulate the interaction between image segmentation and object recognition in the framework of the Expectation-Maximization (EM) algorithm. We consider segmentation as the assignment of image observations to object hypotheses and phrase it as the E-step, while the M-step amounts to fitting the object models to the observations. These two tasks are performed iteratively, thereby simultaneously segmenting an image and reconstructing it in terms of objects. We model objects using Active Appearance Models (AAMs) as they capture both shape and appearance variation. During the E-step, the fidelity of the AAM predictions to the image is used to decide about assigning observations to the object. For this, we propose two top-down segmentation algorithms. The first starts with an oversegmentation of the image and then softly assigns image segments to objects, as in the common setting of EM. The second uses curve evolution to minimize a criterion derived from the variational interpretation of EM and introduces AAMs as shape priors. For the M-step, we derive AAM fitting equations that accommodate segmentation information, thereby allowing for the automated treatment of occlusions. Apart from top-down segmentation results, we provide systematic experiments on object detection that validate the merits of our joint segmentation and recognition approach. |
Vassilis Pitsikalis, Petros Maragos Analysis and classification of speech signals by generalized fractal dimension features Journal Article Speech Communication, 51 (12), pp. 1206–1223, 2009, ISSN: 01676393. Abstract | BibTeX | Links: [PDF] @article{136, title = {Analysis and classification of speech signals by generalized fractal dimension features}, author = {Vassilis Pitsikalis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PitsikalisMaragos_AnalysisClassificationfSpeechFractalDimFeat_SpeechCommunication09.pdf}, doi = {10.1016/j.specom.2009.06.005}, issn = {01676393}, year = {2009}, date = {2009-01-01}, journal = {Speech Communication}, volume = {51}, number = {12}, pages = {1206--1223}, abstract = {We explore nonlinear signal processing methods inspired by dynamical systems and fractal theory in order to analyze and characterize speech sounds. A speech signal is at first embedded in a multidimensional phase-space and further employed for the estimation of measurements related to the fractal dimensions. Our goals are to compute these raw measurements in the practical cases of speech signals, to further utilize them for the extraction of simple descriptive features and to address issues on the efficacy of the proposed features to characterize speech sounds. We observe that distinct feature vector elements obtain values or show statistical trends that on average depend on general characteristics such as the voicing, the manner and the place of articulation of broad phoneme classes. Moreover the way that the statistical parameters of the features are altered as an effect of the variation of phonetic characteristics seem to follow some roughly formed patterns. We also discuss some qualitative aspects concerning the linear phoneme-wise correlation between the fractal features and the commonly employed mel-frequency cepstral coefficients (MFCCs) demonstrating phonetic cases of maximal and minimal correlation. 
In the same context we also investigate the fractal features' spectral content, in terms of the most and least correlated components with the MFCC. Further the proposed methods are examined under the light of indicative phoneme classification experiments. These quantify the efficacy of the features to characterize broad classes of speech sounds. The results are shown to be comparable for some classification scenarios with the corresponding ones of the MFCC features. textcopyright 2009 Elsevier B.V. All rights reserved.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We explore nonlinear signal processing methods inspired by dynamical systems and fractal theory in order to analyze and characterize speech sounds. A speech signal is at first embedded in a multidimensional phase-space and further employed for the estimation of measurements related to the fractal dimensions. Our goals are to compute these raw measurements in the practical cases of speech signals, to further utilize them for the extraction of simple descriptive features and to address issues on the efficacy of the proposed features to characterize speech sounds. We observe that distinct feature vector elements obtain values or show statistical trends that on average depend on general characteristics such as the voicing, the manner and the place of articulation of broad phoneme classes. Moreover the way that the statistical parameters of the features are altered as an effect of the variation of phonetic characteristics seem to follow some roughly formed patterns. We also discuss some qualitative aspects concerning the linear phoneme-wise correlation between the fractal features and the commonly employed mel-frequency cepstral coefficients (MFCCs) demonstrating phonetic cases of maximal and minimal correlation. In the same context we also investigate the fractal features' spectral content, in terms of the most and least correlated components with the MFCC. 
Further the proposed methods are examined under the light of indicative phoneme classification experiments. These quantify the efficacy of the features to characterize broad classes of speech sounds. The results are shown to be comparable for some classification scenarios with the corresponding ones of the MFCC features. © 2009 Elsevier B.V. All rights reserved. |
George Papandreou, Athanassios Katsamanis, Vassilis Pitsikalis, Petros Maragos Adaptive multimodal fusion by uncertainty compensation with application to audiovisual speech recognition Journal Article IEEE Transactions on Audio, Speech and Language Processing, 17 (3), pp. 423–435, 2009, ISSN: 15587916. Abstract | BibTeX | Links: [PDF] [Webpage] @article{131, title = {Adaptive multimodal fusion by uncertainty compensation with application to audiovisual speech recognition}, author = {George Papandreou and Athanassios Katsamanis and Vassilis Pitsikalis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/PapandreouKatsamanisPitsikalisMaragos_MultimodalFusionUncertaintyCompensationAvasr_ieee-j-aslp09.pdf http://www.scopus.com/inward/record.url?eid=2-s2.0-44949227080&partnerID=40&md5=6edf7efa047e4239c0ea003cf525bf63}, doi = {10.1109/TASL.2008.2011515}, issn = {15587916}, year = {2009}, date = {2009-01-01}, journal = {IEEE Transactions on Audio, Speech and Language Processing}, volume = {17}, number = {3}, pages = {423--435}, abstract = {While the accuracy of feature measurements heavily depends on changing environmental conditions, studying the consequences of this fact in pattern recognition tasks has received relatively little attention to date. In this paper, we explicitly take feature measurement uncertainty into account and show how multimodal classification and learning rules should be adjusted to compensate for its effects. Our approach is particularly fruitful in multimodal fusion scenarios, such as audiovisual speech recognition, where multiple streams of complementary time-evolving features are integrated. For such applications, provided that the measurement noise uncertainty for each feature stream can be estimated, the proposed framework leads to highly adaptive multimodal fusion rules which are easy and efficient to implement. 
Our technique is widely applicable and can be transparently integrated with either synchronous or asynchronous multimodal sequence integration architectures. We further show that multimodal fusion methods relying on stream weights can naturally emerge from our scheme under certain assumptions; this connection provides valuable insights into the adaptivity properties of our multimodal uncertainty compensation approach. We show how these ideas can be practically applied for audiovisual speech recognition. In this context, we propose improved techniques for person-independent visual feature extraction and uncertainty estimation with active appearance models, and also discuss how enhanced audio features along with their uncertainty estimates can be effectively computed. We demonstrate the efficacy of our approach in audiovisual speech recognition experiments on the CUAVE database using either synchronous or asynchronous multimodal integration models.}, keywords = {}, pubstate = {published}, tppubtype = {article} } While the accuracy of feature measurements heavily depends on changing environmental conditions, studying the consequences of this fact in pattern recognition tasks has received relatively little attention to date. In this paper, we explicitly take feature measurement uncertainty into account and show how multimodal classification and learning rules should be adjusted to compensate for its effects. Our approach is particularly fruitful in multimodal fusion scenarios, such as audiovisual speech recognition, where multiple streams of complementary time-evolving features are integrated. For such applications, provided that the measurement noise uncertainty for each feature stream can be estimated, the proposed framework leads to highly adaptive multimodal fusion rules which are easy and efficient to implement. 
Our technique is widely applicable and can be transparently integrated with either synchronous or asynchronous multimodal sequence integration architectures. We further show that multimodal fusion methods relying on stream weights can naturally emerge from our scheme under certain assumptions; this connection provides valuable insights into the adaptivity properties of our multimodal uncertainty compensation approach. We show how these ideas can be practically applied for audiovisual speech recognition. In this context, we propose improved techniques for person-independent visual feature extraction and uncertainty estimation with active appearance models, and also discuss how enhanced audio features along with their uncertainty estimates can be effectively computed. We demonstrate the efficacy of our approach in audiovisual speech recognition experiments on the CUAVE database using either synchronous or asynchronous multimodal integration models. |
I Kokkinos, G Evangelopoulos, P Maragos Texture Analysis and Segmentation Using Modulation Features, Generative Models, and Weighted Curve Evolution Journal Article IEEE Transactions on Pattern Analysis and Machine Intelligence, 31 (1), pp. 142-157, 2009, ISSN: 0162-8828. @article{4447672, title = {Texture Analysis and Segmentation Using Modulation Features, Generative Models, and Weighted Curve Evolution}, author = {I Kokkinos and G Evangelopoulos and P Maragos}, doi = {10.1109/TPAMI.2008.33}, issn = {0162-8828}, year = {2009}, date = {2009-01-01}, journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, volume = {31}, number = {1}, pages = {142-157}, abstract = {In this work we approach the analysis and segmentation of natural textured images by combining ideas from image analysis and probabilistic modeling. We rely on AM-FM texture models and specifically on the Dominant Component Analysis (DCA) paradigm for feature extraction. This method provides a low-dimensional, dense and smooth descriptor, capturing essential aspects of texture, namely scale, orientation, and contrast. Our contributions are at three levels of the texture analysis and segmentation problems: First, at the feature extraction stage we propose a regularized demodulation algorithm that provides more robust texture features and explore the merits of modifying the channel selection criterion of DCA. Second, we propose a probabilistic interpretation of DCA and Gabor filtering in general, in terms of Local Generative Models. Extending this point of view to edge detection facilitates the estimation of posterior probabilities for the edge and texture classes. Third, we propose the weighted curve evolution scheme that enhances the Region Competition/ Geodesic Active Regions methods by allowing for the locally adaptive fusion of heterogeneous cues. 
Our segmentation results are evaluated on the Berkeley Segmentation Benchmark, and compare favorably to current state-of-the-art methods.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this work we approach the analysis and segmentation of natural textured images by combining ideas from image analysis and probabilistic modeling. We rely on AM-FM texture models and specifically on the Dominant Component Analysis (DCA) paradigm for feature extraction. This method provides a low-dimensional, dense and smooth descriptor, capturing essential aspects of texture, namely scale, orientation, and contrast. Our contributions are at three levels of the texture analysis and segmentation problems: First, at the feature extraction stage we propose a regularized demodulation algorithm that provides more robust texture features and explore the merits of modifying the channel selection criterion of DCA. Second, we propose a probabilistic interpretation of DCA and Gabor filtering in general, in terms of Local Generative Models. Extending this point of view to edge detection facilitates the estimation of posterior probabilities for the edge and texture classes. Third, we propose the weighted curve evolution scheme that enhances the Region Competition/ Geodesic Active Regions methods by allowing for the locally adaptive fusion of heterogeneous cues. Our segmentation results are evaluated on the Berkeley Segmentation Benchmark, and compare favorably to current state-of-the-art methods. |
Athanassios Katsamanis, George Papandreou, Petros Maragos Face active appearance modeling and speech acoustic information to recover articulation Journal Article IEEE Transactions on Audio, Speech and Language Processing, 17 (3), pp. 411–422, 2009, ISSN: 15587916. Abstract | BibTeX | Links: [PDF] @article{130, title = {Face active appearance modeling and speech acoustic information to recover articulation}, author = {Athanassios Katsamanis and George Papandreou and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KatsamanisPapandreouMaragos_AudiovisualSpeechInversion_ieee-j-aslp09.pdf}, doi = {10.1109/TASL.2008.2008740}, issn = {15587916}, year = {2009}, date = {2009-01-01}, journal = {IEEE Transactions on Audio, Speech and Language Processing}, volume = {17}, number = {3}, pages = {411--422}, abstract = {We are interested in recovering aspects of vocal tract's geometry$backslash$nand dynamics from speech, a problem referred to as speech inversion.$backslash$nTraditional audio-only speech inversion techniques are inherently$backslash$nill-posed since the same speech acoustics can be produced by multiple$backslash$narticulatory configurations. To alleviate the ill-posedness of the$backslash$naudio-only inversion process, we propose an inversion scheme which$backslash$nalso exploits visual information from the speaker's face. The complex$backslash$naudiovisual-to-articulatory mapping is approximated by an adaptive$backslash$npiecewise linear model. Model switching is governed by a Markovian$backslash$ndiscrete process which captures articulatory dynamic information.$backslash$nEach constituent linear mapping is effectively estimated via canonical$backslash$ncorrelation analysis. In the described multimodal context, we investigate$backslash$nalternative fusion schemes which allow interaction between the audio$backslash$nand visual modalities at various synchronization levels. 
For facial$backslash$nanalysis, we employ active appearance models (AAMs) and demonstrate$backslash$nfully automatic face tracking and visual feature extraction. Using$backslash$nthe AAM features in conjunction with audio features such as Mel frequency$backslash$ncepstral coefficients (MFCCs) or line spectral frequencies (LSFs)$backslash$nleads to effective estimation of the trajectories followed by certain$backslash$npoints of interest in the speech production system. We report experiments$backslash$non the QSMT and MOCHA databases which contain audio, video, and electromagnetic$backslash$narticulography data recorded in parallel. The results show that exploiting$backslash$nboth audio and visual modalities in a multistream hidden Markov model$backslash$nbased scheme clearly improves performance relative to either audio$backslash$nor visual-only estimation.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We are interested in recovering aspects of vocal tract's geometry$backslash$nand dynamics from speech, a problem referred to as speech inversion.$backslash$nTraditional audio-only speech inversion techniques are inherently$backslash$nill-posed since the same speech acoustics can be produced by multiple$backslash$narticulatory configurations. To alleviate the ill-posedness of the$backslash$naudio-only inversion process, we propose an inversion scheme which$backslash$nalso exploits visual information from the speaker's face. The complex$backslash$naudiovisual-to-articulatory mapping is approximated by an adaptive$backslash$npiecewise linear model. Model switching is governed by a Markovian$backslash$ndiscrete process which captures articulatory dynamic information.$backslash$nEach constituent linear mapping is effectively estimated via canonical$backslash$ncorrelation analysis. 
In the described multimodal context, we investigate$backslash$nalternative fusion schemes which allow interaction between the audio$backslash$nand visual modalities at various synchronization levels. For facial$backslash$nanalysis, we employ active appearance models (AAMs) and demonstrate$backslash$nfully automatic face tracking and visual feature extraction. Using$backslash$nthe AAM features in conjunction with audio features such as Mel frequency$backslash$ncepstral coefficients (MFCCs) or line spectral frequencies (LSFs)$backslash$nleads to effective estimation of the trajectories followed by certain$backslash$npoints of interest in the speech production system. We report experiments$backslash$non the QSMT and MOCHA databases which contain audio, video, and electromagnetic$backslash$narticulography data recorded in parallel. The results show that exploiting$backslash$nboth audio and visual modalities in a multistream hidden Markov model$backslash$nbased scheme clearly improves performance relative to either audio$backslash$nor visual-only estimation. |
Dimitrios Dimitriadis, Alexandros Potamianos, Petros Maragos A comparison of the squared energy and teager-kaiser operators for short-term energy estimation in additive noise Journal Article IEEE Transactions on Signal Processing, 57 (7), pp. 2569–2581, 2009, ISSN: 1053587X. Abstract | BibTeX | Links: [PDF] @article{132, title = {A comparison of the squared energy and teager-kaiser operators for short-term energy estimation in additive noise}, author = {Dimitrios Dimitriadis and Alexandros Potamianos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/DimitriadisPotamianosMaragos_ComparisonSquaredAmpl-TKOper-EnergyEstimation_ieeetSP2008.pdf}, doi = {10.1109/TSP.2009.2019299}, issn = {1053587X}, year = {2009}, date = {2009-01-01}, journal = {IEEE Transactions on Signal Processing}, volume = {57}, number = {7}, pages = {2569--2581}, abstract = {Time-frequency distributions that evaluate the signal's energy content both in the time and frequency domains are indispensable signal processing tools, especially, for nonstationary signals. Various short-time energy computation schemes are used in practice, including the mean squared amplitude and Teager-Kaiser energy approaches. Herein, we focus primarily on the short- and medium-term properties of these two energy estimation schemes, as well as, on their performance in the presence of additive noise. To facilitate this analysis and generalize the approach, we use a harmonic noise model to approximate the noise component. The error analysis is conducted both in the continuous- and discrete-time domains, deriving similar conclusions. The estimation errors are measured in terms of normalized deviations from the expected signal energy and are shown to greatly depend on both the signals' spectral content and the analysis window length. 
When medium- and long-term analysis windows are employed, the Teager-Kaiser energy operator is proven superior to the common squared energy operator, provided that the spectral content of the noise is more lowpass than the corresponding signal content, and vice versa. However, for shorter window lengths, the Teager-Kaiser operator always outperforms the squared energy operator. The theoretical results are experimentally verified for synthetic signals. Finally, the performance of the proposed energy operators is evaluated for short-term analysis of noisy speech signals and the implications for speech processing applications are outlined.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Time-frequency distributions that evaluate the signal's energy content both in the time and frequency domains are indispensable signal processing tools, especially, for nonstationary signals. Various short-time energy computation schemes are used in practice, including the mean squared amplitude and Teager-Kaiser energy approaches. Herein, we focus primarily on the short- and medium-term properties of these two energy estimation schemes, as well as, on their performance in the presence of additive noise. To facilitate this analysis and generalize the approach, we use a harmonic noise model to approximate the noise component. The error analysis is conducted both in the continuous- and discrete-time domains, deriving similar conclusions. The estimation errors are measured in terms of normalized deviations from the expected signal energy and are shown to greatly depend on both the signals' spectral content and the analysis window length. When medium- and long-term analysis windows are employed, the Teager-Kaiser energy operator is proven superior to the common squared energy operator, provided that the spectral content of the noise is more lowpass than the corresponding signal content, and vice versa. 
However, for shorter window lengths, the Teager-Kaiser operator always outperforms the squared energy operator. The theoretical results are experimentally verified for synthetic signals. Finally, the performance of the proposed energy operators is evaluated for short-term analysis of noisy speech signals and the implications for speech processing applications are outlined. |
Anastasios Roussos, Petros Maragos Reversible interpolation of vectorial images by an anisotropic diffusion-projection PDE Journal Article International Journal of Computer Vision, 84 (2), pp. 130–145, 2009, ISSN: 09205691. Abstract | BibTeX | Links: [PDF] @article{133, title = {Reversible interpolation of vectorial images by an anisotropic diffusion-projection PDE}, author = {Anastasios Roussos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/RoussosMaragos_InterpolAnisoDiffProjPDE_IJCV09.pdf}, doi = {10.1007/s11263-008-0132-x}, issn = {09205691}, year = {2009}, date = {2009-01-01}, journal = {International Journal of Computer Vision}, volume = {84}, number = {2}, pages = {130--145}, abstract = {In this paper, a nonlinear model for the interpo- lation of vector-valued images is proposed. This model is based on an anisotropic diffusion PDE and performs an in- terpolation that is reversible. The interpolation solution is restricted to the subspace of functions that can recover the discrete input image, after an appropriate smoothing and sampling. The proposed nonlinear diffusion flow lies on this subspace while its strength and anisotropy adapt to the lo- cal variations and geometry of image structures. The derived method effectively reconstructs the real image structures and yields a satisfactory interpolation result. Compared to clas- sic and other existing PDE-based interpolationmethods, our proposed method seems to increase the accuracy of the re- sult and to reduce the undesirable artifacts, such as blurring, ringing, block effects and edge distortion.We present exten- sive experimental results that demonstrate the potential of the method as applied to graylevel and color images.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this paper, a nonlinear model for the interpo- lation of vector-valued images is proposed. 
This model is based on an anisotropic diffusion PDE and performs an interpolation that is reversible. The interpolation solution is restricted to the subspace of functions that can recover the discrete input image, after an appropriate smoothing and sampling. The proposed nonlinear diffusion flow lies on this subspace while its strength and anisotropy adapt to the local variations and geometry of image structures. The derived method effectively reconstructs the real image structures and yields a satisfactory interpolation result. Compared to classic and other existing PDE-based interpolation methods, our proposed method seems to increase the accuracy of the result and to reduce the undesirable artifacts, such as blurring, ringing, block effects and edge distortion. We present extensive experimental results that demonstrate the potential of the method as applied to graylevel and color images. |
Stamatios Lefkimmiatis, Petros Maragos, George Papandreou Bayesian inference on multiscale models for poisson intensity estimation: Applications to photon-limited image denoising Journal Article IEEE Transactions on Image Processing, 18 (8), pp. 1724–1741, 2009, ISSN: 10577149. Abstract | BibTeX | Links: [PDF] @article{134, title = {Bayesian inference on multiscale models for poisson intensity estimation: Applications to photon-limited image denoising}, author = {Stamatios Lefkimmiatis and Petros Maragos and George Papandreou}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/LefkimmiatisMaragosPapandreou_BayesianMultiscalePoissonIntensityEstimation_ieee-j-ip09.pdf}, doi = {10.1109/TIP.2009.2022008}, issn = {10577149}, year = {2009}, date = {2009-01-01}, journal = {IEEE Transactions on Image Processing}, volume = {18}, number = {8}, pages = {1724--1741}, abstract = {We present an improved statistical model for analyzing Poisson processes, with applications to photon-limited imaging. We build on previous work, adopting a multiscale representation of the Poisson process in which the ratios of the underlying Poisson intensities (rates) in adjacent scales are modeled as mixtures of conjugate parametric distributions. 
Our main contributions include: 1) a rigorous and robust regularized expectation-maximization (EM) algorithm for maximum-likelihood estimation of the rate-ratio density parameters directly from the noisy observed Poisson data (counts); 2) extension of the method to work under a multiscale hidden Markov tree model (HMT) which couples the mixture label assignments in consecutive scales, thus modeling interscale coefficient dependencies in the vicinity of image edges; 3) exploration of a 2-D recursive quad-tree image representation, involving Dirichlet-mixture rate-ratio densities, instead of the conventional separable binary-tree image representation involving beta-mixture rate-ratio densities; and 4) a novel multiscale image representation, which we term Poisson-Haar decomposition, that better models the image edge structure, thus yielding improved performance. Experimental results on standard images with artificially simulated Poisson noise and on real photon-limited images demonstrate the effectiveness of the proposed techniques.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We present an improved statistical model for analyzing Poisson processes, with applications to photon-limited imaging. We build on previous work, adopting a multiscale representation of the Poisson process in which the ratios of the underlying Poisson intensities (rates) in adjacent scales are modeled as mixtures of conjugate parametric distributions. 
Our main contributions include: 1) a rigorous and robust regularized expectation-maximization (EM) algorithm for maximum-likelihood estimation of the rate-ratio density parameters directly from the noisy observed Poisson data (counts); 2) extension of the method to work under a multiscale hidden Markov tree model (HMT) which couples the mixture label assignments in consecutive scales, thus modeling interscale coefficient dependencies in the vicinity of image edges; 3) exploration of a 2-D recursive quad-tree image representation, involving Dirichlet-mixture rate-ratio densities, instead of the conventional separable binary-tree image representation involving beta-mixture rate-ratio densities; and 4) a novel multiscale image representation, which we term Poisson-Haar decomposition, that better models the image edge structure, thus yielding improved performance. Experimental results on standard images with artificially simulated Poisson noise and on real photon-limited images demonstrate the effectiveness of the proposed techniques. |
G Evangelopoulos, A Zlatintsi, G Skoumas, K Rapantzikos, A Potamianos, P Maragos, Y Avrithis Video Event Detection and Summarization Using Audio, Visual and Text Saliency Conference Taipei, Taiwan, 2009. Abstract | BibTeX | Links: [PDF] @conference{EZS+09, title = {Video Event Detection and Summarization Using Audio, Visual and Text Saliency}, author = {G Evangelopoulos and A Zlatintsi and G Skoumas and K Rapantzikos and A Potamianos and P Maragos and Y Avrithis}, url = {http://robotics.ntua.gr/wp-content/publications/EvangelopoulosZlatintsiEtAl_VideoEventDetectionSummarizationUsingAVTSaliency_ICASSP09.pdf}, year = {2009}, date = {2009-04-01}, address = {Taipei, Taiwan}, abstract = {Detection of perceptually important video events is formulated here on the basis of saliency models for the audio, visual and textual information conveyed in a video stream. Audio saliency is assessed by cues that quantify multifrequency waveform modulations, extracted through nonlinear operators and energy tracking. Visual saliency is measured through a spatiotemporal attention model driven by intensity, color and motion. Text saliency is extracted from part-of-speech tagging on the subtitles information available with most movie distributions. The various modality curves are integrated in a single attention curve, where the presence of an event may be signified in one or multiple domains. This multimodal saliency curve is the basis of a bottom-up video summarization algorithm, that refines results from unimodal or audiovisual-based skimming. The algorithm performs favorably for video summarization in terms of informativeness and enjoyability.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Detection of perceptually important video events is formulated here on the basis of saliency models for the audio, visual and textual information conveyed in a video stream. 
Audio saliency is assessed by cues that quantify multifrequency waveform modulations, extracted through nonlinear operators and energy tracking. Visual saliency is measured through a spatiotemporal attention model driven by intensity, color and motion. Text saliency is extracted from part-of-speech tagging on the subtitles information available with most movie distributions. The various modality curves are integrated in a single attention curve, where the presence of an event may be signified in one or multiple domains. This multimodal saliency curve is the basis of a bottom-up video summarization algorithm, that refines results from unimodal or audiovisual-based skimming. The algorithm performs favorably for video summarization in terms of informativeness and enjoyability. |
G Evangelopoulos, A Zlatintsi, G Skoumas, K Rapantzikos, A Potamianos, P Maragos, Y Avrithis Video Event Detection and Summarization using Audio, Visual and Text Saliency Conference Icassp, (2), 2009, ISBN: 9781424423545. @conference{195, title = {Video Event Detection and Summarization using Audio, Visual and Text Saliency}, author = { G Evangelopoulos and A Zlatintsi and G Skoumas and K Rapantzikos and A Potamianos and P Maragos and Y Avrithis}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/EvangelopoulosZlatintsiEtAl_VideoEventDetectionSummarizationUsingAVTSaliency_ICASSP09.pdf}, isbn = {9781424423545}, year = {2009}, date = {2009-01-01}, booktitle = {Icassp}, number = {2}, pages = {3553--3556}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
S.E. Fotinea, Eleni Efthimiou, George Caridakis, Olga Diamanti, N. Mitsou, K. Karpouzis, C. Tzafestas, P. Maragos DIANOEMA: Visual analysis and sign recognition for GSL modelling and robot teleoperation Conference Language and Speech, 2009. @conference{37b, title = {DIANOEMA: Visual analysis and sign recognition for GSL modelling and robot teleoperation}, author = { S.E. Fotinea and Eleni Efthimiou and George Caridakis and Olga Diamanti and N. Mitsou and K. Karpouzis and C. Tzafestas and P. Maragos}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.149.1666&rep=rep1&type=pdf}, year = {2009}, date = {2009-01-01}, booktitle = {Language and Speech}, pages = {2--4}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Stamatios Lefkimmiatis, George Papandreou, Petros Maragos Poisson-haar transform: A nonlinear multiscale representation for photon-limited image denoising Conference Proceedings - International Conference on Image Processing, ICIP, 2009, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{192, title = {Poisson-haar transform: A nonlinear multiscale representation for photon-limited image denoising}, author = { Stamatios Lefkimmiatis and George Papandreou and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/LefkimmiatisPapandreouMaragos_PoissonHaarTransform_icip09.pdf}, doi = {10.1109/ICIP.2009.5414053}, issn = {15224880}, year = {2009}, date = {2009-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {3853--3856}, abstract = {We present a novel multiscale image representation belonging to the class of multiscale multiplicative decompositions, which we term Poisson-Haar transform. The proposed representation is well-suited for analyzing images degraded by signal-dependent Poisson noise, allowing efficient estimation of their underlying intensity by means of multiscale Bayesian schemes. The Poisson-Haar decomposition has a direct link to the standard 2-D Haar wavelet transform, thus retaining many of the properties that have made wavelets successful in signal processing and analysis. The practical relevance and effectiveness of the proposed approach is verified through denoising experiments on simulated and real-world photon-limited images.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We present a novel multiscale image representation belonging to the class of multiscale multiplicative decompositions, which we term Poisson-Haar transform. The proposed representation is well-suited for analyzing images degraded by signal-dependent Poisson noise, allowing efficient estimation of their underlying intensity by means of multiscale Bayesian schemes. 
The Poisson-Haar decomposition has a direct link to the standard 2-D Haar wavelet transform, thus retaining many of the properties that have made wavelets successful in signal processing and analysis. The practical relevance and effectiveness of the proposed approach is verified through denoising experiments on simulated and real-world photon-limited images. |
Petros Maragos, Corinne Vachier Overview of adaptive morphology: Trends and perspectives Conference Proceedings - International Conference on Image Processing, ICIP, 2009, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{194, title = {Overview of adaptive morphology: Trends and perspectives}, author = { Petros Maragos and Corinne Vachier}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosVachier_OverviewOfAdaptiveMorphology_ICIP09.pdf}, doi = {10.1109/ICIP.2009.5413961}, issn = {15224880}, year = {2009}, date = {2009-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {2241--2244}, abstract = {In this paper we briefly overview emerging trends in `Adaptive Morphology', i.e. work related to the theory and/or applications of image analysis filters, systems, or algorithms based on mathematical morphology, that are adaptive w.r.t. to space or intensity or use any other adaptive scheme. We present a new classification of work in this area structured along several major theoretical perspectives. We then sample specific approaches that develop spatially-variant structuring elements or intensity level-adaptive operators, modeled and implemented either via conventional nonlinear digital filtering or via geometric PDEs. Finally, we discuss some applications.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper we briefly overview emerging trends in `Adaptive Morphology', i.e. work related to the theory and/or applications of image analysis filters, systems, or algorithms based on mathematical morphology, that are adaptive w.r.t. to space or intensity or use any other adaptive scheme. We present a new classification of work in this area structured along several major theoretical perspectives. 
We then sample specific approaches that develop spatially-variant structuring elements or intensity level-adaptive operators, modeled and implemented either via conventional nonlinear digital filtering or via geometric PDEs. Finally, we discuss some applications. |
Anastasios Roussos, Athanassios Katsamanis, Petros Maragos Tongue tracking in ultrasound images with active appearance models Conference Proceedings - International Conference on Image Processing, ICIP, 2009, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{193, title = {Tongue tracking in ultrasound images with active appearance models}, author = { Anastasios Roussos and Athanassios Katsamanis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/RoussosKatsamanisMaragos_TongueTrackingInUltrrasoundImages_ICIP09.pdf}, doi = {10.1109/ICIP.2009.5414520}, issn = {15224880}, year = {2009}, date = {2009-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {1733--1736}, abstract = {Tongue Ultrasound imaging is widely used for human speech production analysis and modeling. In this paper, we propose a novel method to automatically detect and track the tongue contour in Ultrasound (US) videos. Our method is built on a variant of Active Appearance Modeling. It incorporates shape prior information and can estimate the entire tongue contour robustly and accurately in a sequence of US frames. Experimental evaluation demonstrates the effectiveness of our approach and its improved performance compared to previously proposed tongue tracking techniques.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Tongue Ultrasound imaging is widely used for human speech production analysis and modeling. In this paper, we propose a novel method to automatically detect and track the tongue contour in Ultrasound (US) videos. Our method is built on a variant of Active Appearance Modeling. It incorporates shape prior information and can estimate the entire tongue contour robustly and accurately in a sequence of US frames. Experimental evaluation demonstrates the effectiveness of our approach and its improved performance compared to previously proposed tongue tracking techniques. |
Stavros Theodorakis, Athanassios Katsamanis, Petros Maragos Product-HMMs for automatic sign language recognition Conference ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings, 2009, ISSN: 15206149. Abstract | BibTeX | Links: [PDF] @conference{197, title = {Product-HMMs for automatic sign language recognition}, author = { Stavros Theodorakis and Athanassios Katsamanis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/TheodorakisKatsamanisMaragos_ProductHMM-AutomaticSignLanguageRecogn_ICASSP2009.pdf}, doi = {10.1109/ICASSP.2009.4959905}, issn = {15206149}, year = {2009}, date = {2009-01-01}, booktitle = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings}, pages = {1601--1604}, abstract = {We address multistream sign language recognition and focus on efficient multistream integration schemes. Alternative approaches are investigated and the application of Product-HMMs (PHMM) is proposed. The PHMM is a variant of the general multistream HMM that also allows for partial asynchrony between the streams. Experiments in classification and isolated sign recognition for the Greek Sign Language using different fusion methods, show that the PHMMs perform the best. Fusing movement and shape information with the PHMMs has increased sign classification performance by 1,2% in comparison to the Parallel HMM fusion model. Isolated sign recognition rate increased by 8,3% over movement only models and by 1,5% over movement-shape models using multistream HMMs.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We address multistream sign language recognition and focus on efficient multistream integration schemes. Alternative approaches are investigated and the application of Product-HMMs (PHMM) is proposed. The PHMM is a variant of the general multistream HMM that also allows for partial asynchrony between the streams. 
Experiments in classification and isolated sign recognition for the Greek Sign Language using different fusion methods, show that the PHMMs perform the best. Fusing movement and shape information with the PHMMs has increased sign classification performance by 1,2% in comparison to the Parallel HMM fusion model. Isolated sign recognition rate increased by 8,3% over movement only models and by 1,5% over movement-shape models using multistream HMMs. |
C Tzafestas Telehaptics: issues of control stability and human perception in remote kineasthetic exploration Conference 18th IEEE International Symposium on Robot and Human Interactive Communication (RO-MAN 2009), Workshop on Robot-Human Synergies, Sept. 27 - Oct. 2 2009, 2009. @conference{38b, title = {Telehaptics: issues of control stability and human perception in remote kineasthetic exploration}, author = { C Tzafestas}, year = {2009}, date = {2009-01-01}, booktitle = {18th IEEE International Symposium on Robot and Human Interactive Communication (RO-MAN 2009), Workshop on Robot-Human Synergies, Sept. 27 - Oct. 2 2009}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C. Tzafestas, N. Mitsou, N. Georgakarakos, O. Diamanti, P. Maragos, S. E. Fotinea, E. Efthimiou Gestural teleoperation of a mobile robot based on visual recognition of sign language static handshapes Conference Proceedings - IEEE International Workshop on Robot and Human Interactive Communication, 2009, ISSN: 19449445. @conference{39b, title = {Gestural teleoperation of a mobile robot based on visual recognition of sign language static handshapes}, author = { C. Tzafestas and N. Mitsou and N. Georgakarakos and O. Diamanti and P. Maragos and S. E. Fotinea and E. Efthimiou}, doi = {10.1109/ROMAN.2009.5326235}, issn = {19449445}, year = {2009}, date = {2009-01-01}, booktitle = {Proceedings - IEEE International Workshop on Robot and Human Interactive Communication}, pages = {1073--1079}, abstract = {This paper presents results achieved in the frames of a national research project (titled "DIANOEMA"), where visual analysis and sign recognition techniques have been explored on Greek Sign Language (GSL) data. Besides GSL modelling, the aim was to develop a pilot application for teleoperating a mobile robot using natural hand signs. A small vocabulary of hand signs has been designed to enable desktop-based teleoperation at a high-level of supervisory telerobotic control. Real-time visual recognition of the hand images is performed by training a multi-layer perceptron (MLP) neural network. Various shape descriptors of the segmented hand posture images have been explored as inputs to the MLP network. These include Fourier shape descriptors on the contour of the segmented hand sign images, moments, compactness, eccentricity, and histogram of the curvature. We have examined which of these shape descriptors are best suited for real-time recognition of hand signs, in relation to the number and choice of hand postures, in order to achieve maximum recognition performance. 
The hand-sign recognizer has been integrated in a graphical user interface, and has been implemented with success on a pilot application for real-time desktop-based gestural teleoperation of a mobile robot vehicle.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper presents results achieved in the frames of a national research project (titled "DIANOEMA"), where visual analysis and sign recognition techniques have been explored on Greek Sign Language (GSL) data. Besides GSL modelling, the aim was to develop a pilot application for teleoperating a mobile robot using natural hand signs. A small vocabulary of hand signs has been designed to enable desktop-based teleoperation at a high-level of supervisory telerobotic control. Real-time visual recognition of the hand images is performed by training a multi-layer perceptron (MLP) neural network. Various shape descriptors of the segmented hand posture images have been explored as inputs to the MLP network. These include Fourier shape descriptors on the contour of the segmented hand sign images, moments, compactness, eccentricity, and histogram of the curvature. We have examined which of these shape descriptors are best suited for real-time recognition of hand signs, in relation to the number and choice of hand postures, in order to achieve maximum recognition performance. The hand-sign recognizer has been integrated in a graphical user interface, and has been implemented with success on a pilot application for real-time desktop-based gestural teleoperation of a mobile robot vehicle. |
M. Alifragis, C.S. Tzafestas Stereo pair matching of archaeological scenes using phase domain methods Conference IMAGAPP 2009 - Proceedings of the 1st International Conference on Computer Imaging Theory and Applications, 2009, ISBN: 9789898111685. @conference{40b, title = {Stereo pair matching of archaeological scenes using phase domain methods}, author = { M. Alifragis and C.S. Tzafestas}, isbn = {9789898111685}, year = {2009}, date = {2009-01-01}, booktitle = {IMAGAPP 2009 - Proceedings of the 1st International Conference on Computer Imaging Theory and Applications}, abstract = {This paper conducts an experimental study on the application of some recent theories of image preprocessing and analysis in the frequency domain, particularly the phase congruency and monogenic filtering methods. Our goal was to examine the performance of such methods in a stereo matching problem setting, with photos of complicated scenes. Two objects were used: a scene of an ancient Greek temple of Acropolis and the outside scene of the gate of an ancient theatre. Due to the complex structure of the photographed object, classic techniques used for feature detection and matching give poor results. The phase-domain approach followed in this paper is based on the phase-congruency method for feature extraction, together with monogenic filtering and a new correlation measure in the frequency domain for image correspondence and stereo matching. 
Comparative results show that the three-dimensional models of the scene computed when applying these phase domain methods are much more detailed and consistent as compared to the models obtained when using classic approaches or the SIFT based techniques, which give poor depth representation and less accurate metric information.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper conducts an experimental study on the application of some recent theories of image preprocessing and analysis in the frequency domain, particularly the phase congruency and monogenic filtering methods. Our goal was to examine the performance of such methods in a stereo matching problem setting, with photos of complicated scenes. Two objects were used: a scene of an ancient Greek temple of Acropolis and the outside scene of the gate of an ancient theatre. Due to the complex structure of the photographed object, classic techniques used for feature detection and matching give poor results. The phase-domain approach followed in this paper is based on the phase-congruency method for feature extraction, together with monogenic filtering and a new correlation measure in the frequency domain for image correspondence and stereo matching. Comparative results show that the three-dimensional models of the scene computed when applying these phase domain methods are much more detailed and consistent as compared to the models obtained when using classic approaches or the SIFT based techniques, which give poor depth representation and less accurate metric information. |
D. Dimitriadis, A. Metallinou, I. Konstantinou, G. Goumas, P. Maragos, N. Koziris GridNews: A distributed automatic Greek broadcast transcription system Conference ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings, 2009, ISSN: 15206149. Abstract | BibTeX | Links: [PDF] @conference{196, title = {GridNews: A distributed automatic Greek broadcast transcription system}, author = { D. Dimitriadis and A. Metallinou and I. Konstantinou and G. Goumas and P. Maragos and N. Koziris}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/DimitriadisMetallinouEtAl_Gridnews-AutomaticGreekTranscriptionSystem_ICASSP09.pdf}, doi = {10.1109/ICASSP.2009.4959984}, issn = {15206149}, year = {2009}, date = {2009-01-01}, booktitle = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings}, pages = {1917--1920}, abstract = {In this paper, a distributed system storing and retrieving Broadcast News data recorded from the Greek television is presented. These multimodal data are processed in a grid computational environment interconnecting distributed data storage and processing subsystems. The innovative element of this system is the implementation of the signal processing algorithms in this grid environment, offering additional flexibility and computational power. Among the developed signal processing modules are: the Segmentor, cutting up the original videos into shorter ones, the Classifier, recognizing whether these short videos contain speech or not, the Greek large-vocabulary speech Recognizer, transcribing speech into written text, and finally the text Search engine and the video Retriever. All the processed data are stored and retrieved in geographically distributed storage elements. 
A user-friendly, web-based interface is developed, facilitating the transparent import and storage of new multimodal data, their off-line processing and finally, their search and retrieval.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, a distributed system storing and retrieving Broadcast News data recorded from the Greek television is presented. These multimodal data are processed in a grid computational environment interconnecting distributed data storage and processing subsystems. The innovative element of this system is the implementation of the signal processing algorithms in this grid environment, offering additional flexibility and computational power. Among the developed signal processing modules are: the Segmentor, cutting up the original videos into shorter ones, the Classifier, recognizing whether these short videos contain speech or not, the Greek large-vocabulary speech Recognizer, transcribing speech into written text, and finally the text Search engine and the video Retriever. All the processed data are stored and retrieved in geographically distributed storage elements. A user-friendly, web-based interface is developed, facilitating the transparent import and storage of new multimodal data, their off-line processing and finally, their search and retrieval. |
2008 |
Costas S Tzafestas, Kostas Birbas, Yiannis Koumpouros, Dimitri Christopoulos Pilot Evaluation Study of a Virtual Paracentesis Simulator for Skill Training and Assessment: The Beneficial Effect of Haptic Display Journal Article Presence: Teleoperators & Virtual Environments, 17 (2), pp. 212–229, 2008, ISBN: 10547460. Abstract | BibTeX | Links: [Webpage] @article{1_98, title = {Pilot Evaluation Study of a Virtual Paracentesis Simulator for Skill Training and Assessment: The Beneficial Effect of Haptic Display}, author = {Costas S Tzafestas and Kostas Birbas and Yiannis Koumpouros and Dimitri Christopoulos}, url = {http://ezproxy.lib.swin.edu.au/login?url=http://search.ebscohost.com/login.aspx?direct=true&db=a9h&AN=31446380&site=ehost-live&scope=site}, isbn = {10547460}, year = {2008}, date = {2008-01-01}, journal = {Presence: Teleoperators & Virtual Environments}, volume = {17}, number = {2}, pages = {212--229}, abstract = {Effective, real-time training of health care professionals in invasive procedures is a challenging task. Furthermore, assessing in practice the acquisition of the dexterity and skills required to safely perform such operations is particularly difficult to perform objectively and reliably. The development of virtual reality (VR) simulators offers great potential toward these objectives, and can help bypass some of the difficulties associated with classical surgical training and assessment procedures. In this context, we have developed a prototype VR simulator platform for training in a class of invasive procedures, such as accessing central vessels. This paper focuses more particularly on a pilot study treating the specific application case of subclavian vein paracentesis. 
The simulation incorporates 3D models of all the human anatomy structures involved in this procedure, where collision detection and response algorithms are implemented to simulate most of the potential complications in accordance with the situations encountered in real clinical practice. Furthermore, haptic display is integrated using a typical force feedback device providing the user with a sense of touch during the simulated operations. Our main objective in this study was to obtain quantitative evaluation results regarding the effect of haptic display on performance. Two user groups participated in the study: (I) novice users and (II) experienced surgeons. The system automatically provides quantitative assessment scores of users' performance, applying a set of objective measures that also involve the optimality of the needle insertion path and indicators of maneuvering errors. Training and skill assessment performance of the system is evaluated in a twofold manner, regarding respectively: (a) the learning curve of novice users, and (b) the correlation of the system-generated scores with the actual surgical experience of the user. These performance indicators are assessed with respect to the activation of the haptic display and to whether this has any beneficial effect (or not). The experimental findings of this first pilot study provide quantitative evidence about the significance of haptic display, not only as a means to enhance the realism of the surgical simulation, but especially as an irreplaceable component for achieving objective and reliable skill assessment. Further larger-scale and long-term clinical studies are needed to validate the effectiveness of such platforms for actual training and dexterity enhancement, particularly when more complex sensorimotor skills are involved. 
[ABSTRACT FROM AUTHOR] Copyright of Presence: Teleoperators & Virtual Environments is the property of MIT Press and its content may not be copied or emailed to multiple sites or posted to a listserv without the copyright holder's express written permission. However, users may print, download, or email articles for individual use. This abstract may be abridged. No warranty is given about the accuracy of the copy. Users should refer to the original published version of the material for the full abstract. (Copyright applies to all Abstracts.)}, keywords = {}, pubstate = {published}, tppubtype = {article} } Effective, real-time training of health care professionals in invasive procedures is a challenging task. Furthermore, assessing in practice the acquisition of the dexterity and skills required to safely perform such operations is particularly difficult to perform objectively and reliably. The development of virtual reality (VR) simulators offers great potential toward these objectives, and can help bypass some of the difficulties associated with classical surgical training and assessment procedures. In this context, we have developed a prototype VR simulator platform for training in a class of invasive procedures, such as accessing central vessels. This paper focuses more particularly on a pilot study treating the specific application case of subclavian vein paracentesis. The simulation incorporates 3D models of all the human anatomy structures involved in this procedure, where collision detection and response algorithms are implemented to simulate most of the potential complications in accordance with the situations encountered in real clinical practice. Furthermore, haptic display is integrated using a typical force feedback device providing the user with a sense of touch during the simulated operations. Our main objective in this study was to obtain quantitative evaluation results regarding the effect of haptic display on performance. 
Two user groups participated in the study: (I) novice users and (II) experienced surgeons. The system automatically provides quantitative assessment scores of users' performance, applying a set of objective measures that also involve the optimality of the needle insertion path and indicators of maneuvering errors. Training and skill assessment performance of the system is evaluated in a twofold manner, regarding respectively: (a) the learning curve of novice users, and (b) the correlation of the system-generated scores with the actual surgical experience of the user. These performance indicators are assessed with respect to the activation of the haptic display and to whether this has any beneficial effect (or not). The experimental findings of this first pilot study provide quantitative evidence about the significance of haptic display, not only as a means to enhance the realism of the surgical simulation, but especially as an irreplaceable component for achieving objective and reliable skill assessment. Further larger-scale and long-term clinical studies are needed to validate the effectiveness of such platforms for actual training and dexterity enhancement, particularly when more complex sensorimotor skills are involved. [ABSTRACT FROM AUTHOR] Copyright of Presence: Teleoperators & Virtual Environments is the property of MIT Press and its content may not be copied or emailed to multiple sites or posted to a listserv without the copyright holder's express written permission. However, users may print, download, or email articles for individual use. This abstract may be abridged. No warranty is given about the accuracy of the copy. Users should refer to the original published version of the material for the full abstract. (Copyright applies to all Abstracts.) |
Anastasia Sofou, Petros Maragos Generalized flooding and multicue PDE-based image segmentation Journal Article IEEE Transactions on Image Processing, 17 (3), pp. 364–376, 2008, ISSN: 10577149. Abstract | BibTeX | Links: [PDF] @article{126, title = {Generalized flooding and multicue PDE-based image segmentation}, author = {Anastasia Sofou and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/SofouMaragos_GeneralizedFloodingMulticuePDEBasedImageSegm_ImageProc08.pdf}, doi = {10.1109/TIP.2007.916156}, issn = {10577149}, year = {2008}, date = {2008-01-01}, journal = {IEEE Transactions on Image Processing}, volume = {17}, number = {3}, pages = {364--376}, abstract = {Image segmentation remains an important, but hard-to-solve, problem since it appears to be application dependent with usually no a priori information available regarding the image structure. Moreover, the increasing demands of image analysis tasks in terms of segmentation results' quality introduce the necessity of employing multiple cues for improving image segmentation results. In this paper, we attempt to incorporate cues such as intensity contrast, region size, and texture in the segmentation procedure and derive improved results compared to using individual cues separately. We emphasize on the overall segmentation procedure, and we propose efficient simplification operators and feature extraction schemes, capable of quantifying important characteristics, like geometrical complexity, rate of change in local contrast variations, and orientation, that eventually favor the final segmentation result. Based on the well-known morphological paradigm of watershed transform segmentation, which exploits intensity contrast and region size criteria, we investigate its partial differential equation (PDE) formulation, and we extend it in order to satisfy various flooding criteria, thus making it applicable to a wider range of images. 
Going a step further, we introduce a segmentation scheme that couples contrast criteria in flooding with texture information. The modeling of the proposed scheme is done via PDEs and the efficient incorporation of the available contrast and texture information, is done by selecting an appropriate cartoon-texture image decomposition scheme. The proposed coupled segmentation scheme is driven by two separate image components: cartoon U (for contrast information) and texture component V. The performance of the proposed segmentation scheme is demonstrated through a complete set of experimental results and substantiated using quantitative and qualitative criteria.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Image segmentation remains an important, but hard-to-solve, problem since it appears to be application dependent with usually no a priori information available regarding the image structure. Moreover, the increasing demands of image analysis tasks in terms of segmentation results' quality introduce the necessity of employing multiple cues for improving image segmentation results. In this paper, we attempt to incorporate cues such as intensity contrast, region size, and texture in the segmentation procedure and derive improved results compared to using individual cues separately. We emphasize on the overall segmentation procedure, and we propose efficient simplification operators and feature extraction schemes, capable of quantifying important characteristics, like geometrical complexity, rate of change in local contrast variations, and orientation, that eventually favor the final segmentation result. Based on the well-known morphological paradigm of watershed transform segmentation, which exploits intensity contrast and region size criteria, we investigate its partial differential equation (PDE) formulation, and we extend it in order to satisfy various flooding criteria, thus making it applicable to a wider range of images. 
Going a step further, we introduce a segmentation scheme that couples contrast criteria in flooding with texture information. The modeling of the proposed scheme is done via PDEs and the efficient incorporation of the available contrast and texture information, is done by selecting an appropriate cartoon-texture image decomposition scheme. The proposed coupled segmentation scheme is driven by two separate image components: cartoon U (for contrast information) and texture component V. The performance of the proposed segmentation scheme is demonstrated through a complete set of experimental results and substantiated using quantitative and qualitative criteria. |
I Kokkinos, R Deriche, O Faugeras, P Maragos Computational Analysis and Learning for a Biologically Motivated Model of Boundary Detection and Image Segmentation Journal Article Neurocomputing, 71 (10-12), pp. 1798–1812, 2008. @article{127, title = {Computational Analysis and Learning for a Biologically Motivated Model of Boundary Detection and Image Segmentation}, author = {I Kokkinos and R Deriche and O Faugeras and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KokkinosDericheFaugersMaragos_Neurocomputing_2008_preprint.pdf}, year = {2008}, date = {2008-01-01}, journal = {Neurocomputing}, volume = {71}, number = {10-12}, pages = {1798--1812}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Margarita Kotti, Dimitrios Ververidis, Georgios Evangelopoulos, Ioannis Panagakis, Constantine Kotropoulos, Petros Maragos, Ioannis Pitas Audio-assisted movie dialogue detection Journal Article IEEE Transactions on Circuits and Systems for Video Technology, 18 (11), pp. 1618–1627, 2008, ISSN: 10518215. Abstract | BibTeX | Links: [PDF] @article{128, title = {Audio-assisted movie dialogue detection}, author = {Margarita Kotti and Dimitrios Ververidis and Georgios Evangelopoulos and Ioannis Panagakis and Constantine Kotropoulos and Petros Maragos and Ioannis Pitas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Kotti_Audio-Assisted-Movie-Dialogue-Detection_ieeeTCSVT2008.pdf}, doi = {10.1109/TCSVT.2008.2005613}, issn = {10518215}, year = {2008}, date = {2008-01-01}, journal = {IEEE Transactions on Circuits and Systems for Video Technology}, volume = {18}, number = {11}, pages = {1618--1627}, abstract = {An audio-assisted system is investigated that detects if a movie scene is a dialogue or not. The system is based on actor indicator functions. That is, functions which define if an actor speaks at a certain time instant. In particular, the cross-correlation and the magnitude of the corresponding cross-power spectral density of a pair of indicator functions are input to various classifiers, such as voted perceptrons, radial basis function networks, random trees, and support vector machines for dialogue/non-dialogue detection. To boost classifier efficiency AdaBoost is also exploited. The aforementioned classifiers are trained using ground truth indicator functions determined by human annotators for 41 dialogue and another 20 non-dialogue audio instances. For testing, actual indicator functions are derived by applying audio activity detection and actor clustering to audio recordings. 23 instances are randomly chosen among the aforementioned 41 dialogue instances, 17 of which correspond to dialogue scenes and 6 to non-dialogue ones. 
Accuracy ranging between 0.739 and 0.826 is reported.}, keywords = {}, pubstate = {published}, tppubtype = {article} } An audio-assisted system is investigated that detects if a movie scene is a dialogue or not. The system is based on actor indicator functions. That is, functions which define if an actor speaks at a certain time instant. In particular, the cross-correlation and the magnitude of the corresponding cross-power spectral density of a pair of indicator functions are input to various classifiers, such as voted perceptrons, radial basis function networks, random trees, and support vector machines for dialogue/non-dialogue detection. To boost classifier efficiency AdaBoost is also exploited. The aforementioned classifiers are trained using ground truth indicator functions determined by human annotators for 41 dialogue and another 20 non-dialogue audio instances. For testing, actual indicator functions are derived by applying audio activity detection and actor clustering to audio recordings. 23 instances are randomly chosen among the aforementioned 41 dialogue instances, 17 of which correspond to dialogue scenes and 6 to non-dialogue ones. Accuracy ranging between 0.739 and 0.826 is reported. |
G Evangelopoulos, K Rapantzikos, A Potamianos, P Maragos, A Zlatintsi, Y Avrithis Movie Summarization based on Audiovisual Saliency Detection Conference Proc. {IEEE} Int'l Conf. Acous., Speech, and Signal Processing, San Diego, CA, U.S.A., 2008. Abstract | BibTeX | Links: [PDF] @conference{ERP+08, title = {Movie Summarization based on Audiovisual Saliency Detection}, author = {G Evangelopoulos and K Rapantzikos and A Potamianos and P Maragos and A Zlatintsi and Y Avrithis}, url = {http://robotics.ntua.gr/wp-content/publications/EvangelopoulosRapantzikosEtAl_MovieSum_ICIP2008_fancyhead.pdf}, year = {2008}, date = {2008-10-01}, booktitle = {Proc. {IEEE} Int'l Conf. Acous., Speech, and Signal Processing}, address = {San Diego, CA, U.S.A.}, abstract = {Based on perceptual and computational attention modeling studies, we formulate measures of saliency for an audiovisual stream. Audio saliency is captured by signal modulations and related multi-frequency band features, extracted through nonlinear operators and energy tracking. Visual saliency is measured by means of a spatiotemporal attention model driven by various feature cues (intensity, color, motion). Audio and video curves are integrated in a single attention curve, where events may be enhanced, suppressed or vanished. The presence of salient events is signified on this audiovisual curve by geometrical features such as local extrema, sharp transition points and level sets. An audiovisual saliency-based movie summarization algorithm is proposed and evaluated. The algorithm is shown to perform very well in terms of summary informativeness and enjoyability for movie clips of various genres.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Based on perceptual and computational attention modeling studies, we formulate measures of saliency for an audiovisual stream. 
Audio saliency is captured by signal modulations and related multi-frequency band features, extracted through nonlinear operators and energy tracking. Visual saliency is measured by means of a spatiotemporal attention model driven by various feature cues (intensity, color, motion). Audio and video curves are integrated in a single attention curve, where events may be enhanced, suppressed or vanished. The presence of salient events is signified on this audiovisual curve by geometrical features such as local extrema, sharp transition points and level sets. An audiovisual saliency-based movie summarization algorithm is proposed and evaluated. The algorithm is shown to perform very well in terms of summary informativeness and enjoyability for movie clips of various genres. |
D Spachos, A Zlatintsi, V Moschou, P Antonopoulos, E Benetos, M Kotti, K Tzimouli, C Kotropoulos, N Nikolaidis, P Maragos, I Pitas MUSCLE Movie Database: A Multimodal Corpus With Rich Annotation For Dialogue And Saliency Detection Conference Marrakech, Morocco, 2008. Abstract | BibTeX | Links: [PDF] @conference{SZM+-8, title = {MUSCLE Movie Database: A Multimodal Corpus With Rich Annotation For Dialogue And Saliency Detection}, author = {D Spachos and A Zlatintsi and V Moschou and P Antonopoulos and E Benetos and M Kotti and K Tzimouli and C Kotropoulos and N Nikolaidis and P Maragos and I Pitas}, url = {http://robotics.ntua.gr/wp-content/publications/SpachosZlatintsi+_MuscleMovieDatabase_LREC08.pdf}, year = {2008}, date = {2008-05-01}, address = {Marrakech, Morocco}, abstract = {Semantic annotation of multimedia content is important for training, testing, and assessing content-based algorithms for indexing, organization, browsing, and retrieval. To this end, an annotated multimodal movie corpus has been collected to be used as a test bed for development and assessment of content-based multimedia processing, such as speaker clustering, speaker turn detection, visual speech activity detection, face detection, face clustering, scene segmentation, saliency detection, and visual dialogue detection. All metadata are saved in XML format following the MPEG-7 ISO prototype to ensure data compatibility and reusability. The entire MUSCLE movie database is available for download through the web. Visual speech activity and dialogue detection algorithms that have been developed within the software package DIVA3D and tested on this database are also briefly described. 
Furthermore, we review existing annotation tools with emphasis on the novel annotation tool Anthropos7 Editor.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Semantic annotation of multimedia content is important for training, testing, and assessing content-based algorithms for indexing, organization, browsing, and retrieval. To this end, an annotated multimodal movie corpus has been collected to be used as a test bed for development and assessment of content-based multimedia processing, such as speaker clustering, speaker turn detection, visual speech activity detection, face detection, face clustering, scene segmentation, saliency detection, and visual dialogue detection. All metadata are saved in XML format following the MPEG-7 ISO prototype to ensure data compatibility and reusability. The entire MUSCLE movie database is available for download through the web. Visual speech activity and dialogue detection algorithms that have been developed within the software package DIVA3D and tested on this database are also briefly described. Furthermore, we review existing annotation tools with emphasis on the novel annotation tool Anthropos7 Editor. |
Manthos Alifragis, Andreas Mantelos, Costas S. Tzafestas Web-based remote and virtual programming console of the V+ robotic system Conference IECON Proceedings (Industrial Electronics Conference), 2008, ISSN: 1553-572X. @conference{42b, title = {Web-based remote and virtual programming console of the V+ robotic system}, author = { Manthos Alifragis and Andreas Mantelos and Costas S. Tzafestas}, doi = {10.1109/IECON.2008.4758534}, issn = {1553-572X}, year = {2008}, date = {2008-01-01}, booktitle = {IECON Proceedings (Industrial Electronics Conference)}, pages = {3551--3555}, abstract = {The objective of practical training is a major issue in students' education, in many engineering disciplines. The access to specialized technological equipment for education is often limited by specific time restriction, or not provided at all. Therefore, the benefits by providing a Web-based platform for remote experimentation via LAN or Internet are evident. This paper describes the development of an e-laboratory platform intending to be used as a distance training system in the field of robotic task planning (e.g. programming of a robotic pick and place task). In prior work, this platform was evaluated by training students remotely to implement robotic tasks, using the robot's Teach Pendant. This paper is focusing on the design of a training platform, aiming to make students familiar with the V+ robotic operating system. The proposed platform intends to remotely provide the students with the ability of programming robotic manipulation tasks using directly V+ scripts. 
An evaluation protocol, presented in [11], [12], is considered to be employed in the near future, in order to assess the performance of the proposed e-laboratory platform, with respect to the level of students' learning and assimilating of the robot's programming language (V+).}, keywords = {}, pubstate = {published}, tppubtype = {conference} } The objective of practical training is a major issue in students' education, in many engineering disciplines. The access to specialized technological equipment for education is often limited by specific time restriction, or not provided at all. Therefore, the benefits by providing a Web-based platform for remote experimentation via LAN or Internet are evident. This paper describes the development of an e-laboratory platform intending to be used as a distance training system in the field of robotic task planning (e.g. programming of a robotic pick and place task). In prior work, this platform was evaluated by training students remotely to implement robotic tasks, using the robot's Teach Pendant. This paper is focusing on the design of a training platform, aiming to make students familiar with the V+ robotic operating system. The proposed platform intends to remotely provide the students with the ability of programming robotic manipulation tasks using directly V+ scripts. An evaluation protocol, presented in [11], [12], is considered to be employed in the near future, in order to assess the performance of the proposed e-laboratory platform, with respect to the level of students' learning and assimilating of the robot's programming language (V+). |
Michael Aron, Anastasios Roussos, Marie Odile Berger, Erwan Kerrien, Petros Maragos Multimodality acquisition of articulatory data and processing Conference European Signal Processing Conference, 2008, ISSN: 22195491. Abstract | BibTeX | Links: [PDF] @conference{205, title = {Multimodality acquisition of articulatory data and processing}, author = { Michael Aron and Anastasios Roussos and Marie Odile Berger and Erwan Kerrien and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/AronRoussosBergerKerrienMaragos_MultimodAcquisArticulDataProcessing_EUSIPCO2008.pdf}, issn = {22195491}, year = {2008}, date = {2008-01-01}, booktitle = {European Signal Processing Conference}, abstract = {In this paper, a framework to acquire and process dynamic data of the tongue during speech processing is presented. First, a setup to acquire data of the tongue shape combining ultrasound images, electromagnetic localization sensors and sound is presented. Techniques to automatically calibrate and synchronize the data are described. A method to extract the tongue shape is then proposed, by combining a preprocessing of the ultrasound images with an image-based tracking method that integrates adapted constraints. copyright by EURASIP.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, a framework to acquire and process dynamic data of the tongue during speech processing is presented. First, a setup to acquire data of the tongue shape combining ultrasound images, electromagnetic localization sensors and sound is presented. Techniques to automatically calibrate and synchronize the data are described. A method to extract the tongue shape is then proposed, by combining a preprocessing of the ultrasound images with an image-based tracking method that integrates adapted constraints. copyright by EURASIP. |
George Caridakis, Olga Diamanti, Kostas Karpouzis, Petros Maragos Automatic sign language recognition Conference Proceedings of the 1st ACM international conference on PErvasive Technologies Related to Assistive Environments - PETRA '08, 2008, ISBN: 9781605580678. Abstract | BibTeX | Links: [Webpage] @conference{208, title = {Automatic sign language recognition}, author = { George Caridakis and Olga Diamanti and Kostas Karpouzis and Petros Maragos}, url = {http://portal.acm.org/citation.cfm?doid=1389586.1389687}, doi = {10.1145/1389586.1389687}, isbn = {9781605580678}, year = {2008}, date = {2008-01-01}, booktitle = {Proceedings of the 1st ACM international conference on PErvasive Technologies Related to Assistive Environments - PETRA '08}, pages = {1}, abstract = {This work focuses on two of the research problems comprising automatic sign language recognition, namely robust computer vision techniques for consistent hand detection and tracking, while preserving the hand shape contour which is useful for extraction of features related to the handshape and a novel classification scheme incorporating Self-organizing maps, Markov chains and Hidden Markov Models. Geodesic Active Contours enhanced with skin color and motion information are employed for the hand detection and the extraction of the hand silhouette, while features extracted describe hand trajectory, region and shape. 
Extracted features are used as input to separate classifiers, forming a robust and adaptive architecture whose main contribution is the optimal utilization of the neighboring characteristic of the SOM during the decoding stage of the Markov chain, representing the sign class.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This work focuses on two of the research problems comprising automatic sign language recognition, namely robust computer vision techniques for consistent hand detection and tracking, while preserving the hand shape contour which is useful for extraction of features related to the handshape and a novel classification scheme incorporating Self-organizing maps, Markov chains and Hidden Markov Models. Geodesic Active Contours enhanced with skin color and motion information are employed for the hand detection and the extraction of the hand silhouette, while features extracted describe hand trajectory, region and shape. Extracted features are used as input to separate classifiers, forming a robust and adaptive architecture whose main contribution is the optimal utilization of the neighboring characteristic of the SOM during the decoding stage of the Markov chain, representing the sign class. |
Olga Diamanti, Petros Maragos Geodesic active regions for segmentation and tracking of human gestures in sign language videos Conference 2008 15th IEEE International Conference on Image Processing, 2008, ISSN: 1522-4880. @conference{202, title = {Geodesic active regions for segmentation and tracking of human gestures in sign language videos}, author = { Olga Diamanti and Petros Maragos}, url = {http://ieeexplore.ieee.org/document/4711950/}, doi = {10.1109/ICIP.2008.4711950}, issn = {1522-4880}, year = {2008}, date = {2008-01-01}, booktitle = {2008 15th IEEE International Conference on Image Processing}, pages = {1096--1099}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
G Evangelopoulos, P Maragos Texture Modulation-Constrained Image Decomposition Conference Proc. Int'l Conference on Image Processing (ICIP-2008), San Diego, California, Oct. 2008, 2008. @conference{200, title = {Texture Modulation-Constrained Image Decomposition}, author = { G Evangelopoulos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/EvangelopoulosMaragos_ukv_Image Decomposition_ICIP08.pdf}, year = {2008}, date = {2008-01-01}, booktitle = {Proc. Int'l Conference on Image Processing (ICIP-2008), San Diego, California, Oct. 2008}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
G. Evangelopoulos, K. Rapantzikos, A. Potamianos, P. Maragos, A. Zlatintsi, Y. Avrithis Movie summarization based on audiovisual saliency detection Conference Proceedings - International Conference on Image Processing, ICIP, 2008, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{203, title = {Movie summarization based on audiovisual saliency detection}, author = { G. Evangelopoulos and K. Rapantzikos and A. Potamianos and P. Maragos and A. Zlatintsi and Y. Avrithis}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/ERPMZA_MovieSummarizAVSaliency_ICIP2008.pdf}, doi = {10.1109/ICIP.2008.4712308}, issn = {15224880}, year = {2008}, date = {2008-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {2528--2531}, abstract = {Based on perceptual and computational attention modeling studies, we formulate measures of saliency for an audiovisual stream. Audio saliency is captured by signal modulations and related multi-frequency band features, extracted through nonlinear operators and energy tracking. Visual saliency is measured by means of a spatiotemporal attention model driven by various feature cues (intensity, color, motion). Audio and video curves are integrated in a single attention curve, where events may be enhanced, suppressed or vanished. The presence of salient events is signified on this audiovisual curve by geometrical features such as local extrema, sharp transition points and level sets. An audiovisual saliency-based movie summarization algorithm is proposed and evaluated. The algorithm is shown to perform very well in terms of summary informativeness and enjoyability for movie clips of various genres.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Based on perceptual and computational attention modeling studies, we formulate measures of saliency for an audiovisual stream. 
Audio saliency is captured by signal modulations and related multi-frequency band features, extracted through nonlinear operators and energy tracking. Visual saliency is measured by means of a spatiotemporal attention model driven by various feature cues (intensity, color, motion). Audio and video curves are integrated in a single attention curve, where events may be enhanced, suppressed or vanished. The presence of salient events is signified on this audiovisual curve by geometrical features such as local extrema, sharp transition points and level sets. An audiovisual saliency-based movie summarization algorithm is proposed and evaluated. The algorithm is shown to perform very well in terms of summary informativeness and enjoyability for movie clips of various genres. |
Georgios Evangelopoulos Image Decomposition into Structure and Texture Subcomponents with Conference Image (Rochester, N.Y.), 2008, ISBN: 9781424422432. @conference{210, title = {Image Decomposition into Structure and Texture Subcomponents with}, author = { Georgios Evangelopoulos}, isbn = {9781424422432}, year = {2008}, date = {2008-01-01}, booktitle = {Image (Rochester, N.Y.)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
John N. Karigiannis, Costas S. Tzafestas Multi-agent hierarchical architecture modeling kinematic chains employing continuous RL learning with fuzzified state space Conference Proceedings of the 2nd Biennial IEEE/RAS-EMBS International Conference on Biomedical Robotics and Biomechatronics, BioRob 2008, 2008, ISBN: 9781424428830. @conference{41b, title = {Multi-agent hierarchical architecture modeling kinematic chains employing continuous RL learning with fuzzified state space}, author = { John N. Karigiannis and Costas S. Tzafestas}, doi = {10.1109/BIOROB.2008.4762862}, isbn = {9781424428830}, year = {2008}, date = {2008-01-01}, booktitle = {Proceedings of the 2nd Biennial IEEE/RAS-EMBS International Conference on Biomedical Robotics and Biomechatronics, BioRob 2008}, pages = {716--723}, abstract = {In the context of multi-agent systems, we are proposing a hierarchical robot control architecture that comprises artificial intelligence (AI) techniques and traditional control methodologies, based on the realization of a learning team of agents in a continuous problem setting. In a multiagent system, action selection is important for cooperation and coordination among the agents. By employing reinforcement learning (RL) methods in a fuzzified state-space, we accomplish to design a control architecture and a corresponding methodology, engaged in a continuous space, which enables the agents to learn, over a period of time, to perform sequences of continuous actions in a cooperative manner, in order to reach their goal without any prior generated task model. By organizing the agents in a nested architecture, as proposed in this work, a type of problem-specific recursive knowledge acquisition is attempted. Furthermore, the agents try to exploit the knowledge gathered in order to be in position to execute tasks that indicate certain degree of similarity. 
The agents correspond in fact to independent degrees of freedom of the system, and achieve to gain experience over the task that they collaboratively perform, by exploring and exploiting their state-to-action mapping space. A numerical experiment is presented in this paper, performed on a simulated planar 4 degrees of freedom (DOF) manipulator, in order to evaluate both the proposed hierarchical multiagent architecture as well as the proposed methodological framework. It is anticipated that such an approach can be highly scalable for the control of robotic systems that are kinematically more complex, comprising multiple DOFs and potentially redundancies in open or closed kinematic chains, particularly dexterous manipulators. © 2008 IEEE.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In the context of multi-agent systems, we are proposing a hierarchical robot control architecture that comprises artificial intelligence (AI) techniques and traditional control methodologies, based on the realization of a learning team of agents in a continuous problem setting. In a multiagent system, action selection is important for cooperation and coordination among the agents. By employing reinforcement learning (RL) methods in a fuzzified state-space, we accomplish to design a control architecture and a corresponding methodology, engaged in a continuous space, which enables the agents to learn, over a period of time, to perform sequences of continuous actions in a cooperative manner, in order to reach their goal without any prior generated task model. By organizing the agents in a nested architecture, as proposed in this work, a type of problem-specific recursive knowledge acquisition is attempted. Furthermore, the agents try to exploit the knowledge gathered in order to be in position to execute tasks that indicate certain degree of similarity. 
The agents correspond in fact to independent degrees of freedom of the system, and achieve to gain experience over the task that they collaboratively perform, by exploring and exploiting their state-to-action mapping space. A numerical experiment is presented in this paper, performed on a simulated planar 4 degrees of freedom (DOF) manipulator, in order to evaluate both the proposed hierarchical multiagent architecture as well as the proposed methodological framework. It is anticipated that such an approach can be highly scalable for the control of robotic systems that are kinematically more complex, comprising multiple DOFs and potentially redundancies in open or closed kinematic chains, particularly dexterous manipulators. © 2008 IEEE. |
A Katsamanis, G Papandreou, P Maragos Audiovisual-to-Articulatory Speech Inversion Using Active Appearance Models for the Face and Hidden Markov Models for the Dynamics Conference IEEE Int. Conference on Acoustics, Speech, and Signal Processing, 2008. @conference{214, title = {Audiovisual-to-Articulatory Speech Inversion Using Active Appearance Models for the Face and Hidden Markov Models for the Dynamics}, author = { A Katsamanis and G Papandreou and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KatsamanisPapandreouMaragos_AamAvHmmInversion_icassp08.pdf}, year = {2008}, date = {2008-01-01}, booktitle = {IEEE Int. Conference on Acoustics, Speech, and Signal Processing}, pages = {2237--2240}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
A Katsamanis, A Roussos, P Maragos, M Aron, M.-O. Berger Inversion from audiovisual speech to articulatory information by exploiting multimodal data Conference Proceedings of ISSP 2008 - 8th International Seminar on Speech Production, 2008. Abstract | BibTeX | Links: [Webpage] @conference{198, title = {Inversion from audiovisual speech to articulatory information by exploiting multimodal data}, author = { A Katsamanis and A Roussos and P Maragos and M Aron and M.-O. Berger}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84902379110&partnerID=40&md5=e9c293b717940a725c6d5239569fab7d}, year = {2008}, date = {2008-01-01}, booktitle = {Proceedings of ISSP 2008 - 8th International Seminar on Speech Production}, pages = {301--304}, abstract = {We present an inversion framework to identify speech production properties from audiovisual information. Our system is built on a multimodal articulatory dataset comprising ultrasound, X-ray, magnetic resonance images, electromagnetic articulography data as well as audio and stereovisual recordings of the speaker. Visual information is captured via stereovision while the vocal tract state is represented by a properly trained articulatory model. The audiovisual-to-articulation relationship is approximated by an adaptive piecewise linear mapping. The presented system can recover the hidden vocal tract shapes and may serve as a basis for a more widely applicable inversion setup.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We present an inversion framework to identify speech production properties from audiovisual information. Our system is built on a multimodal articulatory dataset comprising ultrasound, X-ray, magnetic resonance images, electromagnetic articulography data as well as audio and stereovisual recordings of the speaker. Visual information is captured via stereovision while the vocal tract state is represented by a properly trained articulatory model. 
The audiovisual-to-articulation relationship is approximated by an adaptive piecewise linear mapping. The presented system can recover the hidden vocal tract shapes and may serve as a basis for a more widely applicable inversion setup. |
A. Katsamanis, G. Ananthakrishnan, G. Papandreou, P. Maragos, O. Engwall Audiovisual speech inversion by switching dynamical modeling governed by a Hidden Markov process Conference European Signal Processing Conference, 2008, ISSN: 22195491. Abstract | BibTeX | Links: [PDF] @conference{206, title = {Audiovisual speech inversion by switching dynamical modeling governed by a Hidden Markov process}, author = { A. Katsamanis and G. Ananthakrishnan and G. Papandreou and P. Maragos and O. Engwall}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KatsamanisAnanthPapandreouMaragosEngwall_AV-Speechinvers-SwitchDynModel-HidMarkov_EUSIPCO2008.pdf}, issn = {22195491}, year = {2008}, date = {2008-01-01}, booktitle = {European Signal Processing Conference}, abstract = {We propose a unified framework to recover articulation from audiovisual speech. The nonlinear audiovisual-to-articulatory mapping is modeled by means of a switching linear dynamical system. Switching is governed by a state sequence determined via a Hidden Markov Model alignment process. Mel Frequency Cepstral Coefficients are extracted from audio while visual analysis is performed using Active Appearance Models. The articulatory state is represented by the coordinates of points on important articulators, e.g., tongue and lips. To evaluate our inversion approach, instead of just using the conventional correlation coefficients and root mean squared errors, we introduce a novel evaluation scheme that is more specific to the inversion problem. Prediction errors in the positions of the articulators are weighted differently depending on their relevant importance in the production of the corresponding sound.
The applied weights are determined by an articulatory classification analysis using Support Vector Machines with a radial basis function kernel. Experiments are conducted in the audiovisual-articulatory MOCHA database.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We propose a unified framework to recover articulation from audiovisual speech. The nonlinear audiovisual-to-articulatory mapping is modeled by means of a switching linear dynamical system. Switching is governed by a state sequence determined via a Hidden Markov Model alignment process. Mel Frequency Cepstral Coefficients are extracted from audio while visual analysis is performed using Active Appearance Models. The articulatory state is represented by the coordinates of points on important articulators, e.g., tongue and lips. To evaluate our inversion approach, instead of just using the conventional correlation coefficients and root mean squared errors, we introduce a novel evaluation scheme that is more specific to the inversion problem. Prediction errors in the positions of the articulators are weighted differently depending on their relevant importance in the production of the corresponding sound. The applied weights are determined by an articulatory classification analysis using Support Vector Machines with a radial basis function kernel. Experiments are conducted in the audiovisual-articulatory MOCHA database. |
Yves Laprie, Petros Maragos, Jean Schoentgen How can acoustic-to-articulatory maps be constrained? Conference European Signal Processing Conference, 2008, ISSN: 22195491. Abstract | BibTeX | Links: [PDF] @conference{207, title = {How can acoustic-to-articulatory maps be constrained?}, author = { Yves Laprie and Petros Maragos and Jean Schoentgen}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/LaprieMaragosSchoentgen_HowCanAcousticToArticulatoryMapsBeConstrained_EUSIPCO2008.pdf}, issn = {22195491}, year = {2008}, date = {2008-01-01}, booktitle = {European Signal Processing Conference}, abstract = {The objective of the presentation is to examine issues in constraining acoustic-to-articulatory maps by means of facial data and other a priori knowledge regarding speech production. Constraints that are considered are the insertion of data on lip opening, spread and protrusion, as well as other facial data together with constraints on the vocal tract length. A priori knowledge that has been taken into account concerns the deformation and speed of deformation of the vocal tract as well as phonetic rules regarding vowel-typical tract shapes. Inverse maps that have been tested are formant-to-area and formant-to-parametric sagittal profile maps as well as audio/visual-to-electromagnetic coil trajectory maps. The results obtained while mapping audio-only data compared to audio combined with other data are discussed.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } The objective of the presentation is to examine issues in constraining acoustic-to-articulatory maps by means of facial data and other a priori knowledge regarding speech production.
Constraints that are considered are the insertion of data on lip opening, spread and protrusion, as well as other facial data together with constraints on the vocal tract length. A priori knowledge that has been taken into account concerns the deformation and speed of deformation of the vocal tract as well as phonetic rules regarding vowel-typical tract shapes. Inverse maps that have been tested are formant-to-area and formant-to-parametric sagittal profile maps as well as audio/visual-to-electromagnetic coil trajectory maps. The results obtained while mapping audio-only data compared to audio combined with other data are discussed. |
G. Papandreou, P. Maragos, A. Kokaram Image inpainting with a wavelet domain hidden Markov tree model Conference Proceedings of International Conference on Acoustics, Speech, and Signal Processing (ICASSP-08), Las Vegas, USA, April 2008, 2008. @conference{213, title = {Image inpainting with a wavelet domain hidden Markov tree model}, author = { G. Papandreou and P. Maragos and A. Kokaram}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PapandreouMaragosKokaram_HmtInpainting_icassp08.pdf}, year = {2008}, date = {2008-01-01}, booktitle = {Proceedings of International Conference on Acoustics, Speech, and Signal Processing (ICASSP-08), Las Vegas, USA, April 2008}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
George Papandreou, Petros Maragos Adaptive and constrained algorithms for inverse compositional active appearance model fitting Conference 26th IEEE Conference on Computer Vision and Pattern Recognition, CVPR, 2008, ISSN: 1063-6919. @conference{209, title = {Adaptive and constrained algorithms for inverse compositional active appearance model fitting}, author = { George Papandreou and Petros Maragos}, doi = {10.1109/CVPR.2008.4587540}, issn = {1063-6919}, year = {2008}, date = {2008-01-01}, booktitle = {26th IEEE Conference on Computer Vision and Pattern Recognition, CVPR}, abstract = {Parametric models of shape and texture such as active appearance models (AAMs) are diverse tools for deformable object appearance modeling and have found important applications in both image synthesis and analysis problems. Among the numerous algorithms that have been proposed for AAM fitting, those based on the inverse-compositional image alignment technique have recently received considerable attention due to their potential for high efficiency. However, existing fitting algorithms perform poorly when used in conjunction with models exhibiting significant appearance variation, such as AAMs trained on multiple-subject human face images. We introduce two enhancements to inverse-compositional AAM matching algorithms in order to overcome this limitation. First, we propose fitting algorithm adaptation, by means of (a) fitting matrix adjustment and (b) AAM mean template update. Second, we show how prior information can be incorporated and constrain the AAM fitting process. The inverse-compositional nature of the algorithm allows efficient implementation of these enhancements. 
Both techniques substantially improve AAM fitting performance, as demonstrated with experiments on publicly available multi-face datasets.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Parametric models of shape and texture such as active appearance models (AAMs) are diverse tools for deformable object appearance modeling and have found important applications in both image synthesis and analysis problems. Among the numerous algorithms that have been proposed for AAM fitting, those based on the inverse-compositional image alignment technique have recently received considerable attention due to their potential for high efficiency. However, existing fitting algorithms perform poorly when used in conjunction with models exhibiting significant appearance variation, such as AAMs trained on multiple-subject human face images. We introduce two enhancements to inverse-compositional AAM matching algorithms in order to overcome this limitation. First, we propose fitting algorithm adaptation, by means of (a) fitting matrix adjustment and (b) AAM mean template update. Second, we show how prior information can be incorporated and constrain the AAM fitting process. The inverse-compositional nature of the algorithm allows efficient implementation of these enhancements. Both techniques substantially improve AAM fitting performance, as demonstrated with experiments on publicly available multi-face datasets. |
Eftychios A. Pnevmatikakis, Petros Maragos An inpainting system for automatic image structure-texture restoration with text removal Conference Proceedings - International Conference on Image Processing, ICIP, 2008, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{201, title = {An inpainting system for automatic image structure-texture restoration with text removal}, author = { Eftychios A. Pnevmatikakis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PnevmatikakisMaragos_TextureRestorationWithTextRemoval_ICIP08.pdf}, doi = {10.1109/ICIP.2008.4712330}, issn = {15224880}, year = {2008}, date = {2008-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {2616--2619}, abstract = {In this paper we deal with the inpainting problem and with the problem of finding text in images. We first review many of the methods used for structure and texture inpaintings. The novel contribution of the paper is the combination of the inpainting techniques with the techniques of finding text in images and a simple morphological algorithm that links them. This combination results in an automatic system for text removal and image restoration that requires no user interface at all. Examples on real images show very good performance of the proposed system and the importance of the new linking algorithm.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper we deal with the inpainting problem and with the problem of finding text in images. We first review many of the methods used for structure and texture inpaintings. The novel contribution of the paper is the combination of the inpainting techniques with the techniques of finding text in images and a simple morphological algorithm that links them. This combination results in an automatic system for text removal and image restoration that requires no user interface at all. 
Examples on real images show very good performance of the proposed system and the importance of the new linking algorithm. |
D Spachos, A Zlatintsi MUSCLE movie database: A multimodal corpus with rich annotation for dialogue and saliency detection Conference Programme of the Workshop on Multimodal Corpora, 2008. @conference{211, title = {MUSCLE movie database: A multimodal corpus with rich annotation for dialogue and saliency detection}, author = { D Spachos and A Zlatintsi}, url = {http://users.uoi.gr/cs01702/MargaritaKotti/MypublicationsPDFs/Muscle%20movie.pdf}, year = {2008}, date = {2008-01-01}, booktitle = {Programme of the Workshop on Multimodal Corpora}, pages = {16}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Stamatios Lefkimmiatis, George Papandreou, Petros Maragos Photon-Limited Image Denoising by Inference on Multiscale Models Conference Proc. Int’l Conference on Image Processing, 2008, ISBN: 9781424417643. @conference{204, title = {Photon-Limited Image Denoising by Inference on Multiscale Models}, author = {Stamatios Lefkimmiatis and George Papandreou and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/LefkimmiatisPapandreouMaragos_PhotonLimitedImageDenoisingByInferenceMultiscaleModels_ICIP08.pdf}, isbn = {9781424417643}, year = {2008}, date = {2008-01-01}, booktitle = {Proc. Int’l Conference on Image Processing}, pages = {2332--2335}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Stamatios Lefkimmiatis, Petros Maragos, Athanassios Katsamanis Multisensor Multiband Cross-Energy Tracking for Feature Extraction and Recognition Conference Proceedings of International Conference on Acoustics, Speech, and Signal Processing (ICASSP-08), Las Vegas, USA, April 2008, 2008. @conference{212, title = {Multisensor Multiband Cross-Energy Tracking for Feature Extraction and Recognition}, author = {Stamatios Lefkimmiatis and Petros Maragos and Athanassios Katsamanis}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/LefkimmiatisMaragosKatsamanis_MultisensorMBandASR_icassp08.pdf}, year = {2008}, date = {2008-01-01}, booktitle = {Proceedings of International Conference on Acoustics, Speech, and Signal Processing (ICASSP-08), Las Vegas, USA, April 2008}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Costas Tzafestas, Spyros Velanas, George Fakiridis Adaptive impedance control in haptic teleoperation to improve transparency under time-delay Conference Proceedings - IEEE International Conference on Robotics and Automation, 2008, ISSN: 10504729. @conference{43b, title = {Adaptive impedance control in haptic teleoperation to improve transparency under time-delay}, author = { Costas Tzafestas and Spyros Velanas and George Fakiridis}, doi = {10.1109/ROBOT.2008.4543211}, issn = {10504729}, year = {2008}, date = {2008-01-01}, booktitle = {Proceedings - IEEE International Conference on Robotics and Automation}, pages = {212--219}, abstract = {This paper proposes the application of an adaptive impedance control scheme to alleviate some of the problems associated with the presence of time delays in a haptic teleoperation system. Continuous on-line estimation of the remote environment's impedance is performed, and is then used as a local model for haptic display control. Lyapunov stability of the proposed impedance adaptation law is demonstrated. A series of experiments is performed to evaluate the performance of this teleoperation control scheme. Two performance measures are defined to assess transparency and stability of the teleoperator. Simulation results show the superior performance of the proposed adaptive scheme, with respect to direct teleoperation, particularly in terms of increasing the stability margin and of significantly ameliorating transparency in the presence of large time delays. Experimental results, using a phantom omni as the haptic master device, support this conclusion.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper proposes the application of an adaptive impedance control scheme to alleviate some of the problems associated with the presence of time delays in a haptic teleoperation system. 
Continuous on-line estimation of the remote environment's impedance is performed, and is then used as a local model for haptic display control. Lyapunov stability of the proposed impedance adaptation law is demonstrated. A series of experiments is performed to evaluate the performance of this teleoperation control scheme. Two performance measures are defined to assess transparency and stability of the teleoperator. Simulation results show the superior performance of the proposed adaptive scheme, with respect to direct teleoperation, particularly in terms of increasing the stability margin and of significantly ameliorating transparency in the presence of large time delays. Experimental results, using a phantom omni as the haptic master device, support this conclusion. |
Costas S. Tzafestas, Kostas Birbas, Yiannis Koumpouros, Dimitris Christopoulos Pilot evaluation study of a virtual paracentesis simulator for skill training and assessment: The beneficial effect of haptic display Conference Presence: Teleoperators and Virtual Environments, 17 (2), 2008, ISSN: 10547460. Abstract | BibTeX | Links: [PDF] @conference{49b, title = {Pilot evaluation study of a virtual paracentesis simulator for skill training and assessment: The beneficial effect of haptic display}, author = { Costas S. Tzafestas and Kostas Birbas and Yiannis Koumpouros and Dimitris Christopoulos}, url = {http://users.softlab.ece.ntua.gr/~ktzaf/Publications/EuroHaptics2006_Tzafestas_finalpaper_91.pdf}, doi = {10.1162/pres.17.2.212}, issn = {10547460}, year = {2008}, date = {2008-01-01}, booktitle = {Presence: Teleoperators and Virtual Environments}, volume = {17}, number = {2}, pages = {212--229}, abstract = {Abstract Effective, real-time training of health care professionals in invasive procedures is a challenging task. Furthermore, assessing in practice the acquisition of the dexterity and skills required to safely perform such operations is particularly difficult to per- form objectively and reliably. The development of virtual reality (VR) simulators offers great potential toward these objectives, and can help bypass some of the difficulties associated with classical surgical training and assessment procedures. In this context, we have developed a prototype VR simulator platform for training in a class of invasive procedures, such as accessing central vessels. This paper focuses more particularly on a pilot study treating the specific application case of subclavian vein paracentesis. 
The simulation incorporates 3D models of all the human anatomy structures involved in this procedure, where collision detection and response algo- rithms are implemented to simulate most of the potential complications in accor- dance with the situations encountered in real clinical practice. Furthermore, haptic display is integrated using a typical force feedback device providing the user with a sense of touch during the simulated operations. Our main objective in this study was to obtain quantitative evaluation results regarding the effect of haptic display on performance. Two user groups participated in the study: (I) novice users and (II) experienced surgeons. The system automatically provides quantitative assessment scores of users' performance, applying a set of objective measures that also involve the optimality of the needle insertion path and indicators of maneuvering errors. Training and skill assessment performance of the system is evaluated in a twofold manner, regarding respectively: (a) the learning curve of novice users, and (b) the correlation of the system-generated scores with the actual surgical experience of the user. These performance indicators are assessed with respect to the activation of the haptic display and to whether this has any beneficial effect (or not). The ex- perimental findings of this first pilot study provide quantitative evidence about the significance of haptic display, not only as a means to enhance the realism of the surgical simulation, but especially as an irreplaceable component for achieving objec- tive and reliable skill assessment. 
Further larger-scale and long-term clinical studies are needed to validate the effectiveness of such platforms for actual training and dexterity enhancement, particularly when more complex sensorimotor skills are involved.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Abstract Effective, real-time training of health care professionals in invasive procedures is a challenging task. Furthermore, assessing in practice the acquisition of the dexterity and skills required to safely perform such operations is particularly difficult to per- form objectively and reliably. The development of virtual reality (VR) simulators offers great potential toward these objectives, and can help bypass some of the difficulties associated with classical surgical training and assessment procedures. In this context, we have developed a prototype VR simulator platform for training in a class of invasive procedures, such as accessing central vessels. This paper focuses more particularly on a pilot study treating the specific application case of subclavian vein paracentesis. The simulation incorporates 3D models of all the human anatomy structures involved in this procedure, where collision detection and response algo- rithms are implemented to simulate most of the potential complications in accor- dance with the situations encountered in real clinical practice. Furthermore, haptic display is integrated using a typical force feedback device providing the user with a sense of touch during the simulated operations. Our main objective in this study was to obtain quantitative evaluation results regarding the effect of haptic display on performance. Two user groups participated in the study: (I) novice users and (II) experienced surgeons. The system automatically provides quantitative assessment scores of users' performance, applying a set of objective measures that also involve the optimality of the needle insertion path and indicators of maneuvering errors. 
Training and skill assessment performance of the system is evaluated in a twofold manner, regarding respectively: (a) the learning curve of novice users, and (b) the correlation of the system-generated scores with the actual surgical experience of the user. These performance indicators are assessed with respect to the activation of the haptic display and to whether this has any beneficial effect (or not). The ex- perimental findings of this first pilot study provide quantitative evidence about the significance of haptic display, not only as a means to enhance the realism of the surgical simulation, but especially as an irreplaceable component for achieving objec- tive and reliable skill assessment. Further larger-scale and long-term clinical studies are needed to validate the effectiveness of such platforms for actual training and dexterity enhancement, particularly when more complex sensorimotor skills are involved. |
Petros Maragos, Corinne Vachier A PDE formulation for viscous morphological operators with extensions to intensity-adaptive operators Conference Proc. Int’l Conference on Image Processing (ICIP-2008), San Diego, California, 2008, ISSN: 15224880. @conference{199, title = {A PDE formulation for viscous morphological operators with extensions to intensity-adaptive operators}, author = {Petros Maragos and Corinne Vachier}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosVachier_PDEFormulationViscousMorphologicalOperatorsToIntensityAdaptiveOps_ICIP08.pdf}, doi = {10.1109/ICIP.2008.4712226}, issn = {15224880}, year = {2008}, date = {2008-01-01}, booktitle = {Proc. Int’l Conference on Image Processing (ICIP-2008)}, journal = {Proceedings - International Conference on Image Processing, ICIP}, pages = {2200--2203}, address = {San Diego, California}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Georgios Evangelopoulos, Konstantinos Rapantzikos, Petros Maragos, Yannis Avrithis, Alexandros Potamianos Audiovisual Attention Modeling and Salient Event Detection Book Chapter Maragos, Petros; Potamianos, Alexandros; Gros, Patrick (Ed.): Multimodal Processing and Interaction: Audio, Video, Text, pp. 1–21, Springer US, Boston, MA, 2008, ISBN: 978-0-387-76316-3. @inbook{Evangelopoulos2008, title = {Audiovisual Attention Modeling and Salient Event Detection}, author = {Georgios Evangelopoulos and Konstantinos Rapantzikos and Petros Maragos and Yannis Avrithis and Alexandros Potamianos}, editor = {Petros Maragos and Alexandros Potamianos and Patrick Gros}, url = {https://doi.org/10.1007/978-0-387-76316-3_8}, doi = {10.1007/978-0-387-76316-3_8}, isbn = {978-0-387-76316-3}, year = {2008}, date = {2008-01-01}, booktitle = {Multimodal Processing and Interaction: Audio, Video, Text}, pages = {1--21}, publisher = {Springer US}, address = {Boston, MA}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } |
Petros Maragos, Patrick Gros, Athanassios Katsamanis, George Papandreou Cross-Modal Integration for Performance Improving in Multimedia: A Review Book Chapter Maragos, Petros; Potamianos, Alexandros; Gros, Patrick (Ed.): Multimodal Processing and Interaction: Audio, Video, Text, pp. 1–46, Springer US, Boston, MA, 2008, ISBN: 978-0-387-76316-3. @inbook{Maragos2008, title = {Cross-Modal Integration for Performance Improving in Multimedia: A Review}, author = {Petros Maragos and Patrick Gros and Athanassios Katsamanis and George Papandreou}, editor = {Petros Maragos and Alexandros Potamianos and Patrick Gros}, url = {https://doi.org/10.1007/978-0-387-76316-3_1}, doi = {10.1007/978-0-387-76316-3_1}, isbn = {978-0-387-76316-3}, year = {2008}, date = {2008-01-01}, booktitle = {Multimodal Processing and Interaction: Audio, Video, Text}, pages = {1--46}, publisher = {Springer US}, address = {Boston, MA}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } |
George Papandreou, Athanassios Katsamanis, Vassilis Pitsikalis, Petros Maragos Adaptive Multimodal Fusion by Uncertainty Compensation with Application to Audio-Visual Speech Recognition Book Chapter Maragos, Petros; Potamianos, Alexandros; Gros, Patrick (Ed.): Multimodal Processing and Interaction: Audio, Video, Text, pp. 1–15, Springer US, Boston, MA, 2008, ISBN: 978-0-387-76316-3. @inbook{Papandreou2008, title = {Adaptive Multimodal Fusion by Uncertainty Compensation with Application to Audio-Visual Speech Recognition}, author = {George Papandreou and Athanassios Katsamanis and Vassilis Pitsikalis and Petros Maragos}, editor = {Petros Maragos and Alexandros Potamianos and Patrick Gros}, url = {https://doi.org/10.1007/978-0-387-76316-3_4}, doi = {10.1007/978-0-387-76316-3_4}, isbn = {978-0-387-76316-3}, year = {2008}, date = {2008-01-01}, booktitle = {Multimodal Processing and Interaction: Audio, Video, Text}, pages = {1--15}, publisher = {Springer US}, address = {Boston, MA}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } |
2007 |
Stamatios Lefkimmiatis, Petros Maragos A generalized estimation approach for linear and nonlinear microphone array post-filters Journal Article Speech Communication, 49 (7-8), pp. 657–666, 2007, ISSN: 01676393. Abstract | BibTeX | Links: [PDF] @article{124, title = {A generalized estimation approach for linear and nonlinear microphone array post-filters}, author = {Stamatios Lefkimmiatis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/LefkimmiatisMaragos_GeneralizedEstimationMicrophoneArrays_specom2007.pdf}, doi = {10.1016/j.specom.2007.02.004}, issn = {01676393}, year = {2007}, date = {2007-01-01}, journal = {Speech Communication}, volume = {49}, number = {7-8}, pages = {657--666}, abstract = {This paper presents a robust and general method for estimating the transfer functions of microphone array post-filters, derived under various speech enhancement criteria. For the case of the mean square error (MSE) criterion, the proposed method is an improvement of the existing McCowan post-filter, which under the assumption of a known noise field coherence function uses the auto- and cross-spectral densities of the microphone array noisy inputs to estimate the Wiener post-filter transfer function. In contrast to McCowan post-filter, the proposed method takes into account the noise reduction performed by the minimum variance distortionless response (MVDR) beamformer and obtains a more accurate estimation of the noise spectral density. Furthermore, the proposed estimation approach is general and can be used for the derivation of both linear and nonlinear microphone array post-filters, according to the utilized enhancement criterion.
In experiments with real noise multichannel recordings the proposed technique has shown to obtain a significant gain over the other studied methods in terms of five different objective speech quality measures. textcopyright 2007 Elsevier B.V. All rights reserved.}, keywords = {}, pubstate = {published}, tppubtype = {article} } This paper presents a robust and general method for estimating the transfer functions of microphone array post-filters, derived under various speech enhancement criteria. For the case of the mean square error (MSE) criterion, the proposed method is an improvement of the existing McCowan post-filter, which under the assumption of a known noise field coherence function uses the auto- and cross-spectral densities of the microphone array noisy inputs to estimate the Wiener post-filter transfer function. In contrast to McCowan post-filter, the proposed method takes into account the noise reduction performed by the minimum variance distortionless response (MVDR) beamformer and obtains a more accurate estimation of the noise spectral density. Furthermore, the proposed estimation approach is general and can be used for the derivation of both linear and nonlinear microphone array post-filters, according to the utilized enhancement criterion. In experiments with real noise multichannel recordings the proposed technique has shown to obtain a significant gain over the other studied methods in terms of five different objective speech quality measures. textcopyright 2007 Elsevier B.V. All rights reserved. |
George Papandreou, Petros Maragos Multigrid Geometric Active Contour Models Journal Article IEEE Transactions on Image Processing, 16 (1), pp. 229-240, 2007. @article{125, title = {Multigrid Geometric Active Contour Models}, author = {George Papandreou and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PapandreouMaragos_MultigridGeometricActiveContourModels_tip07.pdf}, doi = {10.1109/TIP.2006.884952}, year = {2007}, date = {2007-01-01}, journal = {IEEE Transactions on Image Processing}, volume = {16}, number = {1}, pages = {229-240}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Maciej Cytowski Partial Differential Equations in Image Processing Conference Invited Paper, Proc. European Signal Processing Conf. (EUSIPCO--98), Rhodes, Greece, pp. 527-536, Sep. 1998, (May), 2007. @conference{265, title = {Partial Differential Equations in Image Processing}, author = { Maciej Cytowski}, year = {2007}, date = {2007-01-01}, booktitle = {Invited Paper, Proc. European Signal Processing Conf. (EUSIPCO--98), Rhodes, Greece, pp. 527-536, Sep. 1998}, number = {May}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
D. Dimitriadis, P. Maragos, S. Lefkimmiatis Multiband, Multisensor Features for Robust Speech Recognition Conference Proc. Int'l Conf. on Speech Technology and Communication (InterSpeech 2007 EuroSpeech), Antwerp, Belgium, Aug. 2007, 2007. @conference{220, title = {Multiband, Multisensor Features for Robust Speech Recognition}, author = { D. Dimitriadis and P. Maragos and S. Lefkimmiatis}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/DimitriadisMaragosLefkimmiatis_MinTECC_ASR_InterSpeech2007.pdf}, year = {2007}, date = {2007-01-01}, booktitle = {Proc. Int'l Conf. on Speech Technology and Communication (InterSpeech 2007 EuroSpeech), Antwerp, Belgium, Aug. 2007}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Dimitrios Dimitriadis, JC Segura, Luz Garcia Advanced front-end for robust speech recognition in extremely adverse environments Conference Proc. Interspeech 2007, 2007, ISBN: 9781605603162. Abstract | BibTeX | Links: [PDF] @conference{221, title = {Advanced front-end for robust speech recognition in extremely adverse environments}, author = { Dimitrios Dimitriadis and JC Segura and Luz Garcia}, url = {http://cvsp.cs.ntua.gr/projects/pub/HIWIRE/HiwirePublications/DMSP_HAFE_ASR_Interspeech07.pdf}, isbn = {9781605603162}, year = {2007}, date = {2007-01-01}, booktitle = {Proc. Interspeech 2007}, pages = {1--4}, abstract = {In this paper, a unified approach to speech enhancement, feature extraction and feature normalization for speech recognition in adverse recording conditions is presented. The proposed front-end system consists of several different, independent, processing modules. Each of the algorithms contained in these modules has been independently applied to the problem of speech recognition in noise, significantly improving the recognition rates. In this work, these algorithms are merged in a single front-end and their combined performance is demonstrated. Specifically, the proposed advanced front-end extracts noise-invariant features via the following modules: Wiener filtering, voice-activity detection, robust feature extraction (nonlinear modulation or fractal features), parameter equalization and frame-dropping. The advanced front-end is applied to extremely adverse environments where most feature extraction schemes fail. We show that by combining speech enhancement, robust feature extraction and feature normalization up to a fivefold error rate reduction can be achieved for certain tasks.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, a unified approach to speech enhancement, feature extraction and feature normalization for speech recognition in adverse recording conditions is presented. 
The proposed front-end system consists of several different, independent, processing modules. Each of the algorithms contained in these modules has been independently applied to the problem of speech recognition in noise, significantly improving the recognition rates. In this work, these algorithms are merged in a single front-end and their combined performance is demonstrated. Specifically, the proposed advanced front-end extracts noise-invariant features via the following modules: Wiener filtering, voice-activity detection, robust feature extraction (nonlinear modulation or fractal features), parameter equalization and frame-dropping. The advanced front-end is applied to extremely adverse environments where most feature extraction schemes fail. We show that by combining speech enhancement, robust feature extraction and feature normalization up to a fivefold error rate reduction can be achieved for certain tasks. |
Athanassios Katsamanis, George Papandreou, Petros Maragos Audiovisual-to-articulatory speech inversion using HMMs Conference 2007 IEEE 9Th International Workshop on Multimedia Signal Processing, MMSP 2007 - Proceedings, 2007, ISBN: 1424412749. Abstract | BibTeX | Links: [PDF] @conference{217, title = {Audiovisual-to-articulatory speech inversion using HMMs}, author = { Athanassios Katsamanis and George Papandreou and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KatsamanisPapandreouMaragos_AVSpeechInversionHMM_mmsp07.pdf}, doi = {10.1109/MMSP.2007.4412915}, isbn = {1424412749}, year = {2007}, date = {2007-01-01}, booktitle = {2007 IEEE 9Th International Workshop on Multimedia Signal Processing, MMSP 2007 - Proceedings}, pages = {457--460}, abstract = {We address the problem of audiovisual speech inversion, namely recovering the vocal tract's geometry from auditory and visual speech cues. We approach the problem in a statistical framework, combining ideas from multistream Hidden Markov Models and canonical correlation analysis, and demonstrate effective estimation of the trajectories followed by certain points of interest in the speech production system. Our experiments show that exploiting both audio and visual modalities clearly improves performance relative to either audio-only or visual-only estimation. We report experiments on the QSMT database which contains audio, video, and electromagnetic articulography data recorded in parallel.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We address the problem of audiovisual speech inversion, namely recovering the vocal tract's geometry from auditory and visual speech cues. We approach the problem in a statistical framework, combining ideas from multistream Hidden Markov Models and canonical correlation analysis, and demonstrate effective estimation of the trajectories followed by certain points of interest in the speech production system. 
Our experiments show that exploiting both audio and visual modalities clearly improves performance relative to either audio-only or visual-only estimation. We report experiments on the QSMT database which contains audio, video, and electromagnetic articulography data recorded in parallel. |
P. Maragos, Georgios Evangelopoulos Leveling cartoons, texture energy markers, and image decomposition Conference 8th Int. Symp. on Mathematical Morphology, 1 , 2007. @conference{215, title = {Leveling cartoons, texture energy markers, and image decomposition}, author = { P. Maragos and Georgios Evangelopoulos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosEvangelopoulos_LevelCartoons-TextureEnergyMarkers-ImageDecompos_ISMM2007.pdf}, year = {2007}, date = {2007-01-01}, booktitle = {8th Int. Symp. on Mathematical Morphology}, volume = {1}, pages = {125--138}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Nikos C. Mitsou, Costas S. Tzafestas Temporal Occupancy Grid for mobile robot dynamic environment mapping Conference 2007 Mediterranean Conference on Control and Automation, MED, 2007, ISBN: 142441282X. @conference{45b, title = {Temporal Occupancy Grid for mobile robot dynamic environment mapping}, author = { Nikos C. Mitsou and Costas S. Tzafestas}, doi = {10.1109/MED.2007.4433892}, isbn = {142441282X}, year = {2007}, date = {2007-01-01}, booktitle = {2007 Mediterranean Conference on Control and Automation, MED}, abstract = {Mapping dynamic environments is an open issue in the field of robotics. In this paper, we extend the well known Occupancy Grid structure to address the problem of generating valid maps for dynamic indoor environments. We propose a spatiotemporal access method to store all sensor values (instead of preserving only one value for each cell as in the common occupancy grid case). By searching for similar time series, we can detect moving objects that appear only in a limited number of possible configurations (e.g. doors or chairs). Simulated experiments demonstrate the potentialities of the proposed system.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Mapping dynamic environments is an open issue in the field of robotics. In this paper, we extend the well known Occupancy Grid structure to address the problem of generating valid maps for dynamic indoor environments. We propose a spatiotemporal access method to store all sensor values (instead of preserving only one value for each cell as in the common occupancy grid case). By searching for similar time series, we can detect moving objects that appear only in a limited number of possible configurations (e.g. doors or chairs). Simulated experiments demonstrate the potentialities of the proposed system. |
Nikos Mitsou, Costas Tzafestas Maximum likelihood SLAM in dynamic environments Conference Proceedings - International Conference on Tools with Artificial Intelligence, ICTAI, 1 , 2007, ISSN: 10823409. @conference{44b, title = {Maximum likelihood SLAM in dynamic environments}, author = { Nikos Mitsou and Costas Tzafestas}, doi = {10.1109/ICTAI.2007.168}, issn = {10823409}, year = {2007}, date = {2007-01-01}, booktitle = {Proceedings - International Conference on Tools with Artificial Intelligence, ICTAI}, volume = {1}, pages = {152--156}, abstract = {Simultaneous Localization and Mapping in dynamic environments is an open issue in the field of robotics. Traditionally, the related approaches assume that the environment remains static during the robot's exploration phase. In this work, we overcome this assumption and propose an algorithm that exploits the dynamic nature of the environment during robot exploration so as to improve the localization process. We use a Histogram Grid to store all the past occupancy values of every cell and thus to select the most probable pose of the robot based on the occupancy evolution. Experiments on a simulated robot indicate the effectiveness of the proposed approach.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Simultaneous Localization and Mapping in dynamic environments is an open issue in the field of robotics. Traditionally, the related approaches assume that the environment remains static during the robot's exploration phase. In this work, we overcome this assumption and propose an algorithm that exploits the dynamic nature of the environment during robot exploration so as to improve the localization process. We use a Histogram Grid to store all the past occupancy values of every cell and thus to select the most probable pose of the robot based on the occupancy evolution. Experiments on a simulated robot indicate the effectiveness of the proposed approach. |
George Papandreou, Athanassios Katsamanis, Vassilis Pitsikalis, Petros Maragos Multimodal fusion and learning with uncertain features applied to audiovisual speech recognition Conference 2007 IEEE 9Th International Workshop on Multimedia Signal Processing, MMSP 2007 - Proceedings, (November 2015), 2007, ISBN: 1424412749. Abstract | BibTeX | Links: [PDF] @conference{218, title = {Multimodal fusion and learning with uncertain features applied to audiovisual speech recognition}, author = { George Papandreou and Athanassios Katsamanis and Vassilis Pitsikalis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PapandreouKatsamanisPitsikalisMaragos_AudiovisualUncertaintyLearning_mmsp07.pdf}, doi = {10.1109/MMSP.2007.4412868}, isbn = {1424412749}, year = {2007}, date = {2007-01-01}, booktitle = {2007 IEEE 9Th International Workshop on Multimedia Signal Processing, MMSP 2007 - Proceedings}, number = {November 2015}, pages = {264--267}, abstract = {We study the effect of uncertain feature measurements and show how classification and learning rules should be adjusted to compensate for it. Our approach is particularly fruitful in multimodal fusion scenarios, such as audio-visual speech recognition, where multiple streams of complementary features whose reliability is time-varying are integrated. For such applications, by taking the measurement noise uncertainty of each feature stream into account, the proposed framework leads to highly adaptive multimodal fusion rules for classification and learning which are widely applicable and easy to implement. We further show that previous multimodal fusion methods relying on stream weights fall under our scheme under certain assumptions; this provides novel insights into their applicability for various tasks and suggests new practical ways for estimating the stream weights adaptively. 
The potential of our approach is demonstrated in audio-visual speech recognition experiments.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We study the effect of uncertain feature measurements and show how classification and learning rules should be adjusted to compensate for it. Our approach is particularly fruitful in multimodal fusion scenarios, such as audio-visual speech recognition, where multiple streams of complementary features whose reliability is time-varying are integrated. For such applications, by taking the measurement noise uncertainty of each feature stream into account, the proposed framework leads to highly adaptive multimodal fusion rules for classification and learning which are widely applicable and easy to implement. We further show that previous multimodal fusion methods relying on stream weights fall under our scheme under certain assumptions; this provides novel insights into their applicability for various tasks and suggests new practical ways for estimating the stream weights adaptively. The potential of our approach is demonstrated in audio-visual speech recognition experiments. |
Konstantinos Rapantzikos, Georgios Evangelopoulos, Petros Maragos, Yannis Avrithis An audio-visual saliency model for movie summarization Conference 2007 IEEE 9Th International Workshop on Multimedia Signal Processing, MMSP 2007 - Proceedings, 2007, ISBN: 1424412749. Abstract | BibTeX | Links: [PDF] @conference{219, title = {An audio-visual saliency model for movie summarization}, author = { Konstantinos Rapantzikos and Georgios Evangelopoulos and Petros Maragos and Yannis Avrithis}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/RapantzikosEvangelopoulosMaragosEtAl_AVSaliencyDetectionSummarization_mmsp07.pdf}, doi = {10.1109/MMSP.2007.4412882}, isbn = {1424412749}, year = {2007}, date = {2007-01-01}, booktitle = {2007 IEEE 9Th International Workshop on Multimedia Signal Processing, MMSP 2007 - Proceedings}, pages = {320--323}, abstract = {A saliency-based method for generating video summaries is presented, which exploits coupled audiovisual information from both media streams. Efficient and advanced speech and image processing algorithms to detect key frames that are acoustically and visually salient are used. Promising results are shown from experiments on a movie database.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } A saliency-based method for generating video summaries is presented, which exploits coupled audiovisual information from both media streams. Efficient and advanced speech and image processing algorithms to detect key frames that are acoustically and visually salient are used. Promising results are shown from experiments on a movie database. |
Anastasios Roussos, Petros Maragos Vector-Valued Image Interpolation by an Anisotropic Diffusion-Projection PDE Conference SSVM, LNCS 4885 , 2007, ISSN: 03029743. Abstract | BibTeX | Links: [PDF] @conference{222, title = {Vector-Valued Image Interpolation by an Anisotropic Diffusion-Projection PDE}, author = { Anastasios Roussos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/RoussosMaragos_VectorImgInterpolProjDiffPDE_SSVM2007.pdf}, issn = {03029743}, year = {2007}, date = {2007-01-01}, booktitle = {SSVM}, volume = {LNCS 4885}, pages = {104--115}, abstract = {We propose a nonlinear image interpolation method, based on an anisotropic diffusion PDE and designed for the general case of vector-valued images. The interpolation solution is restricted to the subspace of functions that can recover the discrete input image, after an appropriate smoothing and sampling. The proposed nonlinear diffusion flow lies on this subspace and its strength and anisotropy effectively adapt to the local variations and geometry of image structures. The derived model efficiently reconstructs the real image structures, leading to a natural interpolation, with reduced blurring, staircase and ringing artifacts of classic methods. This method also outperforms other existing PDE-based interpolation methods. We present experimental results that prove the potential and efficacy of the method as applied to graylevel and color images.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We propose a nonlinear image interpolation method, based on an anisotropic diffusion PDE and designed for the general case of vector-valued images. The interpolation solution is restricted to the subspace of functions that can recover the discrete input image, after an appropriate smoothing and sampling. 
The proposed nonlinear diffusion flow lies on this subspace and its strength and anisotropy effectively adapt to the local variations and geometry of image structures. The derived model efficiently reconstructs the real image structures, leading to a natural interpolation, with reduced blurring, staircase and ringing artifacts of classic methods. This method also outperforms other existing PDE-based interpolation methods. We present experimental results that prove the potential and efficacy of the method as applied to graylevel and color images. |
Anastasia Sofou, Petros Maragos Generalized Watershed and PDEs for Geometric-Textural Segmentation Conference Proceedings of 8th International Symposium on Mathematical Morphology (ISMM 2007), Rio de Janeiro, Brazil, Oct. 2007. Published in Mathematical Morphology and its Applications to Signal and Image Processing, edited by G.J.F. Banon, J. Barrera and U. Braga-Neto, MCT/INPE, Brazil, 2007, pp.411-422, 2007. Abstract | BibTeX | Links: [PDF] @conference{216, title = {Generalized Watershed and PDEs for Geometric-Textural Segmentation}, author = { Anastasia Sofou and Petros Maragos}, url = {SofouMaragos_GeneralWshedPDE-GeomTexturSegm_ISMM2007.pdf}, year = {2007}, date = {2007-01-01}, booktitle = {Proceedings of 8th International Symposium on Mathematical Morphology (ISMM 2007), Rio de Janeiro, Brazil, Oct. 2007. Published in Mathematical Morphology and its Applications to Signal and Image Processing, edited by G.J.F. Banon, J. Barrera and U. Braga-Neto, MCT/INPE, Brazil, 2007, pp.411-422}, abstract = {In this paper we approach the segmentation problem by attempting to incorporate cues such as intensity contrast, region size and texture in the segmentation procedure and derive improved results compared to using individual cues separately. We propose efficient simplification operators and feature extraction schemes, capable of quantifying important characteristics like geometrical complexity, rate of change in local contrast variations and orientation, that eventually favor the final segmentation result. 
Based on the morphological paradigm of watershed transform we investigate and extend its PDE formulation in order to satisfy various flooding criteria, and couple them with texture information thus making it applicable to a wider range of images.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper we approach the segmentation problem by attempting to incorporate cues such as intensity contrast, region size and texture in the segmentation procedure and derive improved results compared to using individual cues separately. We propose efficient simplification operators and feature extraction schemes, capable of quantifying important characteristics like geometrical complexity, rate of change in local contrast variations and orientation, that eventually favor the final segmentation result. Based on the morphological paradigm of watershed transform we investigate and extend its PDE formulation in order to satisfy various flooding criteria, and couple them with texture information thus making it applicable to a wider range of images. |
2006 |
Costas S Tzafestas, Nektaria Palaiologou, Manthos Alifragis Virtual and remote robotic laboratory: Comparative experimental evaluation Journal Article IEEE Transactions on Education, 49 (3), pp. 360–369, 2006, ISSN: 00189359. @article{3_64, title = {Virtual and remote robotic laboratory: Comparative experimental evaluation}, author = {Costas S Tzafestas and Nektaria Palaiologou and Manthos Alifragis}, doi = {10.1109/TE.2006.879255}, issn = {00189359}, year = {2006}, date = {2006-01-01}, journal = {IEEE Transactions on Education}, volume = {49}, number = {3}, pages = {360--369}, abstract = {This paper describes the development and experimental evaluation of an e-laboratory platform in the field of robotics. The system in its current configuration is designed to enable distance training of students in real scenarios of robot manipulator programming. From a technological perspective, the research work presented in this paper is directed towards the adaptation of concepts and techniques developed in the field of telerobotics and virtual reality, and their integration in such e-laboratory settings. This paper focuses particularly on the educational impact of such systems. The goal is to assess the performance of e-laboratory scenarios in terms of the efficacy of training provided to students. The results of a pilot experimental study are presented, providing a comparative evaluation for three training modalities: real, remote, and virtual training on robot manipulator programming. The experiments were conducted according to an evaluation protocol specially designed for the considered target training task, using scoring charts to obtain quantitative performance measures and assess the performance of the student groups participating in the course. 
Training, as a dynamic process, is approached according to a classical three dimensional model, and performance scores are accordingly assessed in these dimensions (namely: low-level versus mid and high-level skills and understanding). The obtained results reveal certain differences between the three groups, particularly as related to the low-level skill training score, giving some insight about the training `dimensions' that are expected to be mostly affected by the absence of physical (or realistic virtual) presence in a real hands-on experimentation. Statistical analysis indicates, however, that, despite these apparent differences, such e-laboratory modules can be integrated quite effectively in practical scenarios, creating virtual training environments that can provide adequate learning elements, as related particularly to mid and high-level skill acquisition. Further work and large-scale studies are still needed, though, in order to explore the extent to which such a general conclusion is valid in different training settings, and to form the basis of a more theoretical evaluation for a comprehensive understanding of the pedagogical differences between real, virtual, and remote learning/training methodologies and experiences}, keywords = {}, pubstate = {published}, tppubtype = {article} } This paper describes the development and experimental evaluation of an e-laboratory platform in the field of robotics. The system in its current configuration is designed to enable distance training of students in real scenarios of robot manipulator programming. From a technological perspective, the research work presented in this paper is directed towards the adaptation of concepts and techniques developed in the field of telerobotics and virtual reality, and their integration in such e-laboratory settings. This paper focuses particularly on the educational impact of such systems. 
The goal is to assess the performance of e-laboratory scenarios in terms of the efficacy of training provided to students. The results of a pilot experimental study are presented, providing a comp |
2024 |
Time perception in film viewing: A modulation of scene's duration estimates as a function of film editing Journal Article Acta Psychologica, 244 , pp. 104206, 2024. |
Person Identification and Relapse Detection from Continuous Recordings of Biosignals Challenge: Overview and Results Journal Article IEEE Open Journal of Signal Processing, 2024. |
3D Facial Expressions through Analysis-by-Neural-Synthesis Inproceedings Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2490-2501, 2024. |
Matrix Factorization in Tropical and Mixed Tropical-Linear Algebras Inproceedings Proc. IEEE Int’l Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024), Seoul, Korea, 2024. |
Augmenting Transformer Autoencoders with Phenotype Classification for Robust Detection of Psychotic Relapses Inproceedings Proc. IEEE Int’l Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024), Seoul, Korea, 2024. |
2023 |
Greek sign language recognition for an education platform Journal Article Universal Access in the Information Society, 2023. |
Mushroom Detection and Three Dimensional Pose Estimation from Multi-View Point Clouds Journal Article Sensors, 23 (7), pp. 3576, 2023. |
Smartwatch digital phenotypes predict positive and negative symptom variation in a longitudinal monitoring study of patients with psychotic disorders Journal Article Frontiers in Psychiatry, 14 , 2023. |
Multi-Source Contrastive Learning from Musical Audio Conference Proc. 20th Sound and Music Computing Conference (SMC 2023), Stockholm, Sweden, 2023. |
Exploring Polyphonic Accompaniment Generation using Generative Adversarial Networks Conference Proc. 20th Sound and Music Computing Conference (SMC 2023), Stockholm, Sweden, 2023. |
From Digital Phenotype Identification To Detection Of Psychotic Relapses Conference Proc. IEEE International Conference on Healthcare Informatics, Houston, TX, USA, 2023. |
A Realistic Synthetic Mushroom Scenes Dataset Conference Proc. 2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 4th Agriculture Vision Workshop, Vancouver, Canada, 2023. |
Mushroom Segmentation and 3D Pose Estimation From Point Clouds Using Fully Convolutional Geometric Features and Implicit Pose Encoding Conference Proc. 2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 4th Agriculture Vision Workshop, Vancouver, Canada, 2023. |
SPECTRE: Visual Speech-Informed Perceptual 3D Facial Expression Reconstruction from Videos Conference Proc. 2023 IEEE/CVF Conf. on Computer Vision and Pattern Recognition (CVPR), 5th Workshop and Competition on Affective Behavior Analysis in-the-wild (ABAW), Vancouver, Canada, 2023. |
Neural Sign Reenactor: Deep Photorealistic Sign Language Retargeting Conference Proc. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), AI for Content Creation Workshop (AI4CC), Vancouver, Canada, 2023. |
Newton-based Trainable Learning Rate Conference Proc. 48th IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2023), Rhodes, Greece, 2023. |
Relapse Prediction from Long-Term Wearable Data using Self-Supervised Learning and Survival Analysis Conference Proc. 48th Int'l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-2023), Rhodes Island, 2023. |
Convolutional Recurrent Neural Networks for the Classification of Cetacean Bioacoustic Patterns Conference Proc. 48th Int'l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-2023), Rhodes Island, 2023. |
3D Neural Sculpting (3DNS): Editing Neural Signed Distance Functions Conference Proc. IEEE/CVF Winter Conference on Applications of Computer Vision (WACV), 2023. |
Enhancing CLIP with a Third Modality Inproceedings Proc. 37th Conference on Neural Information Processing Systems (NeurIPS 2023): Workshop on Self-Supervised Learning - Theory and Practice, New Orleans, 2023. |
Feather: An Elegant Solution to Effective DNN Sparsification Inproceedings Proc. 34th Bristish Machine Vision Conference (BMVC 2023), Aberdeen, UK, 2023. |
Enhancing Action Recognition in Vehicle Environments With Human Pose Information Inproceedings Proc. Int'l Conf. on Pervasive Technologies Related to Assistive Environments (PETRA 2023), 2023. |
Medical Face Masks and Emotion Recognition from the Body: Insights from a Deep Learning Perspective Inproceedings Proc. Int'l Conf. on Pervasive Technologies Related to Assistive Environments (PETRA 2023), 2023. |
2022 |
Reducing Computational Cost During Robot Navigation and Human-Robot Interaction with a Human-Inspired Reinforcement Learning Architectures Journal Article International Journal of Social Robotics, 2022. |
e-Prevention: Advanced Support System for Monitoring and Relapse Prevention in Patients with Psychotic Disorders Analysing Long-Term Multimodal Data from Wearables and Video Captures Journal Article Sensors, 22 (19), pp. 7544, 2022. |
Toward a Sparsity Theory on Weighted Lattices Journal Article Journal of Mathematical Imaging and Vision, 2022. |
ChildBot: Multi-robot perception and interaction with children Journal Article Robotics and Autonomous Systems, 150 , pp. 103975, 2022. |
Child Engagement Estimation in Heterogeneous Child-Robot Interactions Using Spatiotemporal Visual Cues Conference Proc. 2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2022), Kyoto, Japan, 2022. |
Towards Unsupervised Subject-Independent Speech-Based Relapse Detection in Patients with Psychosis using Variational Autoencoders Conference Proc. 30th European Signal Processing Conference (EUSIPCO), Belgrade, Serbia, 2022. |
A Comparative Study of Autoencoder Architectures for Mental Health Analysis using Wearable Sensors Data Conference Proc. 30th European Signal Processing Conference (EUSIPCO), Belgrade, Serbia, 2022. |
Multi-band Masking for Waveform-based Singing Voice Separation Conference Proc. 30th European Signal Processing Conference (EUSIPCO), Belgrade, Serbia, 2022. |
A Semantic Enhancement of Unified Geometric Representations for Improving Indoor Visual SLAM Conference Proc. 19th Int'l Conf. on Ubiquitous Robots (UR 2022), Jeju, Korea, 2022. |
Attribute-based Gesture Recognition: Generalization to Unseen Classes Conference Proc. 14th Image, Video, and Multidimensional Signal Processing Workshop (IVMSP 2022), Nafplio, Greece, 2022. |
Cartoonized Anonymization of Sign Language Videos Conference Proc. 14th IEEE Image, Video, and Multidimensional Signal Processing Workshop (IVMSP 2022), Nafplio, Greece, 2022. |
Neural Emotion Director: Speech-preserving semantic control of facial expressions in “in-the-wild” videos Conference Proc. 2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), New Orleans, USA, 2022, (CVPR-2022 Best Paper Finalist). |
Enhancing Affective Representations of Music-Induced EEG through Multimodal Supervision and Latent Domain Adaptation Conference Proc. 47th IEEE Int’l Conf. on Acoustics, Speech and Signal Processing (ICASSP-2022), 2022. |
A Few-sample Strategy for Guitar Tablature Transcription Based on Inharmonicity Analysis and Playability Constraints Conference Proc. 47th IEEE Int’l Conf. on Acoustics, Speech and Signal Processing (ICASSP-2022), 2022. |
Spatio-Temporal Graph Convolutional Networks for Continuous Sign Language Recognition Conference Proc. 47th IEEE Int’l Conf. on Acoustics, Speech and Signal Processing (ICASSP-2022), 2022. |
Neural Network Approximation based on Hausdorff Distance of Tropical Zonotopes Conference Proc. Int’l Conf. on Learning Representations (ICLR 2022), 2022. |
2021 |
Tropical Geometry and Machine Learning Journal Article Proceedings of the IEEE, 109 (5), pp. 728-755, 2021. |
The i-Walk Lightweight Assistive Rollator: First Evaluation Study Journal Article Frontiers in Robotics and AI, 8 , pp. 272, 2021, ISSN: 2296-9144. |
A linear method for camera pair self-calibration Journal Article Computer Vision and Image Understanding, 210 , pp. 103223, 2021. |
Visual Robotic Perception System with Incremental Learning for Child–Robot Interaction Scenarios Journal Article Technologies, 9 (4), pp. 86, 2021. |
Grounding Consistency: Distilling Spatial Common Sense for Precise Visual Relationship Detection Conference Proceedings of International Conference on Computer Vision (ICCV-2021), 2021. |
Exploiting Emotional Dependencies with Graph Convolutional Networks for Facial Expression Recognition Conference Proc. 16th IEEE Int’l Conf. on Automatic Face and Gesture Recognition (FG-2021), 2021. |
Leveraging Semantic Scene Characteristics and Multi-Stream Convolutional Architectures in a Contextual Approach for Video-Based Visual Emotion Recognition in the Wild Conference Proc. 16th IEEE Int’l Conf. on Automatic Face and Gesture Recognition (FG-2021), 2021. |
Exploring Temporal Context and Human Movement Dynamics for Online Action Detection in Videos Conference Proc. 29th European Signal Processing Conference (EUSIPCO 2021), Dublin, Ireland, 2021. |
An Audiovisual Child Emotion Recognition System for Child-Robot Interaction Applications Conference Proc. 29th European Signal Processing Conference (EUSIPCO 2021), Dublin, Ireland, 2021. |
HTMD-NET: A Hybrid Masking-Denoising Approach to Time-Domain Monaural Singing Voice Separation Conference Proc. 29th European Signal Processing Conference (EUSIPCO 2021), Dublin, Ireland, 2021. |
Multiscale Fractal Analysis on EEG Signals for Music-Induced Emotion Recognition Conference Proc. 29th European Signal Processing Conference (EUSIPCO 2021), Dublin, Ireland, 2021. |
Overlapped Sound Event Classification via Multi-Channel Sound Separation Network Conference Proc. 29th European Signal Processing Conference (EUSIPCO 2021), Dublin, Ireland, 2021. |
An Unsupervised Learning Approach for Detecting Relapses from Spontaneous Speech in Patients with Psychosis Conference Proc. IEEE-EMBS International Conference on Biomedical and Health Informatics (BHI-2021), 2021. |
Engagement Estimation During Child Robot Interaction Using Deep Convolutional Networks Focusing on ASD Children Conference Proc. IEEE Int'l Conf. Robotics and Automation (ICRA-2021), Xi'an, 2021. |
Independent Sign Language Recognition with 3D Body, Hands, and Face Reconstruction Conference Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021), Toronto, 2021. |
Sparsity in Max-Plus Algebra and Applications in Multivariate Convex Regression Conference Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021), Toronto, 2021. |
Advances in Morphological Neural Networks: Training, Pruning and Enforcing Shape Constraints Conference Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021), Toronto, 2021. |
Deep Convolutional and Recurrent Networks for Polyphonic Instrument Classification from Monophonic Raw Audio Waveforms Conference Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021), Toronto, 2021. |
Towards a User Adaptive Assistive Robot: Learning from Demonstration Using Navigation Functions Conference 2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), 2021. |
Deep Leg Tracking by Detection and Gait Analysis in 2D Range Data for Intelligent Robotic Assistants Conference 2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), 2021. |
2020 |
iMuSciCA Workbench: Web-based Music Activities For Science Education Journal Article Journal of the Audio Engineering Society, 68 (10), pp. 738-746, 2020. |
Evaluating the task effectiveness and user satisfaction with different operation modes of an assistive bathing robot in older adults Journal Article Assistive Technology, 0 , 2020, (PMID: 32286163). |
I-Support: A robotic platform of an assistive bathing robot for the elderly population Journal Article Robotics and Autonomous Systems, 126 , pp. 103451, 2020, ISSN: 0921-8890. |
Improving gesture-based interaction between an assistive bathing robot and older adults via user training on the gestural commands Journal Article Archives of Gerontology and Geriatrics, 87 , pp. 103996, 2020, ISSN: 0167-4943. |
Enhancing Handwritten Text Recognition with N-gram sequence decomposition and Multitask Learning Conference Int'l Conference Pattern Recognition (ICPR 2020), Milan, Italy, 2020. |
From Saturation to Zero-Shot Visual Relationship Detection Using Local Context Conference British Machine Vision Conference, Online, 2020. |
i-Walk Intelligent Assessment System: Activity, Mobility, Intention, Communication Conference Proc. 16th European Computer Vision Conference Workshops (ECCVW) – 8th Int’l Workshop on Assistive Computer Vision and Robotics (ACVR-2020), Online, 2020. |
Multiclass Neural Network Minimization via Tropical Newton Polytope Approximation Conference International Conference on Machine Learning (ICML), Online, 2020. |
An intelligent cloud-based platform for effective monitoring of patients with psychotic disorders Conference Int’l Conf. on Artificial Intelligence Applications and Innovation (AIAI-2020), Halkidiki, Greece, 2020. |
STAViS: Spatio-Temporal AudioVisual Saliency Network Conference IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Seattle, USA, 2020. |
SL-ReDu: Greek Sign Language Recognition for Educational Applications. Project Description and Early Results Conference PETRA '20: Proceedings of the 13th ACM International Conference on PErvasive Technologies Related to Assistive Environments, Corfu, Greece, 2020. |
An LSTM-Based Dynamic Chord Progression Generation System for Interactive Music Performance Conference ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2020. |
Person Identification Using Deep Convolutional Neural Networks on Short-Term Signals from Wearable Sensors Conference ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2020. |
Maxpolynomial Division with Application To Neural Network Simplification Conference ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2020. |
Multivariate Tropical Regression and Piecewise-Linear Surface Fitting Conference Proc. 45th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2020), Barcelona, 2020. |
Tropical Geometry and Piecewise-Linear Approximation of Curves and Surfaces on Weighted Lattices Book Chapter M. Breuss A. Bruckstein, Kiselman C; Maragos, P (Ed.): Shape Analysis: Euclidean, Discrete and Algebraic Geometric Methods, Springer, 2020. |
How to track your dragon: A Multi-Attentional Framework for Real-time RGB-D 6-DOF Object Pose Tracking Inproceedings Proc. European Conference on Computer Vision Workshops (ECCVW) - 6th Int’l Workshop on Recovering 6D Object Pose, 2020. |
Emotion Understanding in Videos Through Body, Context, and Visual-Semantic Embedding Loss Workshop Proc. 16th European Computer Vision Conference Workshops (ECCVW) - Workshop on Bodily Expressed Emotion Understanding, 2020. |
2019 |
Learn to adapt to human walking: A Model-based Reinforcement Learning Approach for a Robotic Assistant Rollator Journal Article IEEE Robotics and Automation Letters (with IROS option), 4 (4), pp. 3774–3781, 2019. |
Fusing Body Posture With Facial Expressions for Joint Recognition of Affect in Child–Robot Interaction Journal Article IEEE Robotics and Automation Letters (with IROS option), 4 (4), pp. 4011-4018, 2019. |
Improved Frequency Modulation Features for Multichannel Distant Speech Recognition Journal Article IEEE Journal of Selected Topics in Signal Processing, 13 (4), pp. 841-849, 2019. |
Sparsity in max-plus algebra and systems Journal Article Discrete Event Dynamic Systems, 29 (2), pp. 163–189, 2019. |
A behaviorally inspired fusion approach for computational audiovisual saliency modeling Journal Article Signal Processing: Image Communication, 76 , pp. 186 - 200, 2019. |
Room-localized speech activity detection in multi-microphone smart homes Journal Article EURASIP Journal on Audio, Speech, and Music Processing, 2019 (1), pp. 15, 2019, ISSN: 1687-4722. |
Comparing the Impact of Robotic Rollator Control Schemes on Elderly Gait using on-line LRF-based Gait Analysis Conference Proc. Workshop on Mobile Robot Assistants for the Elderly (MoRobAE) in 2019 IEEE Int’l Conference on Robotics and Automation (ICRA), Montreal, Canada, 2019. |
RecNets: Channel-wise Recurrent Convolutional Neural Networks Conference Proceedings of the British Machine Vision Conference (BMVC), Cardiff, 2019. |
On-line Human Gait Stability Prediction using LSTMs for the fusion of Deep-based Pose Estimation and LRF-based Augmented Gait State Estimation in an Intelligent Robotic Rollator Conference IEEE International Conference on Robotics & Automation (ICRA), Montreal, Canada, 2019. |
An Environment for Gestural Interaction with 3D Virtual Musical Instruments as an Educational Tool Conference Proc. 27th European Conf. (EUSIPCO-19), A Coruna, Spain, 2019. |
Tropical Geometry, Mathematical Morphology and Weighted Lattices Conference Proc. 14th Int’l Symposium on Mathematical Morphology (ISMM-2019), Saarbruecken, Germany, 2019. |
Tropical Modeling of Weighted Transducer Algorithms on Graphs Conference Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing (ICASSP), 2019, ISSN: 2379-190X. |
LSTM-based Network for Human Gait Stability Prediction in an Intelligent Robotic Rollator Conference Proc. 2019 IEEE International Conference on Robotics and Automation (ICRA), Montreal, Canada, 2019. |
User Centered HRI Design for an Intelligent Robotic Rollator Conference Proc. Workshop on Mobile Robot Assistants for the Elderly (MoRobAE) in 2019 IEEE Int’l Conference on Robotics and Automation (ICRA), Montreal, Canada, 2019. |
Tropical Geometry, Mathematical Morphology and Weighted Lattices Conference Mathematical Morphology and Its Applications to Signal and Image Processing - 14th International Symposium, ISMM 2019, Saarbrücken, Germany, July 8-10, 2019, Proceedings, 2019. |
SUSiNet: See, Understand and Summarize it Conference Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, 2019. |
Deeply Supervised Multimodal Attentional Translation Embeddings for Visual Relationship Detection Conference 2019 IEEE International Conference on Image Processing (ICIP), 2019. |
Tropical Geometry, Mathematical Morphology and Weighted Lattices Conference Mathematical Morphology and Its Applications to Signal and Image Processing, Springer International Publishing, Cham, 2019, ISBN: 978-3-030-20867-7. |
RecNets: Channel-wise Recurrent Convolutional Neural Networks Conference British Machine Vision Conference (BMVC-2019), Cardiff, UK, 2019. |
A Deep Learning Approach for Multi-View Engagement Estimation of Children in a Child-Robot Joint Attention Task Conference 2019 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2019), Macau, China, 2019. |
Chapter 4 - Active contour methods on arbitrary graphs based on partial differential equations Incollection Kimmel, R; Tai, X -C (Ed.): Processing, Analyzing and Learning of Images, Shapes, and Forms: Part 2, 20 , pp. 149-190, Elsevier North-Holland, 2019. |
2018 |
Vision-Based Online Adaptation of Motion Primitives to Dynamic Surfaces: Application to an Interactive Robotic Wiping Task Journal Article IEEE Robotics and Automation Letters, 3 (3), pp. 1410-1417, 2018. |
Augmented Human State Estimation Using Interacting Multiple Model Particle Filters With Probabilistic Data Association Journal Article IEEE Robotics and Automation Letters, 3 (3), pp. 1872-1879, 2018, ISSN: 2377-3766. |
Stochastic stability in Max-Product and Max-Plus Systems with Markovian Jumps Journal Article Automatica, 92 , pp. 123–132, 2018, ISSN: 00051098. |
Online Wideband Spectrum Sensing Using Sparsity Journal Article IEEE Journal of Selected Topics in Signal Processing, 12 (1), pp. 35–44, 2018, ISSN: 19324553. |
Robot fast adaptation to changes in human engagement during simulated dynamic social interaction with active exploration in parameterized reinforcement learning Journal Article IEEE Transactions on Cognitive and Developmental Systems, 10 , pp. 881 - 893, 2018. |
MULTI-VIEW FUSION FOR ACTION RECOGNITION IN CHILD-ROBOT INTERACTION Conference Proc. IEEE Int'l Conf. on Image Processing, Athens, Greece, 2018. |
Object Assembly Guidance in Child-Robot Interaction using RGB-D based 3D Tracking Conference Proc. of 2018 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2018), Madrid, Spain, 2018. |
A Web-based Real-Time Kinect Application for Gestural Interaction with Virtual Musical Instruments Conference Proc. of Audio Mostly Conference (AM’18), Wrexham, North Wales, UK, 2018. |
A Collaborative System for Composing Music via Motion Using a Kinect Sensor and Skeletal Data Conference Proc. 15th International Sound & Music Computing Conference (SMC-2018), Limassol, Cyprus, 2018. |
Human-Centered Service Robotic Systems for Assisted Living Conference Proceedings of the 27th International Conference on Robotics in Alpe-Adria Danube Region (RAAD 2018), 2018. |
Multimodal Visual Concept Learning with Weakly Supervised Techniques Conference Proc. IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Salt Lake City, Utah, USA, 2018. |
Analysis of the Viterbi Algorithm Using Tropical Algebra and Geometry Conference Proc. IEEE International Workshop on Signal Processing Advances in Wireless Communications (SPAWC-18), Kalamata, Greece, 2018. |
Multi3: Multi-sensory Perception System for Multi-modal Child Interaction with Multiple Robots Conference IEEE International Conference on Robotics and Automation (ICRA), Brisbane, Australia, 2018. |
Multimodal Signal Processing and Learning Aspects of Human-Robot Interaction for an Assistive Bathing Robot Conference Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing, Calgary, Canada, 2018. |
FAR-FIELD AUDIO-VISUAL SCENE PERCEPTION OF MULTI-PARTY HUMAN-ROBOT INTERACTION FOR CHILDREN AND ADULTS Conference Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing (ICASSP), Calgary, Canada, 2018. |
Object assembly guidance in child-robot interaction using RGB-D based 3d tracking Conference 2018 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), IEEE 2018. |
2017 |
Dynamical systems on weighted lattices: general theory Journal Article Math. Control Signals Syst., 29 (1), 2017. |
Room-localized spoken command recognition in multi-room, multi-microphone environments Journal Article Computer Speech & Language, 46 , pp. 419-443, 2017. |
Theoretical Analysis of Active Contours on Graphs Journal Article SIAM J. Imaging Sciences, 2017, ISSN: 1936-4954. |
Graph-driven diffusion and random walk schemes for image segmentation Journal Article IEEE Transactions on Image Processing, 26 (1), pp. 35–50, 2017, ISSN: 10577149. |
Video-realistic expressive audio-visual speech synthesis for the Greek language Journal Article Speech Communication, 95 , pp. 137–152, 2017, ISSN: 01676393. |
COGNIMUSE: a multimodal video database annotated with saliency, events, semantics and emotion with application to summarization Journal Article EURASIP Journal on Image and Video Processing, 54 , pp. 1–24, 2017. |
Dynamical Systems on Weighted Lattices: General Theory Journal Article Math. Control Signals Syst., 29 (21), 2017. |
Towards a user-adaptive context-aware robotic walker with a pathological gait assessment system: First experimental study Conference IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), 2017. |
HMM-based Pathological Gait Analyzer for a User-Adaptive Intelligent Robotic Walker Conference Proc. 25th European Conf.(EUSIPCO-17) Workshop: "MultiLearn 2017 - Multimodal processing, modeling and learning for human-computer/robot interaction applications", Kos, Greece, 2017. |
Real-time End-effector Motion Behavior Planning Approach Using On-line Point-cloud Data Towards a User Adaptive Assistive Bath Robot Conference 2017 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), Vancouver, Canada, 2017. |
Integrated Speech-based Perception System for User Adaptive Robot Motion Planning in Assistive Bath Scenarios Conference Proc. of the 25th European Signal Processing Conference - Workshop: "MultiLearn 2017 - Multimodal processing, modeling and learning for human-computer/robot interaction applications", Kos, Greece, 2017. |
Bio-inspired meta-learning for active exploration during non-stationary multi-armed bandit tasks Conference Proc. IEEE Intelligent Systems Conference, London, UK, 2017. |
Estimating double support in pathological gaits using an HMM-based analyzer for an intelligent robotic walker Conference IEEE International Symposium on Robot and Human Interactive Communication (RO-MAN), 2017. |
Online adaptation to human engagement perturbations in simulated human-robot interaction using hybrid reinforcement learning Conference Proc. of the 25th European Signal Processing Conference - Workshop: "MultiLearn 2017 - Multimodal processing, modeling and learning for human-computer/robot interaction applications", Kos, Greece, 2017. |
Comparative experimental validation of human gait tracking algorithms for an intelligent robotic rollator Conference IEEE International Conference on Robotics and Automation (ICRA), 2017. |
Active exploration and parameterized reinforcement learning applied to a simulated human-robot interaction task Conference Proc. IEEE Int'l Conference on Robotic Computing, Taichung, Taiwan, 2017. |
Social Human-Robot Interaction for the Elderly: Two Real-life Use Cases, Conference ACM/IEEE International Conference on Human-Robot Interaction (HRI), Vienna, Austria, 2017. |
Intelligent Assistive Robotic Systems for the Elderly: Two Real-life Use Cases Conference C_PETRA, ACM, Island of Rhodes, Greece, 2017, ISBN: 978-1-4503-5227-7. |
Audio-based Distributional Semantic Models for Music Auto-tagging and Similarity Measurement Conference Proc. MultiLearn2017: Multimodal Processing, Modeling and Learning for Human-Computer/Robot Interaction Workshop, in conjuction with European Signal Processing Conference, Kos, Greece, 2017. |
Morphological perceptrons: Geometry and training algorithms Conference Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), 10225 LNCS , 2017, ISSN: 16113349. |
Active exploration and parameterized reinforcement learning applied to a simulated human-robot interaction task Conference Proceedings - 2017 1st IEEE International Conference on Robotic Computing, IRC 2017, 2017, ISBN: 9781509067237. |
Multimodal Gesture Recognition Book Chapter The Handbook of Multimodal-Multisensor Interfaces: Foundations, User Modeling, and Common Modality Combinations - Volume 1, pp. 449–487, Association for Computing Machinery and Morgan & Claypool, 2017, ISBN: 9781970001679. |
2016 |
Model-free learning on robot kinematic chains using a nested multi-agent topology Journal Article Journal of Experimental and Theoretical Artificial Intelligence, 28 (6), pp. 913–954, 2016, ISSN: 13623079. |
A Platform for Building New Human-Computer Interface Systems that Support Online Automatic Recognition of Audio-Gestural Commands Conference Proceedings of the 2017 ACM on Multimedia Conference, Amsterdam, The Netherlands, 2016. |
Human Joint Angle Estimation and Gesture Recognition for Assistive Robotic Vision Conference Proc. of Workshop on Assistive Computer Vision and Robotics, European Conf. on Computer Vision (ECCV-2016), Amsterdam, The Netherlands, 2016. |
Introducing Temporal Order of Dominant Visual Word Sub-Sequences for Human Action Recognition Conference Proc. of IEEE Int'l Conf. on Image Processing (ICIP-2016), Phoenix, AZ, USA, 2016. |
fMRI-based Perceptual Validation of a computational Model for Visual and Auditory Saliency in Videos Conference Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing, Phoenix, AZ, USA, 2016. |
Experimental comparison of human gait tracking algorithms: Towards a context-aware mobility assistance robotic walker Conference Mediterranean Conference on Control and Automation (MED), 2016. |
Towards ICT-supported Bath Robots: Control Architecture Description and Localized Perception of User for Robot Motion Planning Conference Mediterranean Conference on Control and Automation (MED), Athens, Greece, 2016. |
Experimental validation of human pathological gait analysis for an assisted living intelligent robotic walker Conference C_BIOROB, 2016. |
Towards a behaviorally-validated computational audiovisual saliency model Conference Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing, Shanghai, China, 2016. |
Audio-Based Distributional Representations of Meaning Using a Fusion of Feature Encodings Conference 2016. |
On Shape Recognition and Language Conference Perspectives in Shape Analysis, Springer International Publishing, Cham, 2016, ISBN: 978-3-319-24726-7. |
Projective non-negative matrix factorization for unsupervised graph clustering Conference Proceedings - International Conference on Image Processing, ICIP, 2016-August , 2016, ISSN: 15224880. |
FMRI-based perceptual validation of a computational model for visual and auditory saliency in videos Conference Proceedings - International Conference on Image Processing, ICIP, 2016-August , 2016, ISSN: 15224880. |
A multimedia gesture dataset for human robot communication: Acquisition, tools and recognition results Conference Proceedings - International Conference on Image Processing, ICIP, 2016-August , 2016, ISSN: 15224880. |
On Shape Recognition and Language Incollection Breuss, M; Bruckstein, A; Maragos, P; Wuhrer, S (Ed.): Perspectives in Shape Analysis, pp. 321-344, Springer, Cham, 2016. |
2015 |
Structure Tensor Total Variation Journal Article SIAM Journal on Imaging Sciences, 8 (2), pp. 1090–1122, 2015, ISSN: 1936-4954. |
Multimodal Gesture Recognition via Multiple Hypotheses Rescoring Journal Article Journal of Machine Learning Research, 16 (1), pp. 255-284, 2015. |
The development of the Athens Emotional States Inventory (AESI): collection, validation and automatic processing of emotionally loaded sentences Journal Article The World Journal of Biological Psychiatry, 16 (5), pp. 312–322, 2015. |
ICT-Supported Bath Robots: Design Concepts Conference C_ICSR, 2015. |
User-Oriented Cognitive Interaction and Control for an Intelligent Robotic Walker Conference 17th International Conference on Social Robotics (ICSR 2015), 2015. |
Gait Modelling for a Context-Aware User-Adaptive Robotic Assistant Platform Conference 2015, ISSN: 978-88-97999-63-8. |
Hidden markov modeling of human pathological gait using laser range finder for an assisted living intelligent robotic walker Conference IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), 2015. |
Predicting Audio-Visual Salient Events Based on Visual, Audio and Text Modalities for Movie Summarization Conference Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing, Quebec, Canada, 2015. |
Audio Salient Event Detection and Summarization using Audio and Text Modalities Conference Nice, France, 2015. |
Quality Evaluation of Computational Models for Movie Summarization Conference Costa Navarino, Messinia, Greece, 2015. |
UNIFYING THE RANDOM WALKER ALGORITHM AND THE SIR MODEL FOR GRAPH CLUSTERING AND IMAGE SEGMENTATION Conference Icip 2015, 2 (3), 2015, ISBN: 9781479983391. |
MULTI-ROOM SPEECH ACTIVITY DETECTION USING A DISTRIBUTED MICROPHONE NETWORK IN DOMESTIC ENVIRONMENTS Conference Proc. European Signal Processing Conf. (EUSIPCO-2015), Nice, France, Sep. 2015, 2015, ISBN: 9780992862633. |
Predicting audio-visual salient events based on visual, audio and text modalities for movie summarization Conference Proceedings - International Conference on Image Processing, ICIP, 2015-December , 2015, ISSN: 15224880. |
Estimation of eye gaze direction angles based on active appearance models Conference 2015 IEEE International Conference on Image Processing (ICIP), 2015, ISBN: 978-1-4799-8339-1. |
Multimodal Sensory Processing for Human Action Recognition in Mobility Assistive Robotics Conference Proc. IROS-2015 Workshop on Cognitive Mobility Assistance Robots, Hamburg, Germany, Sep. 2015, 2015. |
Multichannel Speech Enhancement Using Mems Microphones Conference IEEE International Conference on Acoustics, Speech and Signal Processing, 2015, ISBN: 978-1-4673-6997-8. |
User-Oriented Human-Robot Interaction for an Intelligent Walking Assistant Robotic Device Conference Workshop IEEE/RSJ Int'l Conf. on Intelligent Robots and Systems, Invited Session: "Cognitive Mobility Assistance Robots: Scientific Advances and Perspectives", Hamburg, Germany, Sept. 28 - Oct. 02, 2015, 2015. |
2014 |
RMAP: A rectangular cuboid approximation framework for 3D environment mapping Journal Article Autonomous Robots, 37 (3), pp. 261–277, 2014, ISSN: 09295593. |
Classification of extreme facial events in sign language videos Journal Article Eurasip Journal on Image and Video Processing, 2014 , 2014, ISSN: 16875281. |
RMAP: a Rectangular Cuboid Approximation Framework for 3D Environment Mapping Journal Article 37 (3), pp. 261–277, 2014, ISSN: 1573-7527. |
Comparison of Different Representations Based on Nonlinear Features for Music Genre Classification Conference Proc. European Signal Processing Conference, Lisbon, Portugal, 2014. |
Experiments in acoustic source localization using sparse arrays in adverse indoors environments Conference Proc. European Signal Processing Conference, Lisbon, Portugal, 2014. |
ATHENA: A Greek Multi-Sensory Database for Home Automation Control, Author: Isidoros Rodomagoulakis (NTUA, Greece) Conference Proc. Int'l Conf. on Speech Communication and Technology (INTERSPEECH), Singapore, 2014. |
Hidden Markov modeling of human normal gait using laser range finder for a mobility assistance robot Conference IEEE International Conference on Robotics and Automation (ICRA), 2014, ISSN: 1050-4729. |
Advances in Intelligent Mobility Assistance Robot Integrating Multimodal Sensory Processing Conference J_HCII, Springer International Publishing, Cham, 2014, ISBN: 978-3-319-07446-7. |
Emotion classification of speech using modulation features Conference European Signal Processing Conference, 2014, ISSN: 22195491. |
Multi-microphone fusion for detection of speech and acoustic events in smart spaces Conference European Signal Processing Conference, 2014, ISSN: 22195491. |
Robust far-field spoken command recognition for home automation combining adaptation and multichannel processing Conference ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings, 2014, ISSN: 15206149. |
ADVANCES ON ACTION RECOGNITION IN VIDEOS USING AN INTEREST POINT DETECTOR BASED ON MULTIBAND SPATIO-TEMPORAL ENERGIES Conference Icip, 2014, ISBN: 9781479957514. |
Kinect-based multimodal gesture recognition using a two-pass fusion scheme Conference 2014 IEEE International Conference on Image Processing, ICIP 2014, 2014, ISBN: 9781479957514. |
2013 |
Dynamic Affine-Invariant Shape-Appearance Handshape Features and Classification in Sign Language Videos Journal Article Journal of Machine Learning Research, 14 , pp. 1627-1663, 2013. |
Multiscale Fractal Analysis of Musical Instrument Signals with Application to Recognition Journal Article 21 (4), pp. 737–748, 2013. |
Telehaptic perception of delayed stiffness using adaptive impedance control: Experimental psychophysical analysis Journal Article Presence: Teleoperators and Virtual Environments, 22 (4), pp. 323–344, 2013, ISSN: 15313263. |
Multimodal saliency and fusion for movie summarization based on aural, visual, and textual attention Journal Article IEEE Transactions on Multimedia, 15 (7), pp. 1553–1568, 2013, ISSN: 15209210. |
An integrated system for digital restoration of prehistoric theran wall paintings Conference 2013 18th International Conference on Digital Signal Processing, DSP 2013, 2013, ISBN: 9781467358057. |
Supplementary Material for the SSVM-2013 paper entitled “Convex Generalizations of Total Variation based on the Structure Tensor with Applications to Inverse Problems” Conference Scale Space and Variational Methods in Computer Vision (SSVM-2013), pp. 48-60, 2013. |
Enhancing surgical accuracy using virtual fixtures and motion compensation in robotic beating heart surgery Conference 2013 21st Mediterranean Conference on Control and Automation, MED 2013 - Conference Proceedings, 2013, ISSN: 1050-4729. |
Enhancing surgical accuracy using virtual fixtures and motion compensation in robotic beating heart surgery Conference 2013 21st Mediterranean Conference on Control and Automation, MED 2013 - Conference Proceedings, 2013, ISSN: 10504729. |
Active motion compensation in robotic cardiac surgery Conference 2013 European Control Conference, ECC 2013, 2013, ISBN: 9783033039629. |
Experiments on far-field multichannel speech processing in smart homes Conference 2013 18th International Conference on Digital Signal Processing, DSP 2013, 2013, ISBN: 9781467358057. |
Segmentation and Skeletonization on Arbitrary Graphs Using Multiscale Morphology and Active Contours Book Chapter Breuß, Michael; Bruckstein, Alfred; Maragos, Petros (Ed.): Innovations for Shape Analysis: Models and Algorithms, pp. 53–75, Springer Berlin Heidelberg, Berlin, Heidelberg, 2013, ISBN: 978-3-642-34141-0. |
Chapter Two - Representations for Morphological Image Operators and Analogies with Linear Operators Book Chapter Hawkes, Peter W (Ed.): Advances in Imaging and Electron Physics, 177 , pp. 45 - 187, Elsevier, 2013, ISSN: 1076-5670. |
Music Signal Processing and Applications in Recognition PhD Thesis School of ECE, NTUA, 2013. |
2012 |
Assessing the effect of physical differences in the articulation of consonants and vowels on audiovisual temporal perception Journal Article Frontiers in Integrative Neuroscience, 6 , 2012, ISSN: 1662-5145. |
Active contours on graphs: Multiscale morphology and graphcuts Journal Article IEEE Journal on Selected Topics in Signal Processing, 6 (7), pp. 780–794, 2012, ISSN: 19324553. |
AM-FM Modulation Features for Music Instrument Signal Analysis and Recognition Conference Proc. European Signal Processing Conference, Bucharest, Romania, 2012. |
A Saliency-Based Approach to Audio Event Detection and Summarization Conference Proc. European Signal Processing Conference, Bucharest, Romania, 2012. |
Unsupervised classification of extreme facial events using active appearance models tracking for sign language videos Conference Proceedings - International Conference on Image Processing, ICIP, 2012, ISSN: 15224880. |
Dominant spatio-temporal modulations and energy tracking in videos: Application to interest point detection for action recognition Conference Proceedings - International Conference on Image Processing, ICIP, 2012, ISSN: 15224880. |
Robustness and generalization of model-free learning for robot kinematic control using a nested-hierarchical multi-agent topology Conference Proceedings of the IEEE RAS and EMBS International Conference on Biomedical Robotics and Biomechatronics, 2012, ISSN: 21551774. |
An optimization approach for 3D environment mapping using normal vector uncertainty Conference 2012 12th International Conference on Control Automation Robotics & Vision (ICARCV), 2012 (December), 2012, ISBN: 978-1-4673-1872-3. |
Gesture and Sign Language in Human-Computer Interaction and Embodied Communication Conference Spatial Cognition VIII, 7206 , 2012, ISSN: 03029743. |
Hand tracking and affine shape-appearance handshape sub-units in continuous sign language recognition Conference Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), 6553 LNCS (PART 1), 2012, ISSN: 03029743. |
Human action recognition using Histographic methods and hidden Markov models for visual martial arts applications Conference Image Processing (ICIP), 2012 19th IEEE International Conference on, 2012. |
RECOGNITION WITH RAW CANONICAL PHONETIC MOVEMENT AND HANDSHAPE SUBUNITS ON VIDEOS OF CONTINUOUS SIGN LANGUAGE Conference Proc. IEEE Int'l Conf. on Image Processing, (Icip), 2012, ISBN: 9781467325332. |
Model-mediated telehaptic perception of delayed curvature Conference Proceedings - IEEE International Workshop on Robot and Human Interactive Communication, 2012, ISBN: 9781467346054. |
2011 |
On the effects of filterbank design and energy computation on robust speech recognition Journal Article IEEE Transactions on Audio, Speech and Language Processing, 19 (6), pp. 1504–1516, 2011, ISSN: 15587916. |
Musical Instruments Signal Analysis and Recognition Using Fractal Features Conference Proc. European Signal Processing Conference, Barcelona, Spain, 2011. |
A Supervised Approach to Movie Emotion Tracking Conference Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing, Prague, Czech Republic, 2011. |
MUSICAL INSTRUMENTS SIGNAL ANALYSIS AND RECOGNITION USING FRACTAL FEATURES Conference Proc. 19th European Signal Processing Conference (EUSIPCO-2011), (Eusipco), 2011. |
Revealing cluster formation over huge volatile robotic data Conference Proceedings - IEEE International Conference on Data Mining, ICDM, 2011, ISSN: 15504786. |
Advances in phonetics-based sub-unit modeling for transcription alignment and sign language recognition Conference IEEE Computer Society Conference on Computer Vision and Pattern Recognition Workshops, 2011, ISSN: 21607508. |
Experiments on global and local active appearance models for analysis of sign language facial expressions Conference 9th International Gesture Workshop, 2011. |
Advances in Dynamic-Static Integration of Manual Cues for Sign Language Recognition Conference ... Gesture Workshop (GW 2011): Gesture in ..., 2011. |
2010 |
Spatial Bayesian surprise for image saliency and quality assessment Conference Proceedings - International Conference on Image Processing, ICIP, 2010, ISSN: 15224880. |
Hierarchical Multi-Agent Architecture employing TD(λ) Learning with Function Approximators for Robot Skill Acquisition Conference Architecture, 2010. |
Fuzzy rule based neuro-dynamic programming for mobile robot skill acquisition on the basis of a nested multi-agent architecture Conference 2010 IEEE International Conference on Robotics and Biomimetics, ROBIO 2010, 2010, ISBN: 9781424493173. |
Data-Driven Sub-Units and Modeling Structure for Continuous Sign Language Recognition with Multiple Cues Conference Proceedings of the Language Resources and Evaluation Conference Workshop on the Representation and Processing of Sign Languages : Corpora and Sign Languages Technologies, 2010. |
Tensor-based image diffusions derived from generalizations of the total variation and beltrami functionals Conference Proceedings - International Conference on Image Processing, ICIP, 2010, ISSN: 15224880. |
Affine-invariant modeling of shape-appearance images applied on sign language handshape classification Conference Proceedings - International Conference on Image Processing, ICIP, 2010, ISSN: 15224880. |
Model-level data-driven sub-units for signs in videos of continuous sign language Conference ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings, 2010, ISSN: 15206149. |
Human Telehaptic Perception of Stiffness using an Adaptive Impedance Reflection Bilateral Teleoperation Control Scheme Conference In: Proceedings of the IEEE International Workshop on Robot and Human Interactive Communication, ROMAN-2010, 2010, ISBN: 9781424479894. |
2009 |
Synergy between object recognition and image segmentation using the expectation-maximization algorithm Journal Article IEEE Transactions on Pattern Analysis and Machine Intelligence, 31 (8), pp. 1486–1501, 2009, ISSN: 01628828. |
Analysis and classification of speech signals by generalized fractal dimension features Journal Article Speech Communication, 51 (12), pp. 1206–1223, 2009, ISSN: 01676393. |
Adaptive multimodal fusion by uncertainty compensation with application to audiovisual speech recognition Journal Article IEEE Transactions on Audio, Speech and Language Processing, 17 (3), pp. 423–435, 2009, ISSN: 15587916. |
Texture Analysis and Segmentation Using Modulation Features, Generative Models, and Weighted Curve Evolution Journal Article IEEE Transactions on Pattern Analysis and Machine Intelligence, 31 (1), pp. 142-157, 2009, ISSN: 0162-8828. |
Face active appearance modeling and speech acoustic information to recover articulation Journal Article IEEE Transactions on Audio, Speech and Language Processing, 17 (3), pp. 411–422, 2009, ISSN: 15587916. |
A comparison of the squared energy and teager-kaiser operators for short-term energy estimation in additive noise Journal Article IEEE Transactions on Signal Processing, 57 (7), pp. 2569–2581, 2009, ISSN: 1053587X. |
Reversible interpolation of vectorial images by an anisotropic diffusion-projection PDE Journal Article International Journal of Computer Vision, 84 (2), pp. 130–145, 2009, ISSN: 09205691. |
Bayesian inference on multiscale models for poisson intensity estimation: Applications to photon-limited image denoising Journal Article IEEE Transactions on Image Processing, 18 (8), pp. 1724–1741, 2009, ISSN: 10577149. |
Video Event Detection and Summarization Using Audio, Visual and Text Saliency Conference Taipei, Taiwan, 2009. |
Video Event Detection and Summarization using Audio, Visual and Text Saliency Conference ICASSP, (2), 2009, ISBN: 9781424423545. |
DIANOEMA: Visual analysis and sign recognition for GSL modelling and robot teleoperation Conference Language and Speech, 2009. |
Poisson-haar transform: A nonlinear multiscale representation for photon-limited image denoising Conference Proceedings - International Conference on Image Processing, ICIP, 2009, ISSN: 15224880. |
Overview of adaptive morphology: Trends and perspectives Conference Proceedings - International Conference on Image Processing, ICIP, 2009, ISSN: 15224880. |
Tongue tracking in ultrasound images with active appearance models Conference Proceedings - International Conference on Image Processing, ICIP, 2009, ISSN: 15224880. |
Product-HMMS for automatic sign language recognition Conference ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings, 2009, ISSN: 15206149. |
Telehaptics: issues of control stability and human perception in remote kinaesthetic exploration Conference 18th IEEE International Symposium on Robot and Human Interactive Communication (RO-MAN 2009), Workshop on Robot-Human Synergies, Sept. 27 - Oct. 2 2009, 2009. |
Gestural teleoperation of a mobile robot based on visual recognition of sign language static handshapes Conference Proceedings - IEEE International Workshop on Robot and Human Interactive Communication, 2009, ISSN: 19449445. |
Stereo pair matching of archaeological scenes using phase domain methods Conference IMAGAPP 2009 - Proceedings of the 1st International Conference on Computer Imaging Theory and Applications, 2009, ISBN: 9789898111685. |
GridNews: A distributed automatic Greek broadcast transcription system Conference ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings, 2009, ISSN: 15206149. |
2008 |
Pilot Evaluation Study of a Virtual Paracentesis Simulator for Skill Training and Assessment: The Beneficial Effect of Haptic Display Journal Article Presence: Teleoperators & Virtual Environments, 17 (2), pp. 212–229, 2008, ISSN: 10547460. |
Generalized flooding and multicue PDE-based image segmentation Journal Article IEEE Transactions on Image Processing, 17 (3), pp. 364–376, 2008, ISSN: 10577149. |
Computational Analysis and Learning for a Biologically Motivated Model of Boundary Detection and Image Segmentation Journal Article Neurocomputing, 71 (10-12), pp. 1798–1812, 2008. |
Audio-assisted movie dialogue detection Journal Article IEEE Transactions on Circuits and Systems for Video Technology, 18 (11), pp. 1618–1627, 2008, ISSN: 10518215. |
Movie Summarization based on Audiovisual Saliency Detection Conference Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing, San Diego, CA, U.S.A., 2008. |
MUSCLE Movie Database: A Multimodal Corpus With Rich Annotation For Dialogue And Saliency Detection Conference Marrakech, Morocco, 2008. |
Web-based remote and virtual programming console of the V+ robotic system Conference IECON Proceedings (Industrial Electronics Conference), 2008, ISSN: 1553-572X. |
Multimodality acquisition of articulatory data and processing Conference European Signal Processing Conference, 2008, ISSN: 22195491. |
Automatic sign language recognition Conference Proceedings of the 1st ACM international conference on PErvasive Technologies Related to Assistive Environments - PETRA '08, 2008, ISBN: 9781605580678. |
Geodesic active regions for segmentation and tracking of human gestures in sign language videos Conference 2008 15th IEEE International Conference on Image Processing, 2008, ISSN: 1522-4880. |
Texture Modulation-Constrained Image Decomposition Conference Proc. Int'l Conference on Image Processing (ICIP-2008), San Diego, California, Oct. 2008, 2008. |
Movie summarization based on audiovisual saliency detection Conference Proceedings - International Conference on Image Processing, ICIP, 2008, ISSN: 15224880. |
Image Decomposition into Structure and Texture Subcomponents with Conference Image (Rochester, N.Y.), 2008, ISBN: 9781424422432. |
Multi-agent hierarchical architecture modeling kinematic chains employing continuous RL learning with fuzzified state space Conference Proceedings of the 2nd Biennial IEEE/RAS-EMBS International Conference on Biomedical Robotics and Biomechatronics, BioRob 2008, 2008, ISBN: 9781424428830. |
Audiovisual-to-Articulatory Speech Inversion Using Active Appearance Models for the Face and Hidden Markov Models for the Dynamics Conference IEEE Int. Conference on Acoustics, Speech, and Signal Processing, 2008. |
Inversion from audiovisual speech to articulatory information by exploiting multimodal data Conference Proceedings of ISSP 2008 - 8th International Seminar on Speech Production, 2008. |
Audiovisual speech inversion by switching dynamical modeling governed by a Hidden Markov process Conference European Signal Processing Conference, 2008, ISSN: 22195491. |
How can acoustic-to-articulatory maps be constrained? Conference European Signal Processing Conference, 2008, ISSN: 22195491. |
Image inpainting with a wavelet domain hidden Markov tree model Conference Proceedings of International Conference on Acoustics, Speech, and Signal Processing (ICASSP-08), Las Vegas, USA, April 2008, 2008. |
Adaptive and constrained algorithms for inverse compositional active appearance model fitting Conference 26th IEEE Conference on Computer Vision and Pattern Recognition, CVPR, 2008, ISSN: 1063-6919. |
An inpainting system for automatic image structure-texture restoration with text removal Conference Proceedings - International Conference on Image Processing, ICIP, 2008, ISSN: 15224880. |
MUSCLE movie database: A multimodal corpus with rich annotation for dialogue and saliency detection Conference Programme of the Workshop on Multimodal Corpora, 2008. |
PHOTON-LIMITED IMAGE DENOISING BY INFERENCE ON MULTISCALE MODELS Conference Proc. Int'l Conference on Image Processing, 2008, ISBN: 9781424417643. |
MULTISENSOR MULTIBAND CROSS-ENERGY TRACKING FOR FEATURE EXTRACTION AND RECOGNITION Conference Proceedings of International Conference on Acoustics, Speech, and Signal Processing (ICASSP-08), Las Vegas, USA, April 2008, 2008. |
Adaptive impedance control in haptic teleoperation to improve transparency under time-delay Conference Proceedings - IEEE International Conference on Robotics and Automation, 2008, ISSN: 10504729. |
Pilot evaluation study of a virtual paracentesis simulator for skill training and assessment: The beneficial effect of haptic display Conference Presence: Teleoperators and Virtual Environments, 17 (2), 2008, ISSN: 10547460. |
A PDE formulation for viscous morphological operators with extensions to intensity-adaptive operators Conference Proc. Int'l Conference on Image Processing (ICIP-2008), San Diego, California, 2008, ISSN: 15224880. |
Audiovisual Attention Modeling and Salient Event Detection Book Chapter Maragos, Petros; Potamianos, Alexandros; Gros, Patrick (Ed.): Multimodal Processing and Interaction: Audio, Video, Text, pp. 1–21, Springer US, Boston, MA, 2008, ISBN: 978-0-387-76316-3. |
Cross-Modal Integration for Performance Improving in Multimedia: A Review Book Chapter Maragos, Petros; Potamianos, Alexandros; Gros, Patrick (Ed.): Multimodal Processing and Interaction: Audio, Video, Text, pp. 1–46, Springer US, Boston, MA, 2008, ISBN: 978-0-387-76316-3. |
Adaptive Multimodal Fusion by Uncertainty Compensation with Application to Audio-Visual Speech Recognition Book Chapter Maragos, Petros; Potamianos, Alexandros; Gros, Patrick (Ed.): Multimodal Processing and Interaction: Audio, Video, Text, pp. 1–15, Springer US, Boston, MA, 2008, ISBN: 978-0-387-76316-3. |
2007 |
A generalized estimation approach for linear and nonlinear microphone array post-filters Journal Article Speech Communication, 49 (7-8), pp. 657–666, 2007, ISSN: 01676393. |
Multigrid Geometric Active Contour Models Journal Article IEEE Transactions on Image Processing, 16 (1), pp. 229-240, 2007. |
Partial Differential Equations in Image Processing Conference Invited Paper, Proc. European Signal Processing Conf. (EUSIPCO-98), Rhodes, Greece, pp. 527-536, Sep. 1998, (May), 2007. |
Multiband, Multisensor Features for Robust Speech Recognition Conference Proc. Int'l Conf. on Speech Technology and Communication (InterSpeech 2007 EuroSpeech), Antwerp, Belgium, Aug. 2007, 2007. |
Advanced front-end for robust speech recognition in extremely adverse environments Conference Proc. ..., 2007, ISBN: 9781605603162. |
Audiovisual-to-articulatory speech inversion using HMMs Conference 2007 IEEE 9th International Workshop on Multimedia Signal Processing, MMSP 2007 - Proceedings, 2007, ISBN: 1424412749. |
Leveling cartoons, texture energy markers, and image decomposition Conference 8th Int. Symp. on Mathematical Morphology, 1 , 2007. |
Temporal Occupancy Grid for mobile robot dynamic environment mapping Conference 2007 Mediterranean Conference on Control and Automation, MED, 2007, ISBN: 142441282X. |
Maximum likelihood SLAM in dynamic environments Conference Proceedings - International Conference on Tools with Artificial Intelligence, ICTAI, 1 , 2007, ISSN: 10823409. |
Multimodal fusion and learning with uncertain features applied to audiovisual speech recognition Conference 2007 IEEE 9th International Workshop on Multimedia Signal Processing, MMSP 2007 - Proceedings, (November 2015), 2007, ISBN: 1424412749. |
An audio-visual saliency model for movie summarization Conference 2007 IEEE 9th International Workshop on Multimedia Signal Processing, MMSP 2007 - Proceedings, 2007, ISBN: 1424412749. |
Vector-Valued Image Interpolation by an Anisotropic Diffusion-Projection PDE Conference SSVM, LNCS 4885 , 2007, ISSN: 03029743. |
Generalized Watershed and PDEs for Geometric- Textural Segmentation Conference Proceedings of 8th International Symposium on Mathematical Morphology (ISMM 2007), Rio de Janeiro, Brazil, Oct. 2007. Published in Mathematical Morphology and its Applications to Signal and Image Processing, edited by G.J.F. Banon, J. Barrera and U. Braga-Neto, MCT/INPE, Brazil, 2007, pp.411-422, 2007. |
2006 |
Virtual and remote robotic laboratory: Comparative experimental evaluation Journal Article IEEE Transactions on Education, 49 (3), pp. 360–369, 2006, ISSN: 00189359. |