2024 |
L Liapi, E Manoudi, M Revelou, K Christodoulou, P Koutras, P Maragos, Argiro Vatakis Time perception in film viewing: A modulation of scene's duration estimates as a function of film editing Journal Article Acta Psychologica, 244 , pp. 104206, 2024.
@article{liapi2024time,
  author    = {L Liapi and E Manoudi and M Revelou and K Christodoulou and P Koutras and P Maragos and Argiro Vatakis},
  title     = {Time perception in film viewing: A modulation of scene's duration estimates as a function of film editing},
  journal   = {Acta Psychologica},
  volume    = {244},
  pages     = {104206},
  year      = {2024},
  date      = {2024-01-01},
  doi       = {10.1016/j.actpsy.2024.104206},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2024_LiapiVataki_TimePerceptionInFilmViewing-ModulationOfSceneDuration_ActaPsychol.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
} |
A Zlatintsi, P P Filntisis, N Efthymiou, C Garoufis, G Retsinas, T Sounapoglou, I Maglogiannis, P Tsanakas, N Smyrnis, P Maragos Person Identification and Relapse Detection from Continuous Recordings of Biosignals Challenge: Overview and Results Journal Article IEEE Open Journal of Signal Processing, 2024.
@article{zlatintsi2024person,
  author    = {A Zlatintsi and P P Filntisis and N Efthymiou and C Garoufis and G Retsinas and T Sounapoglou and I Maglogiannis and P Tsanakas and N Smyrnis and P Maragos},
  title     = {Person Identification and Relapse Detection from Continuous Recordings of Biosignals Challenge: Overview and Results},
  journal   = {IEEE Open Journal of Signal Processing},
  year      = {2024},
  date      = {2024-01-01},
  doi       = {10.1109/OJSP.2024.3376300},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Zlatintsi_e-PreventionChallengeOverview_OJSP-2024_preprint.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
} |
A Manetas, P Mermigkas, P Maragos SDPL-SLAM: Introducing Lines in Dynamic Visual SLAM and Multi-Object Tracking Inproceedings Proc. IEEE/RSJ Int'l Conf. Intelligent Robots and Systems (IROS 2024), Abu Dhabi, UAE, 2024.
@inproceedings{Manetas2024,
  author    = {A Manetas and P Mermigkas and P Maragos},
  title     = {{SDPL-SLAM}: Introducing Lines in Dynamic Visual {SLAM} and Multi-Object Tracking},
  booktitle = {Proc. IEEE/RSJ Int'l Conf. Intelligent Robots and Systems (IROS 2024)},
  address   = {Abu Dhabi, UAE},
  year      = {2024},
  date      = {2024-10-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2024_Manetas_IntroduceLinesInVisualSLAM-MultiObjectTrack_IROS.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
} |
C Garoufis, A Zlatintsi, P Maragos Pre-Training Music Classification Models via Music Source Separation Inproceedings Proc. 32nd European Signal Processing Conference (EUSIPCO 2024), Lyon, France, 2024.
@inproceedings{Garoufis2024,
  author    = {C Garoufis and A Zlatintsi and P Maragos},
  title     = {Pre-Training Music Classification Models via Music Source Separation},
  booktitle = {Proc. 32nd European Signal Processing Conference (EUSIPCO 2024)},
  address   = {Lyon, France},
  year      = {2024},
  date      = {2024-08-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2024_GaroufisPretrainMusicClassifModelsViaMSS_EUSIPCO.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
} |
George Retsinas, Panagiotis P Filntisis, Radek Danecek, Victoria F Abrevaya, Anastasios Roussos, Timo Bolkart, Petros Maragos 3D Facial Expressions through Analysis-by-Neural-Synthesis Inproceedings Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2490-2501, 2024.
@inproceedings{Retsinas2024,
  author    = {George Retsinas and Panagiotis P Filntisis and Radek Danecek and Victoria F Abrevaya and Anastasios Roussos and Timo Bolkart and Petros Maragos},
  title     = {{3D} Facial Expressions through Analysis-by-Neural-Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages     = {2490--2501},
  year      = {2024},
  date      = {2024-06-17},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Retsinas_SMIRK-3D_Facial_Expressions_through_Analysis-by-Neural-Synthesis_CVPR2024.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
} |
I Kordonis, E Theodosis, G Retsinas, P Maragos Matrix Factorization in Tropical and Mixed Tropical-Linear Algebras Inproceedings Proc. IEEE Int’l Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024), Seoul, Korea, 2024.
@inproceedings{Kordonis2024,
  author    = {I Kordonis and E Theodosis and G Retsinas and P Maragos},
  title     = {Matrix Factorization in Tropical and Mixed Tropical-Linear Algebras},
  booktitle = {Proc. IEEE Int'l Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024)},
  address   = {Seoul, Korea},
  year      = {2024},
  date      = {2024-04-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Kordonis_MatrixFactorizationInMixedTropicalLinearAlegbras_ICASSP2024.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
} |
N Efthymiou, G Retsinas, P P Filntisis, P Maragos Augmenting Transformer Autoencoders with Phenotype Classification for Robust Detection of Psychotic Relapses Inproceedings Proc. IEEE Int’l Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024), Seoul, Korea, 2024.
@inproceedings{Efthymiou2024,
  author    = {N Efthymiou and G Retsinas and P P Filntisis and P Maragos},
  title     = {Augmenting Transformer Autoencoders with Phenotype Classification for Robust Detection of Psychotic Relapses},
  booktitle = {Proc. IEEE Int'l Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024)},
  address   = {Seoul, Korea},
  year      = {2024},
  date      = {2024-04-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Efthymiou_TransformerPhenotypeClassif-DetectPsychoticRelapses_ICASSP2024.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
} |
2023 |
K Papadimitriou, G Potamianos, G Sapountzaki, T Goulas, E Efthimiou, S.-E. Fotinea, P Maragos Greek sign language recognition for an education platform Journal Article Universal Access in the Information Society, 2023.
@article{papadimitriou2023greek,
  author    = {K Papadimitriou and G Potamianos and G Sapountzaki and T Goulas and E Efthimiou and S.-E. Fotinea and P Maragos},
  title     = {Greek sign language recognition for an education platform},
  journal   = {Universal Access in the Information Society},
  year      = {2023},
  date      = {2023-01-01},
  doi       = {10.1007/s10209-023-01017-7},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2023_Papadimitriou_GreekSignLangRecognForEducation_UAIS.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
} |
G Retsinas, N Efthymiou, D Anagnostopoulou, P Maragos Mushroom Detection and Three Dimensional Pose Estimation from Multi-View Point Clouds Journal Article Sensors, 23 (7), pp. 3576, 2023.
@article{retsinas2023mushroom,
  author    = {G Retsinas and N Efthymiou and D Anagnostopoulou and P Maragos},
  title     = {Mushroom Detection and Three Dimensional Pose Estimation from Multi-View Point Clouds},
  journal   = {Sensors},
  volume    = {23},
  number    = {7},
  pages     = {3576},
  year      = {2023},
  date      = {2023-01-01},
  doi       = {10.3390/s23073576},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2023_Retsinas_MushromDetection-and-3D-PoseEstimation_Sensors.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
} |
E Kalisperakis, T Karantinos, M Lazaridi, V Garyfalli, P P Filntisis, A Zlatintsi, N Efthymiou, A Mantas, L Mantonakis, T Mougiakos, I Maglogiannis, P Tsanakas, P Maragos, N Smyrnis Smartwatch digital phenotypes predict positive and negative symptom variation in a longitudinal monitoring study of patients with psychotic disorders Journal Article Frontiers in Psychiatry, 14 , 2023.
@article{kalisperakis2023smartwatch,
  author    = {E Kalisperakis and T Karantinos and M Lazaridi and V Garyfalli and P P Filntisis and A Zlatintsi and N Efthymiou and A Mantas and L Mantonakis and T Mougiakos and I Maglogiannis and P Tsanakas and P Maragos and N Smyrnis},
  title     = {Smartwatch digital phenotypes predict positive and negative symptom variation in a longitudinal monitoring study of patients with psychotic disorders},
  journal   = {Frontiers in Psychiatry},
  volume    = {14},
  year      = {2023},
  date      = {2023-01-01},
  doi       = {10.3389/fpsyt.2023.1024965},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2023_KalisperakisEtAl_ePrevention_FrontiersPsychiatry.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
} |
C Garoufis, A Zlatintsi, P Maragos Multi-Source Contrastive Learning from Musical Audio Conference Proc. 20th Sound and Music Computing Conference (SMC 2023), Stockholm, Sweden, 2023. BibTeX | Links: [PDF] [Poster] [Slides]
@conference{garoufis2023multi,
  author    = {C Garoufis and A Zlatintsi and P Maragos},
  title     = {Multi-Source Contrastive Learning from Musical Audio},
  booktitle = {Proc. 20th Sound and Music Computing Conference (SMC 2023)},
  address   = {Stockholm, Sweden},
  year      = {2023},
  date      = {2023-06-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Garoufis_SMC2023_paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Garoufis_SMC2023_poster.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Garoufis_SMC2023_slides.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
D Charitou, C Garoufis, A Zlatintsi, P Maragos Exploring Polyphonic Accompaniment Generation using Generative Adversarial Networks Conference Proc. 20th Sound and Music Computing Conference (SMC 2023), Stockholm, Sweden, 2023. BibTeX | Links: [PDF] [Slides] [Poster]
@conference{charitou2023exploring,
  author    = {D Charitou and C Garoufis and A Zlatintsi and P Maragos},
  title     = {Exploring Polyphonic Accompaniment Generation using Generative Adversarial Networks},
  booktitle = {Proc. 20th Sound and Music Computing Conference (SMC 2023)},
  address   = {Stockholm, Sweden},
  year      = {2023},
  date      = {2023-06-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Charitou_SMC2023_paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Charitou_SMC2023_slides.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Charitou_SMC2023_poster.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
N Efthymiou, G Retsinas, P P Filntisis, A Zlatintsi, E Kalisperakis, V Garyfalli, T Karantinos, M Lazaridi, N Smyrnis, P Maragos From Digital Phenotype Identification To Detection Of Psychotic Relapses Conference Proc. IEEE International Conference on Healthcare Informatics, Houston, TX, USA, 2023. BibTeX | Links: [PDF] [Slides]
@conference{efthymiou2023digital,
  author    = {N Efthymiou and G Retsinas and P P Filntisis and A Zlatintsi and E Kalisperakis and V Garyfalli and T Karantinos and M Lazaridi and N Smyrnis and P Maragos},
  title     = {From Digital Phenotype Identification To Detection Of Psychotic Relapses},
  booktitle = {Proc. IEEE International Conference on Healthcare Informatics},
  address   = {Houston, TX, USA},
  year      = {2023},
  date      = {2023-06-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Efthymiou_ICHI2023_paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Efthymiou_ICHI2023_slides.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
D Anagnostopoulou, G Retsinas, N Efthymiou, P P Filntisis, P Maragos A Realistic Synthetic Mushroom Scenes Dataset Conference Proc. 2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 4th Agriculture Vision Workshop, Vancouver, Canada, 2023. BibTeX | Links: [PDF] [Poster]
@conference{anagnostopoulou2023realistic,
  author    = {D Anagnostopoulou and G Retsinas and N Efthymiou and P P Filntisis and P Maragos},
  title     = {A Realistic Synthetic Mushroom Scenes Dataset},
  booktitle = {Proc. 2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 4th Agriculture Vision Workshop},
  address   = {Vancouver, Canada},
  year      = {2023},
  date      = {2023-06-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Anagnostopoulou_CVPRW2023_paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Anagnostopoulou_CVPRW2023_poster.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
G Retsinas, N Efthymiou, P Maragos Mushroom Segmentation and 3D Pose Estimation From Point Clouds Using Fully Convolutional Geometric Features and Implicit Pose Encoding Conference Proc. 2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 4th Agriculture Vision Workshop, Vancouver, Canada, 2023.
@conference{retsinas2023mushroomb,
  author    = {G Retsinas and N Efthymiou and P Maragos},
  title     = {Mushroom Segmentation and {3D} Pose Estimation From Point Clouds Using Fully Convolutional Geometric Features and Implicit Pose Encoding},
  booktitle = {Proc. 2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 4th Agriculture Vision Workshop},
  address   = {Vancouver, Canada},
  year      = {2023},
  date      = {2023-06-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Retsinas_CVPRW2023_Mushroom_Segmentation_and_3D_Pose_Estimation_From_Point_Clouds_Using_paper.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
P P Filntisis, G Retsinas, F Paraperas-Papantoniou, A Katsamanis, A Roussos, P Maragos SPECTRE: Visual Speech-Informed Perceptual 3D Facial Expression Reconstruction from Videos Conference Proc. 2023 IEEE/CVF Conf. on Computer Vision and Pattern Recognition (CVPR), 5th Workshop and Competition on Affective Behavior Analysis in-the-wild (ABAW), Vancouver, Canada, 2023.
@conference{filntisis2023spectre,
  author    = {P P Filntisis and G Retsinas and F Paraperas-Papantoniou and A Katsamanis and A Roussos and P Maragos},
  title     = {{SPECTRE}: Visual Speech-Informed Perceptual {3D} Facial Expression Reconstruction from Videos},
  booktitle = {Proc. 2023 IEEE/CVF Conf. on Computer Vision and Pattern Recognition (CVPR), 5th Workshop and Competition on Affective Behavior Analysis in-the-wild (ABAW)},
  address   = {Vancouver, Canada},
  year      = {2023},
  date      = {2023-06-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Filntisis_CVPRW2023_SPECTRE_Visual_Speech-Informed_Perceptual_3D_Facial_Expression_Reconstruction_From_Videos_paper.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
C O Tze, P P Filntisis, A.-L. Dimou, A Roussos, P Maragos Neural Sign Reenactor: Deep Photorealistic Sign Language Retargeting Conference Proc. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), AI for Content Creation Workshop (AI4CC), Vancouver, Canada, 2023. BibTeX | Links: [PDF] [Poster]
@conference{tze2023neural,
  author    = {C O Tze and P P Filntisis and A.-L. Dimou and A Roussos and P Maragos},
  title     = {Neural Sign Reenactor: Deep Photorealistic Sign Language Retargeting},
  booktitle = {Proc. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), AI for Content Creation Workshop (AI4CC)},
  address   = {Vancouver, Canada},
  year      = {2023},
  date      = {2023-06-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Tze_CVPRW2023_Neural_Sign_Reenactor_Paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Tze_CVPRW2023_Neural_Sign_Reenactor_Poster.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
G Retsinas, G Sfikas, P P Filntisis, P Maragos Newton-based Trainable Learning Rate Conference Proc. 48th IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2023), Rhodes, Greece, 2023.
@conference{retsinas2023newton,
  author    = {G Retsinas and G Sfikas and P P Filntisis and P Maragos},
  title     = {Newton-based Trainable Learning Rate},
  booktitle = {Proc. 48th IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2023)},
  address   = {Rhodes, Greece},
  year      = {2023},
  date      = {2023-06-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Retsinas_ICASSP2023_Newton-Based-Trainable-Learning-Rate.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
E Fekas, A Zlatintsi, P P Filntisis, C Garoufis, N Efthymiou, P Maragos Relapse Prediction from Long-Term Wearable Data using Self-Supervised Learning and Survival Analysis Conference Proc. 48th Int'l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-2023), Rhodes Island, Greece, 2023. BibTeX | Links: [PDF] [Slides]
@conference{fekas2023relapse,
  author    = {E Fekas and A Zlatintsi and P P Filntisis and C Garoufis and N Efthymiou and P Maragos},
  title     = {Relapse Prediction from Long-Term Wearable Data using Self-Supervised Learning and Survival Analysis},
  booktitle = {Proc. 48th Int'l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-2023)},
  address   = {Rhodes Island, Greece},
  year      = {2023},
  date      = {2023-06-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Fekas_ICASSP2023_paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Fekas_ICASSP2023_slides.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
D N Makropoulos, A Tsiami, A Prospathopoulos, D Kassis, A Frantzis, E Skarsoulis, G Piperakis, P Maragos Convolutional Recurrent Neural Networks for the Classification of Cetacean Bioacoustic Patterns Conference Proc. 48th Int'l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-2023), Rhodes Island, Greece, 2023. BibTeX | Links: [PDF] [Poster]
@conference{makropoulos2023convolutional,
  author    = {D N Makropoulos and A Tsiami and A Prospathopoulos and D Kassis and A Frantzis and E Skarsoulis and G Piperakis and P Maragos},
  title     = {Convolutional Recurrent Neural Networks for the Classification of Cetacean Bioacoustic Patterns},
  booktitle = {Proc. 48th Int'l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-2023)},
  address   = {Rhodes Island, Greece},
  year      = {2023},
  date      = {2023-06-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Makropoulos_ICASSP2023_paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Makropoulos_ICASSP2023_poster.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
P Tzathas, P Maragos, A Roussos 3D Neural Sculpting (3DNS): Editing Neural Signed Distance Functions Conference Proc. IEEE/CVF Winter Conference on Applications of Computer Vision (WACV), 2023.
@conference{tzathas2023neural,
  author    = {P Tzathas and P Maragos and A Roussos},
  title     = {{3D} Neural Sculpting ({3DNS}): Editing Neural Signed Distance Functions},
  booktitle = {Proc. IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)},
  year      = {2023},
  date      = {2023-01-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Tzathas_3D_Neural_Sculpting_3DNS_Editing_Neural_Signed_Distance_Functions_WACV2023.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
E Tsaprazlis, G Smyrnis, A G Dimakis, P Maragos Enhancing CLIP with a Third Modality Inproceedings Proc. 37th Conference on Neural Information Processing Systems (NeurIPS 2023): Workshop on Self-Supervised Learning - Theory and Practice, New Orleans, USA, 2023.
@inproceedings{Tsaprazlis2023,
  author    = {E Tsaprazlis and G Smyrnis and A G Dimakis and P Maragos},
  title     = {Enhancing {CLIP} with a Third Modality},
  booktitle = {Proc. 37th Conference on Neural Information Processing Systems (NeurIPS 2023): Workshop on Self-Supervised Learning - Theory and Practice},
  address   = {New Orleans, USA},
  year      = {2023},
  date      = {2023-12-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/TsaprazlisEtAl_Enhance-CLIP-with-ThirdModality_NIPSW2023.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
} |
A Glentis-Georgoulakis, G Retsinas, P Maragos Feather: An Elegant Solution to Effective DNN Sparsification Inproceedings Proc. 34th British Machine Vision Conference (BMVC 2023), Aberdeen, UK, 2023.
@inproceedings{Glentis-Georgoulakis2023,
  author    = {A Glentis-Georgoulakis and G Retsinas and P Maragos},
  title     = {Feather: An Elegant Solution to Effective {DNN} Sparsification},
  booktitle = {Proc. 34th British Machine Vision Conference (BMVC 2023)},
  address   = {Aberdeen, UK},
  year      = {2023},
  date      = {2023-11-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/GlentisEtAl_Feather-EffectiveSolution-to-DNN-Sparsification_BMVC2023.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
} |
M Konstantinou, G Retsinas, P Maragos Enhancing Action Recognition in Vehicle Environments With Human Pose Information Inproceedings Proc. Int'l Conf. on Pervasive Technologies Related to Assistive Environments (PETRA 2023), 2023.
@inproceedings{Konstantinou2023,
  author    = {M Konstantinou and G Retsinas and P Maragos},
  title     = {Enhancing Action Recognition in Vehicle Environments With Human Pose Information},
  booktitle = {Proc. Int'l Conf. on Pervasive Technologies Related to Assistive Environments (PETRA 2023)},
  year      = {2023},
  date      = {2023-07-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Konstantinou_ActionRecogn-in-VehicleEnvironment_PETRA2023.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
} |
N Kegkeroglou, P P Filntisis, P Maragos Medical Face Masks and Emotion Recognition from the Body: Insights from a Deep Learning Perspective Inproceedings Proc. Int'l Conf. on Pervasive Technologies Related to Assistive Environments (PETRA 2023), 2023.
@inproceedings{Kegkeroglou2023,
  author    = {N Kegkeroglou and P P Filntisis and P Maragos},
  title     = {Medical Face Masks and Emotion Recognition from the Body: Insights from a Deep Learning Perspective},
  booktitle = {Proc. Int'l Conf. on Pervasive Technologies Related to Assistive Environments (PETRA 2023)},
  year      = {2023},
  date      = {2023-07-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Kegkeroglou_MedicalFaceMasks-EmotionRecognFromBody_PETRA2023.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
} |
2022 |
R Dromnelle, E Renaudo, M Chetouani, P Maragos, R Chatila, B Girard, M Khamassi Reducing Computational Cost During Robot Navigation and Human-Robot Interaction with a Human-Inspired Reinforcement Learning Architecture Journal Article International Journal of Social Robotics, 2022.
@article{dromnelle2022reducing,
  author    = {R Dromnelle and E Renaudo and M Chetouani and P Maragos and R Chatila and B Girard and M Khamassi},
  title     = {Reducing Computational Cost During Robot Navigation and Human-Robot Interaction with a Human-Inspired Reinforcement Learning Architecture},
  journal   = {International Journal of Social Robotics},
  year      = {2022},
  date      = {2022-01-01},
  doi       = {10.1007/s12369-022-00942-6},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2022_DromnelleEtAl_RL-ReduceComputationRobotNavigation-HRI_IJSR.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
} |
A Zlatintsi, P P Filntisis, C Garoufis, N Efthymiou, P Maragos, A Menychtas, I Maglogiannis, P Tsanakas, T Sounapoglou, E Kalisperakis, T Karantinos, M Lazaridi, V Garyfalli, A Mantas, L Mantonakis, N Smyrnis e-Prevention: Advanced Support System for Monitoring and Relapse Prevention in Patients with Psychotic Disorders Analysing Long-Term Multimodal Data from Wearables and Video Captures Journal Article Sensors, 22 (19), pp. 7544, 2022.
@article{zlatintsi2022e-prevention,
  author    = {A Zlatintsi and P P Filntisis and C Garoufis and N Efthymiou and P Maragos and A Menychtas and I Maglogiannis and P Tsanakas and T Sounapoglou and E Kalisperakis and T Karantinos and M Lazaridi and V Garyfalli and A Mantas and L Mantonakis and N Smyrnis},
  title     = {{e-Prevention}: Advanced Support System for Monitoring and Relapse Prevention in Patients with Psychotic Disorders Analysing Long-Term Multimodal Data from Wearables and Video Captures},
  journal   = {Sensors},
  volume    = {22},
  number    = {19},
  pages     = {7544},
  year      = {2022},
  date      = {2022-01-01},
  doi       = {10.3390/s22197544},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2022_ZlatintsiEtAl_EPrevention_SENSORS2022.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
} |
N Tsilivis, A Tsiamis, P Maragos Toward a Sparsity Theory on Weighted Lattices Journal Article Journal of Mathematical Imaging and Vision, 2022.
@article{tsilivis2022toward,
  author    = {N Tsilivis and A Tsiamis and P Maragos},
  title     = {Toward a Sparsity Theory on Weighted Lattices},
  journal   = {Journal of Mathematical Imaging and Vision},
  year      = {2022},
  date      = {2022-01-01},
  doi       = {10.1007/s10851-022-01075-1},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2022_TsilivisTsiamisMaragos_SparsityTheoryOnWeightedLattices_JMIV.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
} |
N Efthymiou, P P Filntisis, P Koutras, A Tsiami, J Hadfield, G Potamianos, P Maragos ChildBot: Multi-robot perception and interaction with children Journal Article Robotics and Autonomous Systems, 150 , pp. 103975, 2022.
@article{efthymiou2022childbot,
  author    = {N Efthymiou and P P Filntisis and P Koutras and A Tsiami and J Hadfield and G Potamianos and P Maragos},
  title     = {{ChildBot}: Multi-robot perception and interaction with children},
  journal   = {Robotics and Autonomous Systems},
  volume    = {150},
  pages     = {103975},
  year      = {2022},
  date      = {2022-01-01},
  doi       = {10.1016/j.robot.2021.103975},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2022_EfthymiouEtAl_ChildBot-MultiRobotPerception-InteractionChildren_RAS.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
} |
D Anagnostopoulou, N Efthymiou, C Papailiou, P Maragos Child Engagement Estimation in Heterogeneous Child-Robot Interactions Using Spatiotemporal Visual Cues Conference Proc. 2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2022), Kyoto, Japan, 2022.
@conference{anagnostopoulou2022child,
  author    = {D Anagnostopoulou and N Efthymiou and C Papailiou and P Maragos},
  title     = {Child Engagement Estimation in Heterogeneous Child-Robot Interactions Using Spatiotemporal Visual Cues},
  booktitle = {Proc. 2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2022)},
  address   = {Kyoto, Japan},
  year      = {2022},
  date      = {2022-10-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Anagnostopoulou_IROS2022_paper.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
C Garoufis, A Zlatintsi, P P Filntisis, N Efthymiou, E Kalisperakis, T Karantinos, V Garyfalli, M Lazaridi, N Smyrnis, P Maragos Towards Unsupervised Subject-Independent Speech-Based Relapse Detection in Patients with Psychosis using Variational Autoencoders Conference Proc. 30th European Signal Processing Conference (EUSIPCO), Belgrade, Serbia, 2022. BibTeX | Links: [PDF] [Slides]
@conference{garoufis2022towards,
  author    = {C Garoufis and A Zlatintsi and P P Filntisis and N Efthymiou and E Kalisperakis and T Karantinos and V Garyfalli and M Lazaridi and N Smyrnis and P Maragos},
  title     = {Towards Unsupervised Subject-Independent Speech-Based Relapse Detection in Patients with Psychosis using Variational Autoencoders},
  booktitle = {Proc. 30th European Signal Processing Conference (EUSIPCO)},
  address   = {Belgrade, Serbia},
  year      = {2022},
  date      = {2022-09-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2022_GaroufisEtAl_UnsupervisedSpeechBasedRelapseDetectionVAES_EUSIPCO2022.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Garoufis_SubjectIndependentRelapseDetectionAudioVAEs_EUSIPCO22_slides.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
M Panagiotou, A Zlatintsi, P P Filntisis, A J Roumeliotis, N Efthymiou, P Maragos A Comparative Study of Autoencoder Architectures for Mental Health Analysis using Wearable Sensors Data Conference Proc. 30th European Signal Processing Conference (EUSIPCO), Belgrade, Serbia, 2022. BibTeX | Links: [PDF] [Slides]
@conference{panagiotou2022comparative,
  author    = {M Panagiotou and A Zlatintsi and P P Filntisis and A J Roumeliotis and N Efthymiou and P Maragos},
  title     = {A Comparative Study of Autoencoder Architectures for Mental Health Analysis using Wearable Sensors Data},
  booktitle = {Proc. 30th European Signal Processing Conference (EUSIPCO)},
  address   = {Belgrade, Serbia},
  year      = {2022},
  date      = {2022-09-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2022_PanagiotouEtAl_ComStudyAutoencodersMentalHealthWearables_EUSIPCO2022.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Panagiotou_EUSIPCO2022_Presentation_slides.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
P Papantonakis, C Garoufis, P Maragos Multi-band Masking for Waveform-based Singing Voice Separation Conference Proc. 30th European Signal Processing Conference (EUSIPCO), Belgrade, Serbia, 2022. BibTeX | Links: [PDF] [Poster]
@conference{papantonakis2022multi,
  author    = {P Papantonakis and C Garoufis and P Maragos},
  title     = {Multi-band Masking for Waveform-based Singing Voice Separation},
  booktitle = {Proc. 30th European Signal Processing Conference (EUSIPCO)},
  address   = {Belgrade, Serbia},
  year      = {2022},
  date      = {2022-08-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Papantonakis_MultibandMaskingSVS_EUSIPCO22_Paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Papantonakis_MultibandMaskingSVS_EUSIPCO22_Poster.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
I Asmanis, P Mermigkas, G Chalvatzaki, J Peters, P Maragos A Semantic Enhancement of Unified Geometric Representations for Improving Indoor Visual SLAM Conference Proc. 19th Int'l Conf. on Ubiquitous Robots (UR 2022), Jeju, Korea, 2022.
@conference{asmanis2022semantic,
  author    = {I Asmanis and P Mermigkas and G Chalvatzaki and J Peters and P Maragos},
  title     = {A Semantic Enhancement of Unified Geometric Representations for Improving Indoor Visual {SLAM}},
  booktitle = {Proc. 19th Int'l Conf. on Ubiquitous Robots (UR 2022)},
  address   = {Jeju, Korea},
  year      = {2022},
  date      = {2022-07-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2022_AsmanisMermigkas_SemanticEnhanceGeomRepres-IndoorVisualSLAM_UR.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
G Retsinas, P Filntisis, N Kardaris, P Maragos Attribute-based Gesture Recognition: Generalization to Unseen Classes Conference Proc. 14th Image, Video, and Multidimensional Signal Processing Workshop (IVMSP 2022), Nafplio, Greece, 2022.
@conference{retsinas2022attribute,
  author    = {G Retsinas and P Filntisis and N Kardaris and P Maragos},
  title     = {Attribute-based Gesture Recognition: Generalization to Unseen Classes},
  booktitle = {Proc. 14th Image, Video, and Multidimensional Signal Processing Workshop (IVMSP 2022)},
  address   = {Nafplio, Greece},
  year      = {2022},
  date      = {2022-06-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Retsinas_IVMSP2022_paper.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
C O Tze, P Filntisis, A Roussos, P Maragos Cartoonized Anonymization of Sign Language Videos Conference Proc. 14th IEEE Image, Video, and Multidimensional Signal Processing Workshop (IVMSP 2022), Nafplio, Greece, 2022.
@conference{tze2022cartoonized,
  author    = {C O Tze and P Filntisis and A Roussos and P Maragos},
  title     = {Cartoonized Anonymization of Sign Language Videos},
  booktitle = {Proc. 14th IEEE Image, Video, and Multidimensional Signal Processing Workshop (IVMSP 2022)},
  address   = {Nafplio, Greece},
  year      = {2022},
  date      = {2022-06-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Tze_IVMSP2022_Cartoonized-Anonymization-Sign-Videos_paper.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
F Paraperas-Papantoniou, P P Filntisis, P Maragos, A Roussos Neural Emotion Director: Speech-preserving semantic control of facial expressions in “in-the-wild” videos Conference Proc. 2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), New Orleans, USA, 2022, (CVPR-2022 Best Paper Finalist). BibTeX | Links: [PDF] [Poster] [Supp]
@conference{paraperas2022neural,
  author    = {F Paraperas-Papantoniou and P P Filntisis and P Maragos and A Roussos},
  title     = {Neural Emotion Director: Speech-preserving semantic control of facial expressions in ``in-the-wild'' videos},
  booktitle = {Proc. 2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  address   = {New Orleans, USA},
  year      = {2022},
  date      = {2022-06-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Paraperas_NED-SpeechPreservingSemanticControlFacialExpressions_CVPR2022_paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Paraperas_cvpr2022_NED_poster.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Paraperas_NED_CVPR2022_supplemental-material.pdf},
  note      = {CVPR-2022 Best Paper Finalist},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
K Avramidis, C Garoufis, A Zlatintsi, P Maragos Enhancing Affective Representations of Music-Induced EEG through Multimodal Supervision and Latent Domain Adaptation Conference Proc. 47th IEEE Int’l Conf. on Acoustics, Speech and Signal Processing (ICASSP-2022), 2022. BibTeX | Links: [PDF] [Poster] @conference{avramidis2022enhancing, title = {Enhancing Affective Representations of Music-Induced EEG through Multimodal Supervision and Latent Domain Adaptation}, author = {K Avramidis and C Garoufis and A Zlatintsi and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Avramidis_MusicEEGCrossModal_ICASSP22_Paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Avramidis_ICASSP2022-poster.pdf}, year = {2022}, date = {2022-05-01}, booktitle = {Proc. 47th IEEE Int’l Conf. on Acoustics, Speech and Signal Processing (ICASSP-2022)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
G Bastas, S Koutoupis, M K.-Papakostas, V Katsouros, P Maragos A Few-sample Strategy for Guitar Tablature Transcription Based on Inharmonicity Analysis and Playability Constraints Conference Proc. 47th IEEE Int’l Conf. on Acoustics, Speech and Signal Processing (ICASSP-2022), 2022. @conference{bastas2022few, title = {A Few-sample Strategy for Guitar Tablature Transcription Based on Inharmonicity Analysis and Playability Constraints}, author = {G Bastas and S Koutoupis and M K.-Papakostas and V Katsouros and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/BastasKoutoupis_TablatureTranscription_ICASSP22_Paper.pdf}, year = {2022}, date = {2022-05-01}, booktitle = {Proc. 47th IEEE Int’l Conf. on Acoustics, Speech and Signal Processing (ICASSP-2022)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
M Parelli, K Papadimitriou, G Potamianos, G Pavlakos, P Maragos Spatio-Temporal Graph Convolutional Networks for Continuous Sign Language Recognition Conference Proc. 47th IEEE Int’l Conf. on Acoustics, Speech and Signal Processing (ICASSP-2022), 2022. @conference{parelli2022spatio, title = {Spatio-Temporal Graph Convolutional Networks for Continuous Sign Language Recognition}, author = {M Parelli and K Papadimitriou and G Potamianos and G Pavlakos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/ParelliPapadimitriou_SignLanguageRecognitionGCNNs_ICASSP22_Paper.pdf}, year = {2022}, date = {2022-05-01}, booktitle = {Proc. 47th IEEE Int’l Conf. on Acoustics, Speech and Signal Processing (ICASSP-2022)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Misiakos, G Smyrnis, G Retsinas, P Maragos Neural Network Approximation based on Hausdorff Distance of Tropical Zonotopes Conference Proc. Int’l Conf. on Learning Representations (ICLR 2022), 2022. BibTeX | Links: [PDF] [Poster] [Slides] @conference{misiakos2022neural, title = {Neural Network Approximation based on Hausdorff Distance of Tropical Zonotopes}, author = {P Misiakos and G Smyrnis and G Retsinas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Misiakos_ICLR2022_TropicalGeometry_paper.pdf https://iclr.cc/virtual/2022/poster/5971 http://robotics.ntua.gr/wp-content/uploads/sites/2/Misiakos_ICLR2022_TropicalGeometry_slides.pdf}, year = {2022}, date = {2022-01-01}, booktitle = {Proc. Int’l Conf. on Learning Representations (ICLR 2022)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
2021 |
Petros Maragos, Vasileios Charisopoulos, Emmanouil Theodosis Tropical Geometry and Machine Learning Journal Article Proceedings of the IEEE, 109 (5), pp. 728-755, 2021. @article{MCT21, title = {Tropical Geometry and Machine Learning}, author = {Petros Maragos and Vasileios Charisopoulos and Emmanouil Theodosis}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/MaragosCharisopoulosTheodosis_TGML_PIEEE20211.pdf}, doi = {10.1109/JPROC.2021.3065238}, year = {2021}, date = {2021-12-31}, journal = {Proceedings of the IEEE}, volume = {109}, number = {5}, pages = {728--755}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
George Moustris, Nikolaos Kardaris, Antigoni Tsiami, Georgia Chalvatzaki, Petros Koutras, Athanasios Dometios, Paris Oikonomou, Costas Tzafestas, Petros Maragos, Eleni Efthimiou, Xanthi Papageorgiou, Stavroula-Evita Fotinea, Yiannis Koumpouros, Anna Vacalopoulou, Effie Papageorgiou, Alexandra Karavasili, Foteini Koureta, Dimitris Dimou, Alexandros Nikolakakis, Konstantinos Karaiskos, Panagiotis Mavridis The i-Walk Lightweight Assistive Rollator: First Evaluation Study Journal Article Frontiers in Robotics and AI, 8 , pp. 272, 2021, ISSN: 2296-9144. Abstract | BibTeX | Links: [PDF] @article{10.3389/frobt.2021.677542, title = {The i-Walk Lightweight Assistive Rollator: First Evaluation Study}, author = {George Moustris and Nikolaos Kardaris and Antigoni Tsiami and Georgia Chalvatzaki and Petros Koutras and Athanasios Dometios and Paris Oikonomou and Costas Tzafestas and Petros Maragos and Eleni Efthimiou and Xanthi Papageorgiou and Stavroula-Evita Fotinea and Yiannis Koumpouros and Anna Vacalopoulou and Effie Papageorgiou and Alexandra Karavasili and Foteini Koureta and Dimitris Dimou and Alexandros Nikolakakis and Konstantinos Karaiskos and Panagiotis Mavridis}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/frobt-08-677542.pdf}, doi = {10.3389/frobt.2021.677542}, issn = {2296-9144}, year = {2021}, date = {2021-12-30}, journal = {Frontiers in Robotics and AI}, volume = {8}, pages = {272}, abstract = {Robots can play a significant role as assistive devices for people with movement impairment and mild cognitive deficit. In this paper we present an overview of the lightweight i-Walk intelligent robotic rollator, which offers cognitive and mobility assistance to the elderly and to people with light to moderate mobility impairment. 
The utility, usability, safety and technical performance of the device is investigated through a clinical study, which took place at a rehabilitation center in Greece involving real patients with mild to moderate cognitive and mobility impairment. This first evaluation study comprised a set of scenarios in a number of pre-defined use cases, including physical rehabilitation exercises, as well as mobility and ambulation involved in typical daily living activities of the patients. The design and implementation of this study is discussed in detail, along with the obtained results, which include both an objective and a subjective evaluation of the system operation, based on a set of technical performance measures and a validated questionnaire for the analysis of qualitative data, respectively. The study shows that the technical modules performed satisfactory under real conditions, and that the users generally hold very positive views of the platform, considering it safe and reliable.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Robots can play a significant role as assistive devices for people with movement impairment and mild cognitive deficit. In this paper we present an overview of the lightweight i-Walk intelligent robotic rollator, which offers cognitive and mobility assistance to the elderly and to people with light to moderate mobility impairment. The utility, usability, safety and technical performance of the device is investigated through a clinical study, which took place at a rehabilitation center in Greece involving real patients with mild to moderate cognitive and mobility impairment. This first evaluation study comprised a set of scenarios in a number of pre-defined use cases, including physical rehabilitation exercises, as well as mobility and ambulation involved in typical daily living activities of the patients. 
The design and implementation of this study is discussed in detail, along with the obtained results, which include both an objective and a subjective evaluation of the system operation, based on a set of technical performance measures and a validated questionnaire for the analysis of qualitative data, respectively. The study shows that the technical modules performed satisfactory under real conditions, and that the users generally hold very positive views of the platform, considering it safe and reliable. |
Nikos Melanitis, Petros Maragos A linear method for camera pair self-calibration Journal Article Computer Vision and Image Understanding, 210 , pp. 103223, 2021. @article{MeMa21, title = {A linear method for camera pair self-calibration}, author = {Nikos Melanitis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_MelanitisMaragos_LinearCameraSelfPairCalibration_CVIU.pdf}, doi = {10.1016/j.cviu.2021.103223}, year = {2021}, date = {2021-09-01}, journal = {Computer Vision and Image Understanding}, volume = {210}, pages = {103223}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
N Efthymiou, P P Filntisis, G Potamianos, P Maragos Visual Robotic Perception System with Incremental Learning for Child–Robot Interaction Scenarios Journal Article Technologies, 9 (4), pp. 86, 2021. @article{efthymiou2021visual, title = {Visual Robotic Perception System with Incremental Learning for Child–Robot Interaction Scenarios}, author = {N Efthymiou and P P Filntisis and G Potamianos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_EfthymiouEtAl_VisualRobotPerceptionSystem-ChildRobotInteract_Technologies-1.pdf}, doi = {10.3390/technologies9040086}, year = {2021}, date = {2021-01-01}, journal = {Technologies}, volume = {9}, number = {4}, pages = {86}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
M. Diomataris, N. Gkanatsios, V. Pitsikalis, P. Maragos Grounding Consistency: Distilling Spatial Common Sense for Precise Visual Relationship Detection Conference Proceedings of International Conference on Computer Vision (ICCV-2021), 2021. @conference{Diomataris2021, title = {Grounding Consistency: Distilling Spatial Common Sense for Precise Visual Relationship Detection}, author = {M. Diomataris and N. Gkanatsios and V. Pitsikalis and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/DiomatarisEtAl_GroundingConsistency-VisualRelationsDetection_ICCV2021.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/DiomatarisEtAl_GroundingConsistency-VisualRelationsDetection_ICCV2021_supp.pdf}, year = {2021}, date = {2021-12-31}, booktitle = {Proceedings of International Conference on Computer Vision (ICCV-2021)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Antoniadis, P P Filntisis, P Maragos Exploiting Emotional Dependencies with Graph Convolutional Networks for Facial Expression Recognition Conference Proc. 16th IEEE Int’l Conf. on Automatic Face and Gesture Recognition (FG-2021), 2021. @conference{Antoniadis2021, title = {Exploiting Emotional Dependencies with Graph Convolutional Networks for Facial Expression Recognition}, author = {P Antoniadis and P P Filntisis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_AntoniadisEtAl_Emotion-GCN-FacialExpressionRecogn_FG-1.pdf}, year = {2021}, date = {2021-12-01}, booktitle = {Proc. 16th IEEE Int’l Conf. on Automatic Face and Gesture Recognition (FG-2021)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
I Pikoulis, P P Filntisis, P Maragos Leveraging Semantic Scene Characteristics and Multi-Stream Convolutional Architectures in a Contextual Approach for Video-Based Visual Emotion Recognition in the Wild Conference Proc. 16th IEEE Int’l Conf. on Automatic Face and Gesture Recognition (FG-2021), 2021. @conference{Pikoulis2021, title = {Leveraging Semantic Scene Characteristics and Multi-Stream Convolutional Architectures in a Contextual Approach for Video-Based Visual Emotion Recognition in the Wild}, author = {I Pikoulis and P P Filntisis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_PikoulisEtAl_VideoEmotionRecognInTheWild-SemanticMultiStreamContext_FG-1.pdf}, year = {2021}, date = {2021-12-01}, booktitle = {Proc. 16th IEEE Int’l Conf. on Automatic Face and Gesture Recognition (FG-2021)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
V. Vasileiou, N. Kardaris, P. Maragos Exploring Temporal Context and Human Movement Dynamics for Online Action Detection in Videos Conference Proc. 29th European Signal Processing Conference (EUSIPCO 2021), Dublin, Ireland, 2021. BibTeX | Links: [PDF] [Slides] @conference{Vasileiou2021, title = {Exploring Temporal Context and Human Movement Dynamics for Online Action Detection in Videos}, author = {V. Vasileiou and N. Kardaris and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Vasileiou_EUSIPCO21_Enhancing_temporal_context_for_online_action_detection_in_videos_Paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Vasileiou_EUSIPCO21_presentation_slides.pdf}, year = {2021}, date = {2021-08-31}, booktitle = {Proc. 29th European Signal Processing Conference (EUSIPCO 2021)}, address = {Dublin, Ireland}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P. P. Filntisis, N. Efthymiou, G. Potamianos, P. Maragos An Audiovisual Child Emotion Recognition System for Child-Robot Interaction Applications Conference Proc. 29th European Signal Processing Conference (EUSIPCO 2021), Dublin, Ireland, 2021. BibTeX | Links: [Slides] [PDF] @conference{Filntisis2021, title = {An Audiovisual Child Emotion Recognition System for Child-Robot Interaction Applications}, author = {P. P. Filntisis and N. Efthymiou and G. Potamianos and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Filntisis_EUSIPCO2021_ChildEmotionRecogn_presentation_slides.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_FilntisisEtAl_AV-ChildEmotionRecognSystem-ChildRobotInteract_EUSIPCO.pdf}, year = {2021}, date = {2021-08-31}, booktitle = {Proc. 29th European Signal Processing Conference (EUSIPCO 2021)}, address = {Dublin, Ireland}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C. Garoufis, A. Zlatintsi, P. Maragos HTMD-NET: A Hybrid Masking-Denoising Approach to Time-Domain Monaural Singing Voice Separation Conference Proc. 29th European Signal Processing Conference (EUSIPCO 2021), Dublin, Ireland, 2021. BibTeX | Links: [Slides] [PDF] @conference{Garoufis2021, title = {HTMD-NET: A Hybrid Masking-Denoising Approach to Time-Domain Monaural Singing Voice Separation}, author = {C. Garoufis and A. Zlatintsi and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Garoufis_EUSIPCO2021_HTMDNet_slides.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Garoufis_EUSIPCO2021_HTMDNet1_Paper.pdf}, year = {2021}, date = {2021-08-31}, booktitle = {Proc. 29th European Signal Processing Conference (EUSIPCO 2021)}, address = {Dublin, Ireland}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
K. Avramidis, A. Zlatintsi, C. Garoufis, P. Maragos Multiscale Fractal Analysis on EEG Signals for Music-Induced Emotion Recognition Conference Proc. 29th European Signal Processing Conference (EUSIPCO 2021), Dublin, Ireland, 2021. BibTeX | Links: [Slides] [PDF] @conference{Avramidis2021, title = {Multiscale Fractal Analysis on EEG Signals for Music-Induced Emotion Recognition}, author = {K. Avramidis and A. Zlatintsi and C. Garoufis and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Avramidis_EUSIPCO2021_MFA-EEG-MusicEmotion_presentation_slides.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_AvramidisEtAl_MFA-EEG_MusicEmotionRecogn_EUSIPCO.pdf}, year = {2021}, date = {2021-08-31}, booktitle = {Proc. 29th European Signal Processing Conference (EUSIPCO 2021)}, address = {Dublin, Ireland}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P. Giannoulis, G. Potamianos, P. Maragos Overlapped Sound Event Classification via Multi-Channel Sound Separation Network Conference Proc. 29th European Signal Processing Conference (EUSIPCO 2021), Dublin, Ireland, 2021. BibTeX | Links: [PDF] [Slides] @conference{Giannoulis2021, title = {Overlapped Sound Event Classification via Multi-Channel Sound Separation Network}, author = {P. Giannoulis and G. Potamianos and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Giannoulis_EUSIPCO21_OverlapSoundEventClassif_Paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Giannoulis_EUSIPCO21_presentation_slides.pdf}, year = {2021}, date = {2021-08-31}, booktitle = {Proc. 29th European Signal Processing Conference (EUSIPCO 2021)}, address = {Dublin, Ireland}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C. Garoufis, A. Zlatintsi, P. P. Filntisis, N. Efthymiou, E. Kalisperakis, V. Garyfalli, T. Karantinos, L. Mantonakis, N. Smyrnis, P. Maragos An Unsupervised Learning Approach for Detecting Relapses from Spontaneous Speech in Patients with Psychosis Conference Proc. IEEE-EMBS International Conference on Biomedical and Health Informatics (BHI-2021), 2021. BibTeX | Links: [PDF] [Poster] @conference{Garoufis2021b, title = {An Unsupervised Learning Approach for Detecting Relapses from Spontaneous Speech in Patients with Psychosis}, author = {C. Garoufis and A. Zlatintsi and P. P. Filntisis and N. Efthymiou and E. Kalisperakis and V. Garyfalli and T. Karantinos and L. Mantonakis and N. Smyrnis and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Garoufis_BHI2021_UnsupervisedLearningRelapseDetection_Paper.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Garoufis_BHI21_Poster.pdf}, year = {2021}, date = {2021-07-31}, booktitle = {Proc. IEEE-EMBS International Conference on Biomedical and Health Informatics (BHI-2021)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Dafni Anagnostopoulou, Niki Efthymiou, Christina Papailiou, Petros Maragos Engagement Estimation During Child Robot Interaction Using Deep Convolutional Networks Focusing on ASD Children Conference Proc. IEEE Int'l Conf. Robotics and Automation (ICRA-2021), Xi'an, 2021. BibTeX | Links: [PDF] [Video] [Slides] @conference{AnagnostopoulouICRA2021, title = {Engagement Estimation During Child Robot Interaction Using Deep Convolutional Networks Focusing on ASD Children}, author = {Dafni Anagnostopoulou and Niki Efthymiou and Christina Papailiou and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_Anagnostopoulou_EngagementEstimationChildRobotInteraction_ICRA.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Anagnostopoulou_ICRA21_presentation1.mp4 http://robotics.ntua.gr/wp-content/uploads/sites/2/Anagnostopoulou_ICRA21_slides1.pdf}, year = {2021}, date = {2021-06-01}, booktitle = {Proc. IEEE Int'l Conf. Robotics and Automation (ICRA-2021)}, address = {Xi'an}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Agelos Kratimenos, Georgios Pavlakos, Petros Maragos Independent Sign Language Recognition with 3D Body, Hands, and Face Reconstruction Conference Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021), Toronto, 2021. BibTeX | Links: [PDF] [Slides] [Video] [Poster] @conference{Kratimenos_icassp21, title = {Independent Sign Language Recognition with 3D Body, Hands, and Face Reconstruction}, author = {Agelos Kratimenos and Georgios Pavlakos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_KratimenosPavlakosMaragos_IsolatedSignLangRecogn3Dreconstruct_ICASSP.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Kratimenos_ICASSP2021_slides.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Kratimenos_ICASSP2021_video.mp4 http://robotics.ntua.gr/wp-content/uploads/sites/2/Kratimenos_ICASSP2021_poster.pdf}, year = {2021}, date = {2021-06-01}, booktitle = {Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021)}, address = {Toronto}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Nikos Tsilivis, Anastasios Tsiamis, Petros Maragos Sparsity in Max-Plus Algebra and Applications in Multivariate Convex Regression Conference Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021), Toronto, 2021. @conference{TTM21, title = {Sparsity in Max-Plus Algebra and Applications in Multivariate Convex Regression}, author = {Nikos Tsilivis and Anastasios Tsiamis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_TsilivisEtAl_SparseTropicalRegression_ICASSP.pdf}, year = {2021}, date = {2021-06-01}, booktitle = {Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021)}, address = {Toronto}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Nikolaos Dimitriadis, Petros Maragos Advances in Morphological Neural Networks: Training, Pruning and Enforcing Shape Constraints Conference Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021), Toronto, 2021. BibTeX | Links: [PDF] [Slides] [Video] [Poster] @conference{DM21, title = {Advances in Morphological Neural Networks: Training, Pruning and Enforcing Shape Constraints}, author = {Nikolaos Dimitriadis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_DimitriadisMaragos_AdvancesMorphologicNeuralNets_ICASSP.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/icassp2021-slides-Dimitriadis-Maragos.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/icassp2021-presentation-Dimitriadis-Maragos.mp4 http://robotics.ntua.gr/wp-content/uploads/sites/2/icassp2021-poster_Dimitriadis_Maragos.pdf}, year = {2021}, date = {2021-06-01}, booktitle = {Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021)}, address = {Toronto}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Kleanthis Avramidis, Agelos Kratimenos, Christos Garoufis, Athanasia Zlatintsi, Petros Maragos Deep Convolutional and Recurrent Networks for Polyphonic Instrument Classification from Monophonic Raw Audio Waveforms Conference Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021), Toronto, 2021. BibTeX | Links: [PDF] [Slides] [Video] [Poster] @conference{AvramidisIC2021, title = {Deep Convolutional and Recurrent Networks for Polyphonic Instrument Classification from Monophonic Raw Audio Waveforms}, author = {Kleanthis Avramidis and Agelos Kratimenos and Christos Garoufis and Athanasia Zlatintsi and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2021_AvramidisKratimenos_PolyphonicInstrumentClassification_ICASSP.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Avramidis_ICASSP2021_IC2_Slides.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Avramidis_ICASSP2021_IC2_Presentation.mp4 http://robotics.ntua.gr/wp-content/uploads/sites/2/Avramidis_ICASSP2021_IC2_Poster.pdf}, year = {2021}, date = {2021-06-01}, booktitle = {Proc. 46th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2021)}, address = {Toronto}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Xanthi S Papageorgiou, Athanasios C Dometios, Costas S Tzafestas Towards a User Adaptive Assistive Robot: Learning from Demonstration Using Navigation Functions Conference 2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), 2021. @conference{papageorgiou2021towards, title = {Towards a User Adaptive Assistive Robot: Learning from Demonstration Using Navigation Functions}, author = {Xanthi S Papageorgiou and Athanasios C Dometios and Costas S Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Papageorgiou_NFLearning_IROS2021.pdf}, year = {2021}, date = {2021-01-01}, booktitle = {2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, pages = {965--970}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Danai Efstathiou, Georgia Chalvatzaki, Athanasios Dometios, Dionisios Spiliopoulos, Costas S Tzafestas Deep Leg Tracking by Detection and Gait Analysis in 2D Range Data for Intelligent Robotic Assistants Conference 2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), 2021. BibTeX | Links: [Slides] [PDF] @conference{efstathiou2021deep, title = {Deep Leg Tracking by Detection and Gait Analysis in 2D Range Data for Intelligent Robotic Assistants}, author = {Danai Efstathiou and Georgia Chalvatzaki and Athanasios Dometios and Dionisios Spiliopoulos and Costas S Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Efstathiou_DeepLegTracking_IROS2021_slides.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Efstathiou_DeepLegTracking_IROS2021.pdf}, year = {2021}, date = {2021-01-01}, booktitle = {2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, pages = {2657--2662}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
2020 |
K Kritsis, C Garoufis, A Zlatintsi, M Bouillon, C Acosta, D Martín-Albo, R Piechaud, P Maragos, V Katsouros iMuSciCA Workbench: Web-based Music Activities For Science Education Journal Article Journal of the Audio Engineering Society, 68 (10), pp. 738-746, 2020. @article{kritsis2020imuscia, title = {iMuSciCA Workbench: Web-based Music Activities For Science Education}, author = {K Kritsis and C Garoufis and A Zlatintsi and M Bouillon and C Acosta and D Martín-Albo and R Piechaud and P Maragos and V Katsouros}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2020_iMuSciCa-WebMusicActivitiesForScienceEducation_JAES.pdf}, doi = {10.17743/jaes.2020.0021}, year = {2020}, date = {2020-10-01}, journal = {Journal of the Audio Engineering Society}, volume = {68}, number = {10}, pages = {738--746}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Christian Werner, Athanasios C Dometios, Costas S Tzafestas, Petros Maragos, Jürgen M Bauer, Klaus Hauer Evaluating the task effectiveness and user satisfaction with different operation modes of an assistive bathing robot in older adults Journal Article Assistive Technology, 0 , 2020, (PMID: 32286163). BibTeX | Links: [Webpage] [PDF] @article{doi:10.1080/10400435.2020.1755744, title = {Evaluating the task effectiveness and user satisfaction with different operation modes of an assistive bathing robot in older adults}, author = {Christian Werner and Athanasios C Dometios and Costas S Tzafestas and Petros Maragos and Jürgen M Bauer and Klaus Hauer}, url = {https://doi.org/10.1080/10400435.2020.1755744 http://robotics.ntua.gr/wp-content/uploads/sites/2/Werner2020_EvaluatingTheTaskEffectivenessAndUserSatisfaction-AssistBathRobot_AssistTechnology.pdf}, doi = {10.1080/10400435.2020.1755744}, year = {2020}, date = {2020-07-08}, journal = {Assistive Technology}, volume = {0}, publisher = {Taylor \& Francis}, note = {PMID: 32286163}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
A Zlatintsi, A C Dometios, N Kardaris, I Rodomagoulakis, P Koutras, X Papageorgiou, P Maragos, C S Tzafestas, P Vartholomeos, K Hauer, C Werner, R Annicchiarico, M G Lombardi, F Adriano, T Asfour, A M Sabatini, C Laschi, M Cianchetti, A Güler, I Kokkinos, B Klein, R López I-Support: A robotic platform of an assistive bathing robot for the elderly population Journal Article Robotics and Autonomous Systems, 126 , pp. 103451, 2020, ISSN: 0921-8890. Abstract | BibTeX | Links: [Webpage] [PDF] @article{ZLATINTSI2020103451, title = {I-Support: A robotic platform of an assistive bathing robot for the elderly population}, author = {A Zlatintsi and A C Dometios and N Kardaris and I Rodomagoulakis and P Koutras and X Papageorgiou and P Maragos and C S Tzafestas and P Vartholomeos and K Hauer and C Werner and R Annicchiarico and M G Lombardi and F Adriano and T Asfour and A M Sabatini and C Laschi and M Cianchetti and A Güler and I Kokkinos and B Klein and R López}, url = {http://www.sciencedirect.com/science/article/pii/S0921889019304968 http://robotics.ntua.gr/wp-content/uploads/sites/2/ICCSetal_I-Support_RAS-2020.pdf}, doi = {10.1016/j.robot.2020.103451}, issn = {0921-8890}, year = {2020}, date = {2020-04-10}, journal = {Robotics and Autonomous Systems}, volume = {126}, pages = {103451}, abstract = {In this paper we present a prototype integrated robotic system, the I-Support bathing robot, that aims at supporting new aspects of assisted daily-living activities on a real-life scenario. The paper focuses on describing and evaluating key novel technological features of the system, with the emphasis on cognitive human–robot interaction modules and their evaluation through a series of clinical validation studies. 
The I-Support project on its whole has envisioned the development of an innovative, modular, ICT-supported service robotic system that assists frail seniors to safely and independently complete an entire sequence of physically and cognitively demanding bathing tasks, such as properly washing their back and their lower limbs. A variety of innovative technologies have been researched and a set of advanced modules of sensing, cognition, actuation and control have been developed and seamlessly integrated to enable the system to adapt to the target population abilities. These technologies include: human activity monitoring and recognition, adaptation of a motorized chair for safe transfer of the elderly in and out the bathing cabin, a context awareness system that provides full environmental awareness, as well as a prototype soft robotic arm and a set of user-adaptive robot motion planning and control algorithms. This paper focuses in particular on the multimodal action recognition system, developed to monitor, analyze and predict user actions with a high level of accuracy and detail in real-time, which are then interpreted as robotic tasks. In the same framework, the analysis of human actions that have become available through the project’s multimodal audio–gestural dataset, has led to the successful modeling of Human–Robot Communication, achieving an effective and natural interaction between users and the assistive robotic platform. In order to evaluate the I-Support system, two multinational validation studies were conducted under realistic operating conditions in two clinical pilot sites. 
Some of the findings of these studies are presented and analyzed in the paper, showing good results in terms of: (i) high acceptability regarding the system usability by this particularly challenging target group, the elderly end-users, and (ii) overall task effectiveness of the system in different operating modes.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this paper we present a prototype integrated robotic system, the I-Support bathing robot, that aims at supporting new aspects of assisted daily-living activities on a real-life scenario. The paper focuses on describing and evaluating key novel technological features of the system, with the emphasis on cognitive human–robot interaction modules and their evaluation through a series of clinical validation studies. The I-Support project on its whole has envisioned the development of an innovative, modular, ICT-supported service robotic system that assists frail seniors to safely and independently complete an entire sequence of physically and cognitively demanding bathing tasks, such as properly washing their back and their lower limbs. A variety of innovative technologies have been researched and a set of advanced modules of sensing, cognition, actuation and control have been developed and seamlessly integrated to enable the system to adapt to the target population abilities. These technologies include: human activity monitoring and recognition, adaptation of a motorized chair for safe transfer of the elderly in and out the bathing cabin, a context awareness system that provides full environmental awareness, as well as a prototype soft robotic arm and a set of user-adaptive robot motion planning and control algorithms. This paper focuses in particular on the multimodal action recognition system, developed to monitor, analyze and predict user actions with a high level of accuracy and detail in real-time, which are then interpreted as robotic tasks. 
In the same framework, the analysis of human actions that have become available through the project’s multimodal audio–gestural dataset, has led to the successful modeling of Human–Robot Communication, achieving an effective and natural interaction between users and the assistive robotic platform. In order to evaluate the I-Support system, two multinational validation studies were conducted under realistic operating conditions in two clinical pilot sites. Some of the findings of these studies are presented and analyzed in the paper, showing good results in terms of: (i) high acceptability regarding the system usability by this particularly challenging target group, the elderly end-users, and (ii) overall task effectiveness of the system in different operating modes. |
Christian Werner, Nikos Kardaris, Petros Koutras, Athanasia Zlatintsi, Petros Maragos, Jürgen M Bauer, Klaus Hauer Improving gesture-based interaction between an assistive bathing robot and older adults via user training on the gestural commands Journal Article Archives of Gerontology and Geriatrics, 87 , pp. 103996, 2020, ISSN: 0167-4943. Abstract | BibTeX | Links: [Webpage] [PDF] @article{WERNER2020103996, title = {Improving gesture-based interaction between an assistive bathing robot and older adults via user training on the gestural commands}, author = {Christian Werner and Nikos Kardaris and Petros Koutras and Athanasia Zlatintsi and Petros Maragos and Jürgen M Bauer and Klaus Hauer}, url = {http://www.sciencedirect.com/science/article/pii/S0167494319302390 http://robotics.ntua.gr/wp-content/uploads/sites/2/Improving-gesture-based-interaction-between-an-assistive-bathing-robot-and-older-adults-via-user-training-on-the-gestural-commands.pdf}, doi = {10.1016/j.archger.2019.103996}, issn = {0167-4943}, year = {2020}, date = {2020-03-01}, journal = {Archives of Gerontology and Geriatrics}, volume = {87}, pages = {103996}, abstract = {Background Gesture-based human-robot interaction (HRI) depends on the technical performance of the robot-integrated gesture recognition system (GRS) and on the gestural performance of the robot user, which has been shown to be rather low in older adults. Training of gestural commands (GCs) might improve the quality of older users’ input for gesture-based HRI, which in turn may lead to an overall improved HRI. Objective To evaluate the effects of a user training on gesture-based HRI between an assistive bathing robot and potential elderly robot users. Methods Twenty-five older adults with bathing disability participated in this quasi-experimental, single-group, pre-/post-test study and underwent a specific user training (10−15 min) on GCs for HRI with the assistive bathing robot. 
Outcomes measured before and after training included participants’ gestural performance assessed by a scoring method of an established test of gesture production (TULIA) and sensor-based gestural performance (SGP) scores derived from the GRS-recorded data, and robot’s command recognition rate (CRR). Results Gestural performance (TULIA = +57.1 ± 56.2 %, SGP scores = +41.1 ± 74.4 %) and CRR (+31.9 ± 51.2 %) significantly improved over training (p < .001). Improvements in gestural performance and CRR were highly associated with each other (r = 0.80–0.81, p < .001). Participants with lower initial gestural performance and higher gerontechnology anxiety benefited most from the training. Conclusions Our study highlights that training in gesture-based HRI with an assistive bathing robot is highly beneficial for the quality of older users’ GCs, leading to higher CRRs of the robot-integrated GRS, and thus to an overall improved HRI.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Background Gesture-based human-robot interaction (HRI) depends on the technical performance of the robot-integrated gesture recognition system (GRS) and on the gestural performance of the robot user, which has been shown to be rather low in older adults. Training of gestural commands (GCs) might improve the quality of older users’ input for gesture-based HRI, which in turn may lead to an overall improved HRI. Objective To evaluate the effects of a user training on gesture-based HRI between an assistive bathing robot and potential elderly robot users. Methods Twenty-five older adults with bathing disability participated in this quasi-experimental, single-group, pre-/post-test study and underwent a specific user training (10−15 min) on GCs for HRI with the assistive bathing robot. 
Outcomes measured before and after training included participants’ gestural performance assessed by a scoring method of an established test of gesture production (TULIA) and sensor-based gestural performance (SGP) scores derived from the GRS-recorded data, and robot’s command recognition rate (CRR). Results Gestural performance (TULIA = +57.1 ± 56.2 %, SGP scores = +41.1 ± 74.4 %) and CRR (+31.9 ± 51.2 %) significantly improved over training (p < .001). Improvements in gestural performance and CRR were highly associated with each other (r = 0.80–0.81, p < .001). Participants with lower initial gestural performance and higher gerontechnology anxiety benefited most from the training. Conclusions Our study highlights that training in gesture-based HRI with an assistive bathing robot is highly beneficial for the quality of older users’ GCs, leading to higher CRRs of the robot-integrated GRS, and thus to an overall improved HRI. |
V. Tassopoulou, G. Retsinas, P. Maragos Enhancing Handwritten Text Recognition with N-gram sequence decomposition and Multitask Learning Conference Int'l Conference on Pattern Recognition (ICPR 2020), Milan, Italy, 2020. @conference{Tassopoulou2020, title = {Enhancing Handwritten Text Recognition with N-gram sequence decomposition and Multitask Learning}, author = {V. Tassopoulou and G. Retsinas and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2020_TassopoulouEtAl_EnhaceHandwrittenTextRecognition-MultitaskLearn_ICPR-1.pdf}, year = {2020}, date = {2020-12-04}, booktitle = {Int'l Conference on Pattern Recognition (ICPR 2020)}, address = {Milan, Italy}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Nikolaos Gkanatsios, Vassilis Pitsikalis, Petros Maragos From Saturation to Zero-Shot Visual Relationship Detection Using Local Context Conference British Machine Vision Conference, Online, 2020. @conference{Gkanatsios2020, title = {From Saturation to Zero-Shot Visual Relationship Detection Using Local Context}, author = {Nikolaos Gkanatsios and Vassilis Pitsikalis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2020_GPM_VisualRelationDetection_BMCV.pdf}, year = {2020}, date = {2020-09-10}, booktitle = {British Machine Vision Conference}, address = {Online}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Georgia Chalvatzaki, Petros Koutras, Antigoni Tsiami, Costas S. Tzafestas, Petros Maragos i-Walk Intelligent Assessment System: Activity, Mobility, Intention, Communication Conference Proc. 16th European Computer Vision Conference Workshops (ECCVW) – 8th Int’l Workshop on Assistive Computer Vision and Robotics (ACVR-2020), Online, 2020. @conference{Chalvatzaki2020, title = {i-Walk Intelligent Assessment System: Activity, Mobility, Intention, Communication}, author = {Georgia Chalvatzaki and Petros Koutras and Antigoni Tsiami and Costas S. Tzafestas and Petros Maragos}, url = {https://robotics.ntua.gr/wp-content/uploads/2020_ChalvatzakiKoutrasTsiami+_iWalkIntelligentAssessmentSystem_ECCVW-ACVR.pdf}, year = {2020}, date = {2020-08-01}, booktitle = {Proc. 16th European Computer Vision Conference Workshops (ECCVW) – 8th Int’l Workshop on Assistive Computer Vision and Robotics (ACVR-2020)}, address = {Online}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Georgios Smyrnis, Petros Maragos Multiclass Neural Network Minimization via Tropical Newton Polytope Approximation Conference International Conference on Machine Learning (ICML), Online, 2020. @conference{Smyrnis2020, title = {Multiclass Neural Network Minimization via Tropical Newton Polytope Approximation}, author = {Georgios Smyrnis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2020_SmyrnisMaragos_MulticlassNNminimiz-TropicPolytopApproxim_ICML1.pdf}, year = {2020}, date = {2020-07-01}, booktitle = {International Conference on Machine Learning (ICML)}, address = {Online}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
I. Maglogiannis, A. Zlatintsi, A. Menychtas, D. Papadimatos, P.P. Filntisis, N. Efthymiou, G. Retsinas, P. Tsanakas, P. Maragos An intelligent cloud-based platform for effective monitoring of patients with psychotic disorders Conference Int’l Conf. on Artificial Intelligence Applications and Innovation (AIAI-2020), Halkidiki, Greece, 2020. @conference{Maglogiannis2020, title = {An intelligent cloud-based platform for effective monitoring of patients with psychotic disorders}, author = {I. Maglogiannis and A. Zlatintsi and A. Menychtas and D. Papadimatos and P.P. Filntisis and N. Efthymiou and G. Retsinas and P. Tsanakas and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2020_MaglogiannisEtAl_e-Prevention_IntelligentCloudPlatform_AIAI-1.pdf}, year = {2020}, date = {2020-06-04}, booktitle = {Int’l Conf. on Artificial Intelligence Applications and Innovation (AIAI-2020)}, address = {Halkidiki, Greece}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
A. Tsiami, P. Koutras, P. Maragos STAViS: Spatio-Temporal AudioVisual Saliency Network Conference IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Seattle, USA, 2020. @conference{Tsiami2020, title = {STAViS: Spatio-Temporal AudioVisual Saliency Network}, author = {A. Tsiami and P. Koutras and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Tsiami_STAViS_Spatio-Temporal_AudioVisual_Saliency_Network_CVPR_2020_paper.pdf}, year = {2020}, date = {2020-06-01}, booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, address = {Seattle, USA}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
G. Potamianos, K. Papadimitriou, E. Efthimiou, S-E Fotinea, G. Sapountzaki, P. Maragos SL-ReDu: Greek Sign Language Recognition for Educational Applications. Project Description and Early Results Conference PETRA '20: Proceedings of the 13th ACM International Conference on PErvasive Technologies Related to Assistive Environments, Corfu, Greece, 2020. @conference{Potamianos2020, title = {SL-ReDu: Greek Sign Language Recognition for Educational Applications. Project Description and Early Results}, author = {G. Potamianos and K. Papadimitriou and E. Efthimiou and S-E Fotinea and G. Sapountzaki and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2020_PotamianosEtAl_SL-ReDu_PETRA.pdf}, year = {2020}, date = {2020-06-01}, booktitle = {PETRA '20: Proceedings of the 13th ACM International Conference on PErvasive Technologies Related to Assistive Environments}, address = {Corfu, Greece}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C Garoufis, A Zlatintsi, P Maragos An LSTM-Based Dynamic Chord Progression Generation System for Interactive Music Performance Conference ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2020. @conference{9053992, title = {An LSTM-Based Dynamic Chord Progression Generation System for Interactive Music Performance}, author = {C Garoufis and A Zlatintsi and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2020_GZM_InteractiveChordProgressionGeneration_ICASSP.pdf}, year = {2020}, date = {2020-05-01}, booktitle = {ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, pages = {4502--4506}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
G Retsinas, P P Filntisis, N Efthymiou, E Theodosis, A Zlatintsi, P Maragos Person Identification Using Deep Convolutional Neural Networks on Short-Term Signals from Wearable Sensors Conference ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2020. @conference{9053910, title = {Person Identification Using Deep Convolutional Neural Networks on Short-Term Signals from Wearable Sensors}, author = {G Retsinas and P P Filntisis and N Efthymiou and E Theodosis and A Zlatintsi and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/icassp2020_retsinas.pdf}, year = {2020}, date = {2020-05-01}, booktitle = {ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, pages = {3657--3661}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
G Smyrnis, P Maragos, G Retsinas Maxpolynomial Division with Application To Neural Network Simplification Conference ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2020. @conference{9053540, title = {Maxpolynomial Division with Application To Neural Network Simplification}, author = {G Smyrnis and P Maragos and G Retsinas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2020_SMR_MaxpolynomialDivision-NNsimplification_ICASSP.pdf}, year = {2020}, date = {2020-05-01}, booktitle = {ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, pages = {4192--4196}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Maragos, E Theodosis Multivariate Tropical Regression and Piecewise-Linear Surface Fitting Conference Proc. 45th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2020), Barcelona, 2020. BibTeX | Links: [Video] [PDF] [Slides] @conference{MaTh20b, title = {Multivariate Tropical Regression and Piecewise-Linear Surface Fitting}, author = {P Maragos and E Theodosis}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Maragos_MultivariateTropicRegression_ICASSP2020_presentation_video.mp4 http://robotics.ntua.gr/wp-content/uploads/sites/2/2020_MaragosTheodosis_MultivariateTropicalRegression-PWLsurfaceFitting_ICASSP.pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Maragos_MultivariateTropicRegression_ICASSP2020_presentation_slides.pdf}, year = {2020}, date = {2020-05-01}, booktitle = {Proc. 45th IEEE Int'l Conf. Acoustics, Speech and Signal Processing (ICASSP-2020)}, address = {Barcelona}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P. Maragos, E. Theodosis Tropical Geometry and Piecewise-Linear Approximation of Curves and Surfaces on Weighted Lattices Book Chapter Breuss, M; Bruckstein, A; Kiselman, C; Maragos, P (Ed.): Shape Analysis: Euclidean, Discrete and Algebraic Geometric Methods, Springer, 2020. @inbook{Maragos2020, title = {Tropical Geometry and Piecewise-Linear Approximation of Curves and Surfaces on Weighted Lattices}, author = {P. Maragos and E. Theodosis}, editor = {M. Breuss and A. Bruckstein and C. Kiselman and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/MaragosTheodosis_TropicalApproximation_Springer.pdf}, year = {2020}, date = {2020-12-31}, booktitle = {Shape Analysis: Euclidean, Discrete and Algebraic Geometric Methods}, publisher = {Springer}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } |
M Parelli, K Papadimitriou, G Potamianos, G Pavlakos, P Maragos Exploiting 3D Hand Pose Estimation in Deep Learning-based Sign Language Recognition from RGB Videos Inproceedings Proc. European Conference on Computer Vision Workshops (ECCVW) - Int'l Workshop on Sign Language Recognition, Translation & Production, 2020. @inproceedings{Parelli2020, title = {Exploiting 3D Hand Pose Estimation in Deep Learning-based Sign Language Recognition from RGB Videos}, author = {M Parelli and K Papadimitriou and G Potamianos and G Pavlakos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2020_ParelliEtAl_ECCVW-SLRTP_3dHandPoseEstimDeepLearnSignΡecognFromRGBvideo.pdf https://slrtp.com/}, year = {2020}, date = {2020-08-01}, booktitle = {Proc. European Conference on Computer Vision Workshops (ECCVW) - Int'l Workshop on Sign Language Recognition, Translation \& Production}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
I Marougkas, P Koutras, N Kardaris, G Retsinas, G Chalvatzaki, P Maragos How to track your dragon: A Multi-Attentional Framework for Real-time RGB-D 6-DOF Object Pose Tracking Inproceedings Proc. European Conference on Computer Vision Workshops (ECCVW) - 6th Int’l Workshop on Recovering 6D Object Pose, 2020. @inproceedings{marougkas2020how, title = {How to track your dragon: A Multi-Attentional Framework for Real-time RGB-D 6-DOF Object Pose Tracking}, author = {I Marougkas and P Koutras and N Kardaris and G Retsinas and G Chalvatzaki and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/MarougkasEtAl_How-to-Track-your-Dragon_ECCVW-R6D2020-4.pdf}, year = {2020}, date = {2020-01-01}, booktitle = {Proc. European Conference on Computer Vision Workshops (ECCVW) - 6th Int’l Workshop on Recovering 6D Object Pose}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
P. P. Filntisis, N. Efthymiou, G. Potamianos, P. Maragos Emotion Understanding in Videos Through Body, Context, and Visual-Semantic Embedding Loss Workshop Proc. 16th European Computer Vision Conference Workshops (ECCVW) - Workshop on Bodily Expressed Emotion Understanding, 2020. @workshop{Filntisis2020, title = {Emotion Understanding in Videos Through Body, Context, and Visual-Semantic Embedding Loss}, author = {P. P. Filntisis and N. Efthymiou and G. Potamianos and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Emotion_understanding_in_videos_through_body__context__and_visual_semantic_embedding_loss-1.pdf https://github.com/filby89/NTUA-BEEU-eccv2020}, year = {2020}, date = {2020-08-01}, booktitle = {Proc. 16th European Computer Vision Conference Workshops (ECCVW) - Workshop on Bodily Expressed Emotion Understanding}, keywords = {}, pubstate = {published}, tppubtype = {workshop} } |
2019 |
Georgia Chalvatzaki, Xanthi S Papageorgiou, Petros Maragos, Costas S Tzafestas Learn to adapt to human walking: A Model-based Reinforcement Learning Approach for a Robotic Assistant Rollator Journal Article IEEE Robotics and Automation Letters (with IROS option), 4 (4), pp. 3774–3781, 2019. @article{chalvatzaki2019learn, title = {Learn to adapt to human walking: A Model-based Reinforcement Learning Approach for a Robotic Assistant Rollator}, author = {Georgia Chalvatzaki and Xanthi S Papageorgiou and Petros Maragos and Costas S Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/19-0390_03_MS.pdf}, year = {2019}, date = {2019-12-31}, journal = {IEEE Robotics and Automation Letters (with IROS option)}, volume = {4}, number = {4}, pages = {3774--3781}, publisher = {IEEE}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
P P Filntisis, N Efthymiou, P Koutras, G Potamianos, P Maragos Fusing Body Posture With Facial Expressions for Joint Recognition of Affect in Child–Robot Interaction Journal Article IEEE Robotics and Automation Letters (with IROS option), 4 (4), pp. 4011–4018, 2019. Abstract | BibTeX | Links: [PDF] @article{8769871, title = {Fusing Body Posture With Facial Expressions for Joint Recognition of Affect in Child–Robot Interaction}, author = {P P Filntisis and N Efthymiou and P Koutras and G Potamianos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/RAL_2019-5.pdf}, doi = {10.1109/LRA.2019.2930434}, year = {2019}, date = {2019-10-01}, journal = {IEEE Robotics and Automation Letters (with IROS option)}, volume = {4}, number = {4}, pages = {4011--4018}, abstract = {In this letter, we address the problem of multi-cue affect recognition in challenging scenarios such as child–robot interaction. Toward this goal we propose a method for automatic recognition of affect that leverages body expressions alongside facial ones, as opposed to traditional methods that typically focus only on the latter. Our deep-learning based method uses hierarchical multi-label annotations and multi-stage losses, can be trained both jointly and separately, and offers us computational models for both individual modalities, as well as for the whole body emotion. We evaluate our method on a challenging child–robot interaction database of emotional expressions collected by us, as well as on the GEneva multimodal emotion portrayal public database of acted emotions by adults, and show that the proposed method achieves significantly better results than facial-only expression baselines.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this letter, we address the problem of multi-cue affect recognition in challenging scenarios such as child–robot interaction. 
Toward this goal we propose a method for automatic recognition of affect that leverages body expressions alongside facial ones, as opposed to traditional methods that typically focus only on the latter. Our deep-learning based method uses hierarchical multi-label annotations and multi-stage losses, can be trained both jointly and separately, and offers us computational models for both individual modalities, as well as for the whole body emotion. We evaluate our method on a challenging child–robot interaction database of emotional expressions collected by us, as well as on the GEneva multimodal emotion portrayal public database of acted emotions by adults, and show that the proposed method achieves significantly better results than facial-only expression baselines. |
I Rodomagoulakis, P Maragos Improved Frequency Modulation Features for Multichannel Distant Speech Recognition Journal Article IEEE Journal of Selected Topics in Signal Processing, 13 (4), pp. 841–849, 2019. Abstract | BibTeX | Links: [PDF] @article{8744655, title = {Improved Frequency Modulation Features for Multichannel Distant Speech Recognition}, author = {I Rodomagoulakis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2019_RodomagoulakisMaragos_ImprovedFreqModFeaturesMultichanDistantSpeechRecogn_JSTSP.pdf}, doi = {10.1109/JSTSP.2019.2923372}, year = {2019}, date = {2019-08-01}, journal = {IEEE Journal of Selected Topics in Signal Processing}, volume = {13}, number = {4}, pages = {841--849}, abstract = {Frequency modulation features capture the fine structure of speech formants that constitute beneficial to the traditional energy-based cepstral features by carrying supplementary information. Improvements have been demonstrated mainly in Gaussian mixture model (GMM)-hidden Markov model (HMM) systems for small and large vocabulary tasks. Yet, they have limited applications in deep neural network (DNN)-HMM systems and distant speech recognition (DSR) tasks. Herein, we elaborate on their integration within state-of-the-art front-end schemes that include post-processing of MFCCs resulting in discriminant and speaker-adapted features of large temporal contexts. We explore: 1) multichannel demodulation schemes for multi-microphone setups; 2) richer descriptors of frequency modulations; and 3) feature transformation and combination via hierarchical deep networks. We present results for tandem and hybrid recognition with GMM and DNN acoustic models, respectively. 
The improved modulation features are combined efficiently with MFCCs yielding modest and consistent improvements in multichannel DSR tasks on reverberant and noisy environments, where recognition rates are far from human performance.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Frequency modulation features capture the fine structure of speech formants that constitute beneficial to the traditional energy-based cepstral features by carrying supplementary information. Improvements have been demonstrated mainly in Gaussian mixture model (GMM)-hidden Markov model (HMM) systems for small and large vocabulary tasks. Yet, they have limited applications in deep neural network (DNN)-HMM systems and distant speech recognition (DSR) tasks. Herein, we elaborate on their integration within state-of-the-art front-end schemes that include post-processing of MFCCs resulting in discriminant and speaker-adapted features of large temporal contexts. We explore: 1) multichannel demodulation schemes for multi-microphone setups; 2) richer descriptors of frequency modulations; and 3) feature transformation and combination via hierarchical deep networks. We present results for tandem and hybrid recognition with GMM and DNN acoustic models, respectively. The improved modulation features are combined efficiently with MFCCs yielding modest and consistent improvements in multichannel DSR tasks on reverberant and noisy environments, where recognition rates are far from human performance. |
Anastasios Tsiamis, Petros Maragos Sparsity in max-plus algebra and systems Journal Article Discrete Event Dynamic Systems, 29 (2), pp. 163–189, 2019. @article{DBLP:journals/deds/TsiamisM19, title = {Sparsity in max-plus algebra and systems}, author = {Anastasios Tsiamis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/TsiamisMaragos_SparsityInMaxplusAlgebraAndSystems_DEDS2019.pdf}, doi = {10.1007/s10626-019-00281-1}, year = {2019}, date = {2019-01-01}, journal = {Discrete Event Dynamic Systems}, volume = {29}, number = {2}, pages = {163--189}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Antigoni Tsiami, Petros Koutras, Athanasios Katsamanis, Argiro Vatakis, Petros Maragos A behaviorally inspired fusion approach for computational audiovisual saliency modeling Journal Article Signal Processing: Image Communication, 76 , pp. 186–200, 2019. Abstract | BibTeX | Links: [PDF] @article{TSIAMI2019186, title = {A behaviorally inspired fusion approach for computational audiovisual saliency modeling}, author = {Antigoni Tsiami and Petros Koutras and Athanasios Katsamanis and Argiro Vatakis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/TsiamiEtAl_BehaviorInspiredFusionCompAudioVisSaliencyModel_SPIC2019_preprint.pdf}, doi = {10.1016/j.image.2019.05.001}, year = {2019}, date = {2019-01-01}, journal = {Signal Processing: Image Communication}, volume = {76}, pages = {186--200}, abstract = {Human attention is highly influenced by multi-modal combinations of perceived sensory information and especially audiovisual information. Although systematic behavioral experiments have provided evidence that human attention is multi-modal, most bottom-up computational attention models, namely saliency models for fixation prediction, focus on visual information, largely ignoring auditory input. In this work, we aim to bridge the gap between findings from neuroscience concerning audiovisual attention and the computational attention modeling, by creating a 2-D bottom-up audiovisual saliency model. We experiment with various fusion schemes for integrating state-of-the-art auditory and visual saliency models in a single audiovisual attention/saliency model based on behavioral findings, that we validate in two experimental levels: (1) using results from behavioral experiments aiming to reproduce the results in a mostly qualitative manner and to ensure that our modeling is in line with behavioral findings, and (2) using 6 different databases with audiovisual human eye-tracking data. 
For this last purpose, we have also collected eye-tracking data for two databases: ETMD, a movie database that contains highly edited videos (movie clips), and SumMe, a database that contains unstructured and unedited user videos. Experimental results indicate that our proposed audiovisual fusion schemes in most cases improve performance compared to visual-only models, without any prior knowledge of the video/audio content. Also, they can be generalized and applied to any auditory saliency model and any visual spatio-temporal saliency model.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Human attention is highly influenced by multi-modal combinations of perceived sensory information and especially audiovisual information. Although systematic behavioral experiments have provided evidence that human attention is multi-modal, most bottom-up computational attention models, namely saliency models for fixation prediction, focus on visual information, largely ignoring auditory input. In this work, we aim to bridge the gap between findings from neuroscience concerning audiovisual attention and the computational attention modeling, by creating a 2-D bottom-up audiovisual saliency model. We experiment with various fusion schemes for integrating state-of-the-art auditory and visual saliency models in a single audiovisual attention/saliency model based on behavioral findings, that we validate in two experimental levels: (1) using results from behavioral experiments aiming to reproduce the results in a mostly qualitative manner and to ensure that our modeling is in line with behavioral findings, and (2) using 6 different databases with audiovisual human eye-tracking data. For this last purpose, we have also collected eye-tracking data for two databases: ETMD, a movie database that contains highly edited videos (movie clips), and SumMe, a database that contains unstructured and unedited user videos. 
Experimental results indicate that our proposed audiovisual fusion schemes in most cases improve performance compared to visual-only models, without any prior knowledge of the video/audio content. Also, they can be generalized and applied to any auditory saliency model and any visual spatio-temporal saliency model. |
Panagiotis Giannoulis, Gerasimos Potamianos, Petros Maragos Room-localized speech activity detection in multi-microphone smart homes Journal Article EURASIP Journal on Audio, Speech, and Music Processing, 2019 (1), pp. 15, 2019, ISSN: 1687-4722. Abstract | BibTeX | Links: [PDF] @article{Giannoulis2019, title = {Room-localized speech activity detection in multi-microphone smart homes}, author = {Panagiotis Giannoulis and Gerasimos Potamianos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2019_GiannoulisEtAl_RoomlocalizedSAD-MultiMicrophoneSmartHomes_EURASIP-JASM.pdf https://doi.org/10.1186/s13636-019-0158-8}, doi = {10.1186/s13636-019-0158-8}, issn = {1687-4722}, year = {2019}, date = {2019-01-01}, journal = {EURASIP Journal on Audio, Speech, and Music Processing}, volume = {2019}, number = {1}, pages = {15}, abstract = {Voice-enabled interaction systems in domestic environments have attracted significant interest recently, being the focus of smart home research projects and commercial voice assistant home devices. Within the multi-module pipelines of such systems, speech activity detection (SAD) constitutes a crucial component, providing input to their activation and speech recognition subsystems. In typical multi-room domestic environments, SAD may also convey spatial intelligence to the interaction, in addition to its traditional temporal segmentation output, by assigning speech activity at the room level. Such room-localized SAD can, for example, disambiguate user command referents, allow localized system feedback, and enable parallel voice interaction sessions by multiple subjects in different rooms. In this paper, we investigate a room-localized SAD system for smart homes equipped with multiple microphones distributed in multiple rooms, significantly extending our earlier work. The system employs a two-stage algorithm, incorporating a set of hand-crafted features specially designed to discriminate room-inside vs. 
room-outside speech at its second stage, refining SAD hypotheses obtained at its first stage by traditional statistical modeling and acoustic front-end processing. Both algorithmic stages exploit multi-microphone information, combining it at the signal, feature, or decision level. The proposed approach is extensively evaluated on both simulated and real data recorded in a multi-room, multi-microphone smart home, significantly outperforming alternative baselines. Further, it remains robust to reduced microphone setups, while also comparing favorably to deep learning-based alternatives.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Voice-enabled interaction systems in domestic environments have attracted significant interest recently, being the focus of smart home research projects and commercial voice assistant home devices. Within the multi-module pipelines of such systems, speech activity detection (SAD) constitutes a crucial component, providing input to their activation and speech recognition subsystems. In typical multi-room domestic environments, SAD may also convey spatial intelligence to the interaction, in addition to its traditional temporal segmentation output, by assigning speech activity at the room level. Such room-localized SAD can, for example, disambiguate user command referents, allow localized system feedback, and enable parallel voice interaction sessions by multiple subjects in different rooms. In this paper, we investigate a room-localized SAD system for smart homes equipped with multiple microphones distributed in multiple rooms, significantly extending our earlier work. The system employs a two-stage algorithm, incorporating a set of hand-crafted features specially designed to discriminate room-inside vs. room-outside speech at its second stage, refining SAD hypotheses obtained at its first stage by traditional statistical modeling and acoustic front-end processing. 
Both algorithmic stages exploit multi-microphone information, combining it at the signal, feature, or decision level. The proposed approach is extensively evaluated on both simulated and real data recorded in a multi-room, multi-microphone smart home, significantly outperforming alternative baselines. Further, it remains robust to reduced microphone setups, while also comparing favorably to deep learning-based alternatives. |
Georgia Chalvatzaki, Xanthi S. Papageorgiou, Petros Maragos, Costas S. Tzafestas Comparing the Impact of Robotic Rollator Control Schemes on Elderly Gait using on-line LRF-based Gait Analysis Conference Proc. Workshop on Mobile Robot Assistants for the Elderly (MoRobAE) in 2019 IEEE Int’l Conference on Robotics and Automation (ICRA), Montreal, Canada, 2019. @conference{Chalvatzaki2019b, title = {Comparing the Impact of Robotic Rollator Control Schemes on Elderly Gait using on-line LRF-based Gait Analysis}, author = {Georgia Chalvatzaki and Xanthi S. Papageorgiou and Petros Maragos and Costas S. Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/ChalvatzakiEtAl_Comparing-the-Impact-of-Robotic-Rollator_MoRobAE-ICRA2019.pdf}, year = {2019}, date = {2019-12-01}, booktitle = {Proc. Workshop on Mobile Robot Assistants for the Elderly (MoRobAE) in 2019 IEEE Int’l Conference on Robotics and Automation (ICRA)}, address = {Montreal, Canada}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Jack Hadfield, Georgia Chalvatzaki, Petros Koutras, Mehdi Khamassi, Costas S Tzafestas, Petros Maragos A Deep Learning Approach for Multi-View Engagement Estimation of Children in a Child-Robot Joint Attention Task Conference 2019 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2019), Macau, China, 2019. @conference{hadfield:hal-02324118, title = {A Deep Learning Approach for Multi-View Engagement Estimation of Children in a Child-Robot Joint Attention Task}, author = {Jack Hadfield and Georgia Chalvatzaki and Petros Koutras and Mehdi Khamassi and Costas S Tzafestas and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/A_Deep_Learning_Approach_for_Multi-View_Engagement_Estimation_of_Children_in_a_Child-Robot_Joint_Attention_Task-1.pdf}, year = {2019}, date = {2019-11-04}, booktitle = {2019 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2019)}, address = {Macau, China}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Georgia Chalvatzaki, Petros Koutras, Jack Hadfield, Xanthi S. Papageorgiou, Costas S. Tzafestas, Petros Maragos On-line Human Gait Stability Prediction using LSTMs for the fusion of Deep-based Pose Estimation and LRF-based Augmented Gait State Estimation in an Intelligent Robotic Rollator Conference IEEE International Conference on Robotics & Automation (ICRA), Montreal, Canada, 2019. @conference{Chalvatzaki2019d, title = {On-line Human Gait Stability Prediction using LSTMs for the fusion of Deep-based Pose Estimation and LRF-based Augmented Gait State Estimation in an Intelligent Robotic Rollator}, author = {Georgia Chalvatzaki and Petros Koutras and Jack Hadfield and Xanthi S. Papageorgiou and Costas S. Tzafestas and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/On-line_Human_Gait_Stability_Prediction_using_LSTM.pdf}, year = {2019}, date = {2019-09-19}, booktitle = {IEEE International Conference on Robotics \& Automation (ICRA)}, address = {Montreal, Canada}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C. Garoufis, A. Zlatintsi, K. Kritsis, P.P. Filntisis, V. Katsouros, P. Maragos An Environment for Gestural Interaction with 3D Virtual Musical Instruments as an Educational Tool Conference Proc. 27th European Signal Processing Conf. (EUSIPCO-19), A Coruna, Spain, 2019. @conference{Garoufis2019, title = {An Environment for Gestural Interaction with 3D Virtual Musical Instruments as an Educational Tool}, author = {C. Garoufis and A. Zlatintsi and K. Kritsis and P.P. Filntisis and V. Katsouros and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2019_GZKFKM_GestureInteractWithVirtualMusicInstrumentsForEducation_EUSIPCO-1-1.pdf}, year = {2019}, date = {2019-09-01}, booktitle = {Proc. 27th European Signal Processing Conf. (EUSIPCO-19)}, address = {A Coruna, Spain}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P. Maragos Tropical Geometry, Mathematical Morphology and Weighted Lattices Conference Proc. 14th Int’l Symposium on Mathematical Morphology (ISMM-2019), Saarbruecken, Germany, 2019. @conference{Maragos2019, title = {Tropical Geometry, Mathematical Morphology and Weighted Lattices}, author = {P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2019_Maragos_TropicalGeometry-MM-WeightedLattices_ISMM-1.pdf}, year = {2019}, date = {2019-05-21}, booktitle = {Proc. 14th Int’l Symposium on Mathematical Morphology (ISMM-2019)}, pages = {3--15}, address = {Saarbruecken, Germany}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
E Theodosis, P Maragos Tropical Modeling of Weighted Transducer Algorithms on Graphs Conference Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing (ICASSP), 2019, ISSN: 2379-190X. Abstract | BibTeX | Links: [PDF] @conference{8683127, title = {Tropical Modeling of Weighted Transducer Algorithms on Graphs}, author = {E Theodosis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2019_TheodosisMaragos_TropicalModeling-Algorithms_ICASSP.pdf}, doi = {10.1109/ICASSP.2019.8683127}, issn = {2379-190X}, year = {2019}, date = {2019-05-01}, booktitle = {Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing (ICASSP)}, pages = {8653-8657}, abstract = {Weighted Finite State Transducers (WFSTs) are versatile graphical automata that can model a great number of problems, ranging from automatic speech recognition to DNA sequencing. Traditional computer science algorithms are employed when working with these automata in order to optimize their size, but also the run time of decoding algorithms. However, these algorithms are not unified under a common framework that would allow for their treatment as a whole. Moreover, the inherent geometrical representation of WFSTs, coupled with the topology-preserving algorithms that operate on them make the structures ideal for tropical analysis. The benefits of such analysis have a twofold nature; first, matrix operations offer a connection to nonlinear vector space and spectral theory, and, second, tropical algebra offers a connection to tropical geometry. In this work we model some of the most frequently used algorithms in WFSTs by using tropical algebra; this provides a theoretical unification and allows us to also analyze aspects of their tropical geometry.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Weighted Finite State Transducers (WFSTs) are versatile graphical automata that can model a great number of problems, ranging from automatic speech recognition to DNA sequencing. 
Traditional computer science algorithms are employed when working with these automata in order to optimize their size, but also the run time of decoding algorithms. However, these algorithms are not unified under a common framework that would allow for their treatment as a whole. Moreover, the inherent geometrical representation of WFSTs, coupled with the topology-preserving algorithms that operate on them make the structures ideal for tropical analysis. The benefits of such analysis have a twofold nature; first, matrix operations offer a connection to nonlinear vector space and spectral theory, and, second, tropical algebra offers a connection to tropical geometry. In this work we model some of the most frequently used algorithms in WFSTs by using tropical algebra; this provides a theoretical unification and allows us to also analyze aspects of their tropical geometry. |
G. Chalvatzaki, P. Koutras, J. Hadfield, X. S. Papageorgiou, C. S. Tzafestas, P. Maragos LSTM-based Network for Human Gait Stability Prediction in an Intelligent Robotic Rollator Conference Proc. 2019 IEEE International Conference on Robotics and Automation (ICRA), Montreal, Canada, 2019. @conference{Chalvatzaki2019, title = {LSTM-based Network for Human Gait Stability Prediction in an Intelligent Robotic Rollator}, author = {G. Chalvatzaki and P. Koutras and J. Hadfield and X. S. Papageorgiou and C. S. Tzafestas and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/ChalvatzakiEtAl_LSTMGaitStab_ICRA2019.pdf}, year = {2019}, date = {2019-05-01}, booktitle = {Proc. 2019 IEEE International Conference on Robotics and Automation (ICRA)}, address = {Montreal, Canada}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Xanthi S Papageorgiou, Georgia Chalvatzaki, Eleni Efthimiou, Stavroula-Evita Fotinea, Alexandra Karavasili, Costas S Tzafestas, Petros Maragos, Anna Vacalopoulou, Theodore Goulas User Centered HRI Design for an Intelligent Robotic Rollator Conference Proc. Workshop on Mobile Robot Assistants for the Elderly (MoRobAE) in 2019 IEEE Int’l Conference on Robotics and Automation (ICRA), Montreal, Canada, 2019. @conference{papageorgiouuser, title = {User Centered HRI Design for an Intelligent Robotic Rollator}, author = {Xanthi S Papageorgiou and Georgia Chalvatzaki and Eleni Efthimiou and Stavroula-Evita Fotinea and Alexandra Karavasili and Costas S Tzafestas and Petros Maragos and Anna Vacalopoulou and Theodore Goulas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/PapageorgiouEtAl_UserCenteredHRI_MoRobAE-ICRA2019.pdf}, year = {2019}, date = {2019-05-01}, booktitle = {Proc. Workshop on Mobile Robot Assistants for the Elderly (MoRobAE) in 2019 IEEE Int’l Conference on Robotics and Automation (ICRA)}, address = {Montreal, Canada}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Petros Koutras, Petros Maragos SUSiNet: See, Understand and Summarize it Conference Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, 2019. @conference{koutras2019susinet, title = {SUSiNet: See, Understand and Summarize it}, author = {Petros Koutras and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Koutras_SUSiNet_See_Understand_and_Summarize_It_CVPRW_2019_paper.pdf}, year = {2019}, date = {2019-01-01}, booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
N Gkanatsios, V Pitsikalis, P Koutras, A Zlatintsi, P Maragos Deeply Supervised Multimodal Attentional Translation Embeddings for Visual Relationship Detection Conference 2019 IEEE International Conference on Image Processing (ICIP), 2019. @conference{8803106, title = {Deeply Supervised Multimodal Attentional Translation Embeddings for Visual Relationship Detection}, author = {N Gkanatsios and V Pitsikalis and P Koutras and A Zlatintsi and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/GVK_DeeplySupervisedMultimodalAttentioanlTransEmbeddingsVRD_ICIP2019.pdf}, year = {2019}, date = {2019-01-01}, booktitle = {2019 IEEE International Conference on Image Processing (ICIP)}, pages = {1840-1844}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
George Retsinas, Athena Elafrou, Georgios Goumas, Petros Maragos RecNets: Channel-wise Recurrent Convolutional Neural Networks Conference British Machine Vision Conference (BMVC-2019), Cardiff, UK, 2019. @conference{retsinas2019recnets, title = {RecNets: Channel-wise Recurrent Convolutional Neural Networks}, author = {George Retsinas and Athena Elafrou and Georgios Goumas and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/REGM_RecNets_BMVC-19.pdf}, year = {2019}, date = {2019-01-01}, booktitle = {British Machine Vision Conference (BMVC-2019)}, address = {Cardiff, UK}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Christos Sakaridis, Nikos Kolotouros, Kimon Drakopoulos, Petros Maragos Chapter 4 - Active contour methods on arbitrary graphs based on partial differential equations Incollection Kimmel, R; Tai, X -C (Ed.): Processing, Analyzing and Learning of Images, Shapes, and Forms: Part 2, 20 , pp. 149-190, Elsevier North-Holland, 2019. @incollection{SKDM19, title = {Chapter 4 - Active contour methods on arbitrary graphs based on partial differential equations}, author = {Christos Sakaridis and Nikos Kolotouros and Kimon Drakopoulos and Petros Maragos}, editor = {R Kimmel and X -C Tai}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2019_SKDM_ActiveContoursOnGraphsPDEs_chapter4_HNA-20_NH.pdf}, doi = {10.1016/bs.hna.2019.07.002}, year = {2019}, date = {2019-01-01}, booktitle = {Processing, Analyzing and Learning of Images, Shapes, and Forms: Part 2}, volume = {20}, pages = {149-190}, publisher = {Elsevier North-Holland}, series = {Handbook of Numerical Analysis}, keywords = {}, pubstate = {published}, tppubtype = {incollection} } |
2018 |
A C Dometios, Y Zhou, X S Papageorgiou, C S Tzafestas, T Asfour Vision-Based Online Adaptation of Motion Primitives to Dynamic Surfaces: Application to an Interactive Robotic Wiping Task Journal Article IEEE Robotics and Automation Letters, 3 (3), pp. 1410-1417, 2018. Abstract | BibTeX | Links: [PDF] @article{DZPTA18, title = {Vision-Based Online Adaptation of Motion Primitives to Dynamic Surfaces: Application to an Interactive Robotic Wiping Task}, author = {A C Dometios and Y Zhou and X S Papageorgiou and C S Tzafestas and T Asfour}, url = {http://robotics.ntua.gr/wp-content/publications/Dometios18_Vision-Based_Online_Adaptation_Motion_Primitives.pdf}, doi = {10.1109/LRA.2018.2800031}, year = {2018}, date = {2018-07-01}, journal = {IEEE Robotics and Automation Letters}, volume = {3}, number = {3}, pages = {1410-1417}, abstract = {Elderly or disabled people usually need augmented nursing attention both in home and clinical environments, especially to perform bathing activities. The development of an assistive robotic bath system, which constitutes a central motivation of this work, would increase the independence and safety of this procedure, ameliorating in this way the everyday life for this group of people. In general terms, the main goal of this work is to enable natural, physical human-robot interaction, involving human-friendly and user-adaptive on-line robot motion planning and interaction control. For this purpose, we employ imitation learning using a leader-follower framework called Coordinate Change Dynamic Movement Primitives (CC-DMP), in order to incorporate the expertise of professional carers for bathing sequences. In this letter, we propose a vision-based washing system, combining CC-DMP framework with a perception-based controller, to adapt the motion of robot’s end effector on moving and deformable surfaces, such as a human body part. 
The controller guarantees globally uniformly asymptotic convergence to the leader movement primitive while ensuring avoidance of restricted areas, such as sensitive skin body areas. We experimentally tested our approach on a setup including the humanoid robot ARMAR-III and a Kinect v2 camera. The robot executes motions learned from the publicly available KIT whole-body human motion database, achieving good tracking performance in challenging interactive task scenarios.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Elderly or disabled people usually need augmented nursing attention both in home and clinical environments, especially to perform bathing activities. The development of an assistive robotic bath system, which constitutes a central motivation of this work, would increase the independence and safety of this procedure, ameliorating in this way the everyday life for this group of people. In general terms, the main goal of this work is to enable natural, physical human-robot interaction, involving human-friendly and user-adaptive on-line robot motion planning and interaction control. For this purpose, we employ imitation learning using a leader-follower framework called Coordinate Change Dynamic Movement Primitives (CC-DMP), in order to incorporate the expertise of professional carers for bathing sequences. In this letter, we propose a vision-based washing system, combining CC-DMP framework with a perception-based controller, to adapt the motion of robot’s end effector on moving and deformable surfaces, such as a human body part. The controller guarantees globally uniformly asymptotic convergence to the leader movement primitive while ensuring avoidance of restricted areas, such as sensitive skin body areas. We experimentally tested our approach on a setup including the humanoid robot ARMAR-III and a Kinect v2 camera. 
The robot executes motions learned from the publicly available KIT whole-body human motion database, achieving good tracking performance in challenging interactive task scenarios. |
G Chalvatzaki, X S Papageorgiou, C S Tzafestas, P Maragos Augmented Human State Estimation Using Interacting Multiple Model Particle Filters With Probabilistic Data Association Journal Article IEEE Robotics and Automation Letters, 3 (3), pp. 1872-1879, 2018, ISSN: 2377-3766. @article{8276229, title = {Augmented Human State Estimation Using Interacting Multiple Model Particle Filters With Probabilistic Data Association}, author = {G Chalvatzaki and X S Papageorgiou and C S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2018_ChalvatzakiEtAl_HumanStateEstim-IMM-ParticleFilters-PDA_ieeeRAL.pdf}, doi = {10.1109/LRA.2018.2800084}, issn = {2377-3766}, year = {2018}, date = {2018-07-01}, journal = {IEEE Robotics and Automation Letters}, volume = {3}, number = {3}, pages = {1872-1879}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Ioannis Kordonis, Petros Maragos, George P Papavassilopoulos Stochastic stability in Max-Product and Max-Plus Systems with Markovian Jumps Journal Article Automatica, 92 , pp. 123–132, 2018, ISSN: 00051098. Abstract | BibTeX | Links: [PDF] @article{348, title = {Stochastic stability in Max-Product and Max-Plus Systems with Markovian Jumps}, author = {Ioannis Kordonis and Petros Maragos and George P Papavassilopoulos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KMP_StochStabilityInMPsystemsMarkovJumps_Automatica_preprint.pdf}, doi = {10.1016/j.automatica.2018.03.008}, issn = {00051098}, year = {2018}, date = {2018-01-01}, journal = {Automatica}, volume = {92}, pages = {123--132}, abstract = {We study Max-Product and Max-Plus Systems with Markovian Jumps and focus on stochastic stability problems. At first, a Lyapunov function is derived for the asymptotically stable deterministic Max-Product Systems. This Lyapunov function is then adjusted to derive sufficient conditions for the stochastic stability of Max-Product systems with Markovian Jumps. Many step Lyapunov functions are then used to derive necessary and sufficient conditions for stochastic stability. The results for the Max-Product systems are then applied to Max-Plus systems with Markovian Jumps, using an isomorphism and almost sure bounds for the asymptotic behavior of the state are obtained. A numerical example illustrating the application of the stability results on a production system is also given.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We study Max-Product and Max-Plus Systems with Markovian Jumps and focus on stochastic stability problems. At first, a Lyapunov function is derived for the asymptotically stable deterministic Max-Product Systems. This Lyapunov function is then adjusted to derive sufficient conditions for the stochastic stability of Max-Product systems with Markovian Jumps. 
Many step Lyapunov functions are then used to derive necessary and sufficient conditions for stochastic stability. The results for the Max-Product systems are then applied to Max-Plus systems with Markovian Jumps, using an isomorphism and almost sure bounds for the asymptotic behavior of the state are obtained. A numerical example illustrating the application of the stability results on a production system is also given. |
Lampros Flokas, Petros Maragos Online Wideband Spectrum Sensing Using Sparsity Journal Article IEEE Journal of Selected Topics in Signal Processing, 12 (1), pp. 35–44, 2018, ISSN: 19324553. Abstract | BibTeX | Links: [PDF] @article{349, title = {Online Wideband Spectrum Sensing Using Sparsity}, author = {Lampros Flokas and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/FlokasMaragos_OnlineWideSpectrumSensingUsingSparsity_JSTSP_preprint.pdf}, doi = {10.1109/JSTSP.2018.2797422}, issn = {19324553}, year = {2018}, date = {2018-01-01}, journal = {IEEE Journal of Selected Topics in Signal Processing}, volume = {12}, number = {1}, pages = {35--44}, abstract = {Wideband spectrum sensing is an essential part of cognitive radio systems. Exact spectrum estimation is usually inefficient as it requires sampling rates at or above the Nyquist rate. Using prior information on the structure of the signal could allow near exact reconstruction at much lower sampling rates. Sparsity of the sampled signal in the frequency domain is one of the popular priors studied for cognitive radio applications. Reconstruction of signals under sparsity assumptions has been studied rigorously by researchers in the field of Compressed Sensing (CS). CS algorithms that operate on batches of samples are known to be robust but can be computationally costly, making them unsuitable for cheap low power cognitive radio devices that require spectrum sensing in real time. On the other hand, online algorithms that are based on variations of the Least Mean Squares (LMS) algorithm have very simple updates so they are computationally efficient and can easily adapt in real time to changes of the underlying spectrum. In this paper we will present two variations of the LMS algorithm that enforce sparsity in the estimated spectrum given an upper bound on the number of non- zero coefficients. 
Assuming that the number of non-zero elements in the spectrum is known we show that under conditions the hard threshold operation can only reduce the error of our estimation. We will also show that we can estimate the number of non-zero elements of the spectrum at each iteration based on our online estimations. Finally, we numerically compare our algorithm with other online sparsity-inducing algorithms in the literature.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Wideband spectrum sensing is an essential part of cognitive radio systems. Exact spectrum estimation is usually inefficient as it requires sampling rates at or above the Nyquist rate. Using prior information on the structure of the signal could allow near exact reconstruction at much lower sampling rates. Sparsity of the sampled signal in the frequency domain is one of the popular priors studied for cognitive radio applications. Reconstruction of signals under sparsity assumptions has been studied rigorously by researchers in the field of Compressed Sensing (CS). CS algorithms that operate on batches of samples are known to be robust but can be computationally costly, making them unsuitable for cheap low power cognitive radio devices that require spectrum sensing in real time. On the other hand, online algorithms that are based on variations of the Least Mean Squares (LMS) algorithm have very simple updates so they are computationally efficient and can easily adapt in real time to changes of the underlying spectrum. In this paper we will present two variations of the LMS algorithm that enforce sparsity in the estimated spectrum given an upper bound on the number of non- zero coefficients. Assuming that the number of non-zero elements in the spectrum is known we show that under conditions the hard threshold operation can only reduce the error of our estimation. 
We will also show that we can estimate the number of non-zero elements of the spectrum at each iteration based on our online estimations. Finally, we numerically compare our algorithm with other online sparsity-inducing algorithms in the literature. |
Mehdi Khamassi, George Velentzas, Theodore Tsitsimis, Costas Tzafestas Robot fast adaptation to changes in human engagement during simulated dynamic social interaction with active exploration in parameterized reinforcement learning Journal Article IEEE Transactions on Cognitive and Developmental Systems, 10 , pp. 881–893, 2018. Abstract | BibTeX | Links: [PDF] @article{BFB99, title = {Robot fast adaptation to changes in human engagement during simulated dynamic social interaction with active exploration in parameterized reinforcement learning}, author = {Mehdi Khamassi and George Velentzas and Theodore Tsitsimis and Costas Tzafestas}, url = {http://robotics.ntua.gr/wp-content/publications/Khamassi_TCDS2018.pdf}, doi = {10.1109/TCDS.2018.2843122}, year = {2018}, date = {2018-01-01}, journal = {IEEE Transactions on Cognitive and Developmental Systems}, volume = {10}, pages = {881--893}, publisher = {IEEE}, abstract = {Dynamic uncontrolled human-robot interactions (HRI) require robots to be able to adapt to changes in the human’s behavior and intentions. Among relevant signals, non-verbal cues such as the human’s gaze can provide the robot with important information about the human’s current engagement in the task, and whether the robot should continue its current behavior or not. However, robot reinforcement learning (RL) abilities to adapt to these non-verbal cues are still underdeveloped. Here we propose an active exploration algorithm for RL during HRI where the reward function is the weighted sum of the human’s current engagement and variations of this engagement. We use a parameterized action space where a meta-learning algorithm is applied to simultaneously tune the exploration in discrete action space (e.g. moving an object) and in the space of continuous characteristics of movement (e.g. velocity, direction, strength, expressivity). We first show that this algorithm reaches state-of-the-art performance in the non-stationary multi-armed bandit paradigm. 
We then apply it to a simulated HRI task, and show that it outperforms continuous parameterized RL with either passive or active exploration based on different existing methods. We finally test the performance in a more realistic test of the same HRI task, where a practical approach is followed to estimate human engagement through visual cues of the head pose. The algorithm can detect and adapt to perturbations in human engagement with different durations. Altogether, these results suggest a novel efficient and robust framework for robot learning during dynamic HRI scenarios.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Dynamic uncontrolled human-robot interactions (HRI) require robots to be able to adapt to changes in the human’s behavior and intentions. Among relevant signals, non-verbal cues such as the human’s gaze can provide the robot with important information about the human’s current engagement in the task, and whether the robot should continue its current behavior or not. However, robot reinforcement learning (RL) abilities to adapt to these non-verbal cues are still underdeveloped. Here we propose an active exploration algorithm for RL during HRI where the reward function is the weighted sum of the human’s current engagement and variations of this engagement. We use a parameterized action space where a meta-learning algorithm is applied to simultaneously tune the exploration in discrete action space (e.g. moving an object) and in the space of continuous characteristics of movement (e.g. velocity, direction, strength, expressivity). We first show that this algorithm reaches state-of-the-art performance in the non-stationary multi-armed bandit paradigm. We then apply it to a simulated HRI task, and show that it outperforms continuous parameterized RL with either passive or active exploration based on different existing methods. 
We finally test the performance in a more realistic test of the same HRI task, where a practical approach is followed to estimate human engagement through visual cues of the head pose. The algorithm can detect and adapt to perturbations in human engagement with different durations. Altogether, these results suggest a novel efficient and robust framework for robot learning during dynamic HRI scenarios. |
N. Efthymiou, P. Koutras, P. P. Filntisis, G. Potamianos, P. Maragos Multi-View Fusion for Action Recognition in Child-Robot Interaction Conference Proc. IEEE Int'l Conf. on Image Processing, Athens, Greece, 2018. Abstract | BibTeX | Links: [PDF] @conference{efthymiou18action, title = {Multi-View Fusion for Action Recognition in Child-Robot Interaction}, author = {N. Efthymiou and P. Koutras and P. P. Filntisis and G. Potamianos and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/EfthymiouKoutrasFilntisis_MultiViewFusActRecognChildRobotInteract_ICIP18.pdf}, year = {2018}, date = {2018-10-01}, booktitle = {Proc. IEEE Int'l Conf. on Image Processing}, address = {Athens, Greece}, abstract = {Answering the challenge of leveraging computer vision methods in order to enhance Human Robot Interaction (HRI) experience, this work explores methods that can expand the capabilities of an action recognition system in such tasks. A multi-view action recognition system is proposed for integration in HRI scenarios with special users, such as children, in which there is limited data for training and many state-of-the-art techniques face difficulties. Different feature extraction approaches, encoding methods and fusion techniques are combined and tested in order to create an efficient system that recognizes children pantomime actions. This effort culminates in the integration of a robotic platform and is evaluated under an alluring Children Robot Interaction scenario.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Answering the challenge of leveraging computer vision methods in order to enhance Human Robot Interaction (HRI) experience, this work explores methods that can expand the capabilities of an action recognition system in such tasks. 
A multi-view action recognition system is proposed for integration in HRI scenarios with special users, such as children, in which there is limited data for training and many state-of-the-art techniques face difficulties. Different feature extraction approaches, encoding methods and fusion techniques are combined and tested in order to create an efficient system that recognizes children pantomime actions. This effort culminates in the integration of a robotic platform and is evaluated under an alluring Children Robot Interaction scenario. |
A. Zlatintsi, P.P. Filntisis, C. Garoufis, A. Tsiami, K. Kritsis, M.A. Kaliakatsos-Papakostas, A. Gkiokas, V. Katsouros, P. Maragos A Web-based Real-Time Kinect Application for Gestural Interaction with Virtual Musical Instruments Conference Proc. of Audio Mostly Conference (AM’18), Wrexham, North Wales, UK, 2018. @conference{Zlatintsi2018, title = {A Web-based Real-Time Kinect Application for Gestural Interaction with Virtual Musical Instruments}, author = {A. Zlatintsi and P.P. Filntisis and C. Garoufis and A. Tsiami and K. Kritsis and M.A. Kaliakatsos-Papakostas and A. Gkiokas and V. Katsouros and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/ZlatintsiEtAl_WebBasedRealTimeKinectAppGestInteractVMI_ΑΜ18-1.pdf}, year = {2018}, date = {2018-09-01}, booktitle = {Proc. of Audio Mostly Conference (AM’18)}, address = {Wrexham, North Wales, UK}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C. Garoufis, A. Zlatintsi, P. Maragos A Collaborative System for Composing Music via Motion Using a Kinect Sensor and Skeletal Data Conference Proc. 15th International Sound & Music Computing Conference (SMC-2018), Limassol, Cyprus, 2018. Abstract | BibTeX | Links: [PDF] @conference{Garoufis2018, title = {A Collaborative System for Composing Music via Motion Using a Kinect Sensor and Skeletal Data}, author = {C. Garoufis and A. Zlatintsi and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2018_GZM_CollaborativeSystComposMusicMotionKinectSkeletalData_SMC2018_cr.pdf}, year = {2018}, date = {2018-07-01}, booktitle = {Proc. 15th International Sound \& Music Computing Conference (SMC-2018)}, address = {Limassol, Cyprus}, abstract = {This paper describes MoveSynth, a performance system for two players, who interact with it and collaborate with each other in various ways, including full-body movements, arm postures and continuous gestures, to compose music in real time. The system uses a Kinect sensor, in order to track the performers’ positions, as well as their arm and hand movements. In the system’s current state, the musical parameters that the performers can influence include the pitch and the volume of the music, the timbre of the sound, as well as the time interval between successive notes. We extensively experimented using various classifiers in order to detect the one that gives the optimal results regarding the task of continuous gesture and arm posture recognition, accomplishing 92.11% for continuous gestures and 99.33% for arm postures, using an 1-NN classifier with a condensed search space in both cases. 
Additionally, the qualitative results of the usability testing of the final system, which was performed by 9 users, are encouraging and identify possible avenues for further exploration and improvement.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper describes MoveSynth, a performance system for two players, who interact with it and collaborate with each other in various ways, including full-body movements, arm postures and continuous gestures, to compose music in real time. The system uses a Kinect sensor, in order to track the performers’ positions, as well as their arm and hand movements. In the system’s current state, the musical parameters that the performers can influence include the pitch and the volume of the music, the timbre of the sound, as well as the time interval between successive notes. We extensively experimented using various classifiers in order to detect the one that gives the optimal results regarding the task of continuous gesture and arm posture recognition, accomplishing 92.11% for continuous gestures and 99.33% for arm postures, using an 1-NN classifier with a condensed search space in both cases. Additionally, the qualitative results of the usability testing of the final system, which was performed by 9 users, are encouraging and identify possible avenues for further exploration and improvement. |
X S Papageorgiou, G Chalvatzaki, A Dometios, C S Tzafestas Human-Centered Service Robotic Systems for Assisted Living Conference Proceedings of the 27th International Conference on Robotics in Alpe-Adria Danube Region (RAAD 2018), 2018. Abstract | BibTeX | Links: [PDF]
@conference{RAAD2018,
  author    = {X S Papageorgiou and G Chalvatzaki and A Dometios and C S Tzafestas},
  title     = {Human-Centered Service Robotic Systems for Assisted Living},
  booktitle = {Proceedings of the 27th International Conference on Robotics in Alpe-Adria Danube Region (RAAD 2018)},
  year      = {2018},
  date      = {2018-06-01},
  url       = {http://robotics.ntua.gr/wp-content/publications/RAAD2018.pdf},
  abstract  = {Mobility impairment is a common problem for the elderly population which relates to difficulties in performing Activities of Daily Living (ADLs) and consequently leads to restrictions and the degradation of the living standards of the elders. When designing a user-friendly assistive device for mobility constrained people, the variable spectrum of disabilities is a factor that should affect the design process, since people with different impairments have different needs to be covered by the device, thus an adaptive behavior of those systems is necessary. Also, the performance of bathing activities includes several challenges for the elderly people, since such tasks require body flexibility. In this paper, we present current frameworks and solutions for intelligent robotic systems for assistive living involving human robot interaction in a natural interface. Our aim is to build such systems, in order to increase the independence and safety of these procedures. To achieve human - robot interaction in a natural way, we have to adapt the expertise of carers regarding bathing motions and walking assistance. The main goal of this work is to present recent research results towards the development of two real-life use cases incorporating intelligent robotic systems, aiming to support mobility and bathing activities for the elderly in order to provide context-aware and user-adaptive assistance.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Mobility impairment is a common problem for the elderly population which relates to difficulties in performing Activities of Daily Living (ADLs) and consequently leads to restrictions and the degradation of the living standards of the elders. When designing a user-friendly assistive device for mobility constrained people, the variable spectrum of disabilities is a factor that should affect the design process, since people with different impairments have different needs to be covered by the device, thus an adaptive behavior of those systems is necessary. Also, the performance of bathing activities includes several challenges for the elderly people, since such tasks require body flexibility. In this paper, we present current frameworks and solutions for intelligent robotic systems for assistive living involving human robot interaction in a natural interface. Our aim is to build such systems, in order to increase the independence and safety of these procedures. To achieve human - robot interaction in a natural way, we have to adapt the expertise of carers regarding bathing motions and walking assistance. The main goal of this work is to present recent research results towards the development of two real-life use cases incorporating intelligent robotic systems, aiming to support mobility and bathing activities for the elderly in order to provide context-aware and user-adaptive assistance. |
G Bouritsas, P Koutras, A Zlatintsi, Petros Maragos Multimodal Visual Concept Learning with Weakly Supervised Techniques Conference Proc. IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Salt Lake City, Utah, USA, 2018. Abstract | BibTeX | Links: [PDF]
@conference{BKA+18,
  author    = {G Bouritsas and P Koutras and A Zlatintsi and Petros Maragos},
  title     = {Multimodal Visual Concept Learning with Weakly Supervised Techniques},
  booktitle = {Proc. IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  address   = {Salt Lake City, Utah, USA},
  year      = {2018},
  date      = {2018-06-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2018_BKZM_MultimodalVisualConceptLearningWeaklySupervisedTechniques_CVPR.pdf},
  abstract  = {Despite the availability of a huge amount of video data accompanied by descriptive texts, it is not always easy to exploit the information contained in natural language in order to automatically recognize video concepts. Towards this goal, in this paper we use textual cues as means of supervision, introducing two weakly supervised techniques that extend the Multiple Instance Learning (MIL) framework: the Fuzzy Sets Multiple Instance Learning (FSMIL) and the Probabilistic Labels Multiple Instance Learning (PLMIL). The former encodes the spatio-temporal imprecision of the linguistic descriptions with Fuzzy Sets, while the latter models different interpretations of each description’s semantics with Probabilistic Labels, both formulated through a convex optimization algorithm. In addition, we provide a novel technique to extract weak labels in the presence of complex semantics, that consists of semantic similarity computations. We evaluate our methods on two distinct problems, namely face and action recognition, in the challenging and realistic setting of movies accompanied by their screenplays, contained in the COGNIMUSE database. We show that, on both tasks, our method considerably outperforms a state-of-the-art weakly supervised approach, as well as other baselines.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Despite the availability of a huge amount of video data accompanied by descriptive texts, it is not always easy to exploit the information contained in natural language in order to automatically recognize video concepts. Towards this goal, in this paper we use textual cues as means of supervision, introducing two weakly supervised techniques that extend the Multiple Instance Learning (MIL) framework: the Fuzzy Sets Multiple Instance Learning (FSMIL) and the Probabilistic Labels Multiple Instance Learning (PLMIL). The former encodes the spatio-temporal imprecision of the linguistic descriptions with Fuzzy Sets, while the latter models different interpretations of each description’s semantics with Probabilistic Labels, both formulated through a convex optimization algorithm. In addition, we provide a novel technique to extract weak labels in the presence of complex semantics, that consists of semantic similarity computations. We evaluate our methods on two distinct problems, namely face and action recognition, in the challenging and realistic setting of movies accompanied by their screenplays, contained in the COGNIMUSE database. We show that, on both tasks, our method considerably outperforms a state-of-the-art weakly supervised approach, as well as other baselines. |
E. Theodosis, P. Maragos Analysis of the Viterbi Algorithm Using Tropical Algebra and Geometry Conference Proc. IEEE International Workshop on Signal Processing Advances in Wireless Communications (SPAWC-18), Kalamata, Greece, 2018.
@conference{Theodosis2018,
  author    = {E. Theodosis and P. Maragos},
  title     = {Analysis of the Viterbi Algorithm Using Tropical Algebra and Geometry},
  booktitle = {Proc. IEEE International Workshop on Signal Processing Advances in Wireless Communications (SPAWC-18)},
  address   = {Kalamata, Greece},
  year      = {2018},
  date      = {2018-06-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2018_TheodosisMaragos_AnalysisViterbi-TropicalAlgebraGeometry_SPAWC.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
A Tsiami, P Koutras, Niki Efthymiou, P P Filntisis, G Potamianos, P Maragos Multi3: Multi-sensory Perception System for Multi-modal Child Interaction with Multiple Robots Conference IEEE International Conference on Robotics and Automation (ICRA), Brisbane, Australia, 2018. Abstract | BibTeX | Links: [PDF]
@conference{multi3,
  author    = {A Tsiami and P Koutras and Niki Efthymiou and P P Filntisis and G Potamianos and P Maragos},
  title     = {Multi3: Multi-sensory Perception System for Multi-modal Child Interaction with Multiple Robots},
  booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
  address   = {Brisbane, Australia},
  year      = {2018},
  date      = {2018-05-01},
  url       = {http://robotics.ntua.gr/wp-content/publications/2018_TsiamiEtAl_Multi3-MultisensorMultimodalChildInteractMultRobots_ICRA.pdf},
  abstract  = {Child-robot interaction is an interdisciplinary research area that has been attracting growing interest, primarily focusing on edutainment applications. A crucial factor to the successful deployment and wide adoption of such applications remains the robust perception of the child's multimodal actions, when interacting with the robot in a natural and untethered fashion. Since robotic sensory and perception capabilities are platform-dependent and most often rather limited, we propose a multiple Kinect-based system to perceive the child-robot interaction scene that is robot-independent and suitable for indoors interaction scenarios. The audio-visual input from the Kinect sensors is fed into speech, gesture, and action recognition modules, appropriately developed in this paper to address the challenging nature of child-robot interaction. For this purpose, data from multiple children are collected and used for module training or adaptation. Further, information from the multiple sensors is fused to enhance module performance. The perception system is integrated in a modular multi-robot architecture demonstrating its flexibility and scalability with different robotic platforms. The whole system, called Multi3, is evaluated, both objectively at the module level and subjectively in its entirety, under appropriate child-robot interaction scenarios containing several carefully designed games between children and robots.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Child-robot interaction is an interdisciplinary research area that has been attracting growing interest, primarily focusing on edutainment applications. A crucial factor to the successful deployment and wide adoption of such applications remains the robust perception of the child's multimodal actions, when interacting with the robot in a natural and untethered fashion. Since robotic sensory and perception capabilities are platform-dependent and most often rather limited, we propose a multiple Kinect-based system to perceive the child-robot interaction scene that is robot-independent and suitable for indoors interaction scenarios. The audio-visual input from the Kinect sensors is fed into speech, gesture, and action recognition modules, appropriately developed in this paper to address the challenging nature of child-robot interaction. For this purpose, data from multiple children are collected and used for module training or adaptation. Further, information from the multiple sensors is fused to enhance module performance. The perception system is integrated in a modular multi-robot architecture demonstrating its flexibility and scalability with different robotic platforms. The whole system, called Multi3, is evaluated, both objectively at the module level and subjectively in its entirety, under appropriate child-robot interaction scenarios containing several carefully designed games between children and robots. |
A Zlatintsi, I Rodomagoulakis, P Koutras, A C Dometios, V Pitsikalis, C S Tzafestas, P Maragos Multimodal Signal Processing and Learning Aspects of Human-Robot Interaction for an Assistive Bathing Robot Conference Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing, Calgary, Canada, 2018. Abstract | BibTeX | Links: [PDF]
@conference{ZRK+18,
  author    = {A Zlatintsi and I Rodomagoulakis and P Koutras and A C Dometios and V Pitsikalis and C S Tzafestas and P Maragos},
  title     = {Multimodal Signal Processing and Learning Aspects of Human-Robot Interaction for an Assistive Bathing Robot},
  booktitle = {Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing},
  address   = {Calgary, Canada},
  year      = {2018},
  date      = {2018-04-01},
  url       = {http://robotics.ntua.gr/wp-content/publications/Zlatintsi+_I-SUPPORT_ICASSP18.pdf},
  abstract  = {We explore new aspects of assistive living on smart human-robot interaction (HRI) that involve automatic recognition and online validation of speech and gestures in a natural interface, providing social features for HRI. We introduce a whole framework and resources of a real-life scenario for elderly subjects supported by an assistive bathing robot, addressing health and hygiene care issues. We contribute a new dataset and a suite of tools used for data acquisition and a state-of-the-art pipeline for multimodal learning within the framework of the I-Support bathing robot, with emphasis on audio and RGB-D visual streams. We consider privacy issues by evaluating the depth visual stream along with the RGB, using Kinect sensors. The audio-gestural recognition task on this new dataset yields up to 84.5%, while the online validation of the I-Support system on elderly users accomplishes up to 84% when the two modalities are fused together. The results are promising enough to support further research in the area of multimodal recognition for assistive social HRI, considering the difficulties of the specific task.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
We explore new aspects of assistive living on smart human-robot interaction (HRI) that involve automatic recognition and online validation of speech and gestures in a natural interface, providing social features for HRI. We introduce a whole framework and resources of a real-life scenario for elderly subjects supported by an assistive bathing robot, addressing health and hygiene care issues. We contribute a new dataset and a suite of tools used for data acquisition and a state-of-the-art pipeline for multimodal learning within the framework of the I-Support bathing robot, with emphasis on audio and RGB-D visual streams. We consider privacy issues by evaluating the depth visual stream along with the RGB, using Kinect sensors. The audio-gestural recognition task on this new dataset yields up to 84.5%, while the online validation of the I-Support system on elderly users accomplishes up to 84% when the two modalities are fused together. The results are promising enough to support further research in the area of multimodal recognition for assistive social HRI, considering the difficulties of the specific task. |
A Tsiami, P P Filntisis, N Efthymiou, P Koutras, G Potamianos, P Maragos Far-Field Audio-Visual Scene Perception of Multi-Party Human-Robot Interaction for Children and Adults Conference Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing (ICASSP), Calgary, Canada, 2018. Abstract | BibTeX | Links: [PDF]
@conference{tsiamifar,
  author    = {A Tsiami and P P Filntisis and N Efthymiou and P Koutras and G Potamianos and P Maragos},
  title     = {Far-Field Audio-Visual Scene Perception of Multi-Party Human-Robot Interaction for Children and Adults},
  booktitle = {Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing (ICASSP)},
  address   = {Calgary, Canada},
  year      = {2018},
  date      = {2018-04-01},
  url       = {http://robotics.ntua.gr/wp-content/publications/2018_TsiamiEtAl_FarfieldAVperceptionHRI-ChildrenAdults_ICASSP.pdf},
  abstract  = {Human-robot interaction (HRI) is a research area of growing interest with a multitude of applications for both children and adult user groups, as, for example, in edutainment and social robotics. Crucial, however, to its wider adoption remains the robust perception of HRI scenes in natural, untethered, and multi-party interaction scenarios, across user groups. Towards this goal, we investigate three focal HRI perception modules operating on data from multiple audio-visual sensors that observe the HRI scene from the far-field, thus bypassing limitations and platform-dependency of contemporary robotic sensing. In particular, the developed modules fuse intra- and/or inter-modality data streams to perform: (i) audio-visual speaker localization; (ii) distant speech recognition; and (iii) visual recognition of hand-gestures. Emphasis is also placed on ensuring high speech and gesture recognition rates for both children and adults. Development and objective evaluation of the three modules is conducted on a corpus of both user groups, collected by our far-field multi-sensory setup, for an interaction scenario of a question-answering ``guess-the-object'' collaborative HRI game with a ``Furhat'' robot. In addition, evaluation of the game incorporating the three developed modules is reported. Our results demonstrate robust far-field audio-visual perception of the multi-party HRI scene.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Human-robot interaction (HRI) is a research area of growing interest with a multitude of applications for both children and adult user groups, as, for example, in edutainment and social robotics. Crucial, however, to its wider adoption remains the robust perception of HRI scenes in natural, untethered, and multi-party interaction scenarios, across user groups. Towards this goal, we investigate three focal HRI perception modules operating on data from multiple audio-visual sensors that observe the HRI scene from the far-field, thus bypassing limitations and platform-dependency of contemporary robotic sensing. In particular, the developed modules fuse intra- and/or inter-modality data streams to perform: (i) audio-visual speaker localization; (ii) distant speech recognition; and (iii) visual recognition of hand-gestures. Emphasis is also placed on ensuring high speech and gesture recognition rates for both children and adults. Development and objective evaluation of the three modules is conducted on a corpus of both user groups, collected by our far-field multi-sensory setup, for an interaction scenario of a question-answering ``guess-the-object'' collaborative HRI game with a ``Furhat'' robot. In addition, evaluation of the game incorporating the three developed modules is reported. Our results demonstrate robust far-field audio-visual perception of the multi-party HRI scene. |
Jack Hadfield, Petros Koutras, Niki Efthymiou, Gerasimos Potamianos, Costas S Tzafestas, Petros Maragos Object assembly guidance in child-robot interaction using RGB-D based 3D tracking Conference 2018 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), IEEE 2018.
@conference{hadfield2018object,
  author       = {Jack Hadfield and Petros Koutras and Niki Efthymiou and Gerasimos Potamianos and Costas S Tzafestas and Petros Maragos},
  title        = {Object assembly guidance in child-robot interaction using {RGB-D} based {3D} tracking},
  booktitle    = {2018 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
  pages        = {347--354},
  organization = {IEEE},
  year         = {2018},
  date         = {2018-01-01},
  url          = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2018_HadfieldEtAl_ObjectAssemblyGuidance-ChildRobotInteraction_IROS.pdf},
  keywords     = {},
  pubstate     = {published},
  tppubtype    = {conference}
} |
2017 |
P. Maragos Dynamical systems on weighted lattices: general theory Journal Article Math. Control Signals Syst., 29 (1), 2017.
@article{Maragos2017,
  author    = {P. Maragos},
  title     = {Dynamical systems on weighted lattices: general theory},
  journal   = {Math. Control Signals Syst.},
  volume    = {29},
  number    = {1},
  year      = {2017},
  date      = {2017-12-01},
  doi       = {10.1007/s00498-017-0207-8},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2017_Maragos_DynamicalSystemsOnWeightedLattices_MCSS.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
} |
I. Rodomagoulakis, A. Katsamanis, G. Potamianos, P. Giannoulis, A. Tsiami, P. Maragos Room-localized spoken command recognition in multi-room, multi-microphone environments Journal Article Computer Speech & Language, 46 , pp. 419–443, 2017.
@article{Rodomagoulakis2017,
  author    = {I. Rodomagoulakis and A. Katsamanis and G. Potamianos and P. Giannoulis and A. Tsiami and P. Maragos},
  title     = {Room-localized spoken command recognition in multi-room, multi-microphone environments},
  journal   = {Computer Speech \& Language},
  volume    = {46},
  pages     = {419--443},
  year      = {2017},
  date      = {2017-11-01},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/RKPGTM_SCR-RoomlocalizedMultiroomMultimic_csl2017.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
} |
Christos Sakaridis, Kimon Drakopoulos, Petros Maragos Theoretical Analysis of Active Contours on Graphs Journal Article SIAM J. Imaging Sciences, 2017, ISSN: 1936-4954. Abstract | BibTeX | Links: [Webpage] [PDF]
@article{344,
  author     = {Christos Sakaridis and Kimon Drakopoulos and Petros Maragos},
  title      = {Theoretical Analysis of Active Contours on Graphs},
  journal    = {SIAM J. Imaging Sciences},
  year       = {2017},
  date       = {2017-01-01},
  issn       = {1936-4954},
  doi        = {10.1137/16M1100101},
  eprint     = {1610.07381},
  eprinttype = {arXiv},
  url        = {http://robotics.ntua.gr/wp-content/uploads/sites/2/SakaridisDrakopoulosMaragos_TheoreticalAnalysisActiveContoursOnGraphs_siims2017.pdf},
  abstract   = {Active contour models based on partial differential equations have proved successful in image segmentation, yet the study of their geometric formulation on arbitrary geometric graphs is still at an early stage. In this paper, we introduce geometric approximations of gradient and curvature, which are used in the geodesic active contour model. We prove convergence in probability of our gradient approximation to the true gradient value and derive an asymptotic upper bound for the error of this approximation for the class of random geometric graphs. Two different approaches for the approximation of curvature are presented and both are also proved to converge in probability in the case of random geometric graphs. We propose neighborhood-based filtering on graphs to improve the accuracy of the aforementioned approximations and define two variants of Gaussian smoothing on graphs which include normalization in order to adapt to graph non-uniformities. The performance of our active contour framework on graphs is demonstrated in the segmentation of regular images and geographical data defined on arbitrary graphs.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
Active contour models based on partial differential equations have proved successful in image segmentation, yet the study of their geometric formulation on arbitrary geometric graphs is still at an early stage. In this paper, we introduce geometric approximations of gradient and curvature, which are used in the geodesic active contour model. We prove convergence in probability of our gradient approximation to the true gradient value and derive an asymptotic upper bound for the error of this approximation for the class of random geometric graphs. Two different approaches for the approximation of curvature are presented and both are also proved to converge in probability in the case of random geometric graphs. We propose neighborhood-based filtering on graphs to improve the accuracy of the aforementioned approximations and define two variants of Gaussian smoothing on graphs which include normalization in order to adapt to graph non-uniformities. The performance of our active contour framework on graphs is demonstrated in the segmentation of regular images and geographical data defined on arbitrary graphs. |
Christos G Bampis, Petros Maragos, Alan C Bovik Graph-driven diffusion and random walk schemes for image segmentation Journal Article IEEE Transactions on Image Processing, 26 (1), pp. 35–50, 2017, ISSN: 1057-7149. Abstract | BibTeX | Links: [PDF]
@article{327,
  author    = {Christos G Bampis and Petros Maragos and Alan C Bovik},
  title     = {Graph-driven diffusion and random walk schemes for image segmentation},
  journal   = {IEEE Transactions on Image Processing},
  volume    = {26},
  number    = {1},
  pages     = {35--50},
  year      = {2017},
  date      = {2017-01-01},
  issn      = {1057-7149},
  doi       = {10.1109/TIP.2016.2621663},
  url       = {http://robotics.ntua.gr/wp-content/uploads/publications/BampisMaragosBovik_GraphDiffusionRandomWalkImageSegment_TIP2017_0.pdf},
  abstract  = {We propose graph-driven approaches to image segmentation by developing diffusion processes defined on arbitrary graphs. We formulate a solution to the image segmentation problem modeled as the result of infectious wavefronts propagating on an image-driven graph, where pixels correspond to nodes of an arbitrary graph. By relating the popular susceptible-infected-recovered epidemic propagation model to the Random Walker algorithm, we develop the normalized random walker and a lazy random walker variant. The underlying iterative solutions of these methods are derived as the result of infections transmitted on this arbitrary graph. The main idea is to incorporate a degree-aware term into the original Random Walker algorithm in order to account for the node centrality of every neighboring node and to weigh the contribution of every neighbor to the underlying diffusion process. Our lazy random walk variant models the tendency of patients or nodes to resist changes in their infection status. We also show how previous work can be naturally extended to take advantage of this degree-aware term, which enables the design of other novel methods. Through an extensive experimental analysis, we demonstrate the reliability of our approach, its small computational burden and the dimensionality reduction capabilities of graph-driven approaches. Without applying any regular grid constraint, the proposed graph clustering scheme allows us to consider pixel-level, node-level approaches, and multidimensional input data by naturally integrating the importance of each node to the final clustering or segmentation solution. A software release containing implementations of this paper and supplementary material can be found at: http://cvsp.cs.ntua.gr/research/GraphClustering/.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
We propose graph-driven approaches to image segmentation by developing diffusion processes defined on arbitrary graphs. We formulate a solution to the image segmentation problem modeled as the result of infectious wavefronts propagating on an image-driven graph, where pixels correspond to nodes of an arbitrary graph. By relating the popular susceptible-infected-recovered epidemic propagation model to the Random Walker algorithm, we develop the normalized random walker and a lazy random walker variant. The underlying iterative solutions of these methods are derived as the result of infections transmitted on this arbitrary graph. The main idea is to incorporate a degree-aware term into the original Random Walker algorithm in order to account for the node centrality of every neighboring node and to weigh the contribution of every neighbor to the underlying diffusion process. Our lazy random walk variant models the tendency of patients or nodes to resist changes in their infection status. We also show how previous work can be naturally extended to take advantage of this degree-aware term, which enables the design of other novel methods. Through an extensive experimental analysis, we demonstrate the reliability of our approach, its small computational burden and the dimensionality reduction capabilities of graph-driven approaches. Without applying any regular grid constraint, the proposed graph clustering scheme allows us to consider pixel-level, node-level approaches, and multidimensional input data by naturally integrating the importance of each node to the final clustering or segmentation solution. A software release containing implementations of this paper and supplementary material can be found at: http://cvsp.cs.ntua.gr/research/GraphClustering/. |
Panagiotis Paraskevas Filntisis, Athanasios Katsamanis, Pirros Tsiakoulis, Petros Maragos Video-realistic expressive audio-visual speech synthesis for the Greek language Journal Article Speech Communication, 95 , pp. 137–152, 2017, ISSN: 0167-6393. Abstract | BibTeX | Links: [PDF]
@article{345,
  author    = {Panagiotis Paraskevas Filntisis and Athanasios Katsamanis and Pirros Tsiakoulis and Petros Maragos},
  title     = {Video-realistic expressive audio-visual speech synthesis for the Greek language},
  journal   = {Speech Communication},
  volume    = {95},
  pages     = {137--152},
  year      = {2017},
  date      = {2017-01-01},
  issn      = {0167-6393},
  doi       = {10.1016/j.specom.2017.08.011},
  url       = {http://robotics.ntua.gr/wp-content/uploads/publications/FilntisisKatsamanisTsiakoulis+_VideoRealExprAudioVisSpeechSynthGrLang_SC17.pdf},
  abstract  = {High quality expressive speech synthesis has been a long-standing goal towards natural human-computer interaction. Generating a talking head which is both realistic and expressive appears to be a considerable challenge, due to both the high complexity in the acoustic and visual streams and the large non-discrete number of emotional states we would like the talking head to be able to express. In order to cover all the desired emotions, a significant amount of data is required, which poses an additional time-consuming data collection challenge. In this paper we attempt to address the aforementioned problems in an audio-visual context. Towards this goal, we propose two deep neural network (DNN) architectures for Video-realistic Expressive Audio-Visual Text-To-Speech synthesis (EAVTTS) and evaluate them by comparing them directly both to traditional hidden Markov model (HMM) based EAVTTS, as well as a concatenative unit selection EAVTTS approach, both on the realism and the expressiveness of the generated talking head. Next, we investigate adaptation and interpolation techniques to address the problem of covering the large emotional space. We use HMM interpolation in order to generate different levels of intensity for an emotion, as well as investigate whether it is possible to generate speech with intermediate speaking styles between two emotions. In addition, we employ HMM adaptation to adapt an HMM-based system to another emotion using only a limited amount of adaptation data from the target emotion. We performed an extensive experimental evaluation on a medium sized audio-visual corpus covering three emotions, namely anger, sadness and happiness, as well as neutral reading style. Our results show that DNN-based models outperform HMMs and unit selection on both the realism and expressiveness of the generated talking heads, while in terms of adaptation we can successfully adapt an audio-visual HMM set trained on a neutral speaking style database to a target emotion. Finally, we show that HMM interpolation can indeed generate different levels of intensity for EAVTTS by interpolating an emotion with the neutral reading style, as well as in some cases, generate audio-visual speech with intermediate expressions between two emotions.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
High quality expressive speech synthesis has been a long-standing goal towards natural human-computer interaction. Generating a talking head which is both realistic and expressive appears to be a considerable challenge, due to both the high complexity in the acoustic and visual streams and the large non-discrete number of emotional states we would like the talking head to be able to express. In order to cover all the desired emotions, a significant amount of data is required, which poses an additional time-consuming data collection challenge. In this paper we attempt to address the aforementioned problems in an audio-visual context. Towards this goal, we propose two deep neural network (DNN) architectures for Video-realistic Expressive Audio-Visual Text-To-Speech synthesis (EAVTTS) and evaluate them by comparing them directly both to traditional hidden Markov model (HMM) based EAVTTS, as well as a concatenative unit selection EAVTTS approach, both on the realism and the expressiveness of the generated talking head. Next, we investigate adaptation and interpolation techniques to address the problem of covering the large emotional space. We use HMM interpolation in order to generate different levels of intensity for an emotion, as well as investigate whether it is possible to generate speech with intermediate speaking styles between two emotions. In addition, we employ HMM adaptation to adapt an HMM-based system to another emotion using only a limited amount of adaptation data from the target emotion. We performed an extensive experimental evaluation on a medium sized audio-visual corpus covering three emotions, namely anger, sadness and happiness, as well as neutral reading style. Our results show that DNN-based models outperform HMMs and unit selection on both the realism and expressiveness of the generated talking heads, while in terms of adaptation we can successfully adapt an audio-visual HMM set trained on a neutral speaking style database to a target emotion. Finally, we show that HMM interpolation can indeed generate different levels of intensity for EAVTTS by interpolating an emotion with the neutral reading style, as well as in some cases, generate audio-visual speech with intermediate expressions between two emotions. |
A Zlatintsi, P Koutras, G Evangelopoulos, N Malandrakis, N Efthymiou, K Pastra, A Potamianos, P Maragos COGNIMUSE: a multimodal video database annotated with saliency, events, semantics and emotion with application to summarization Journal Article EURASIP Journal on Image and Video Processing, 54 , pp. 1–24, 2017. Abstract | BibTeX | Links: [PDF] @article{ZKE+17, title = {COGNIMUSE: a multimodal video database annotated with saliency, events, semantics and emotion with application to summarization}, author = {A Zlatintsi and P Koutras and G Evangelopoulos and N Malandrakis and N Efthymiou and K Pastra and A Potamianos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/Zlatintsi+_COGNIMUSEdb_EURASIP_JIVP-2017.pdf}, doi = {10.1186/s13640-017-0194-1}, year = {2017}, date = {2017-01-01}, journal = {EURASIP Journal on Image and Video Processing}, volume = {54}, pages = {1--24}, abstract = {Research related to computational modeling for machine-based understanding requires ground truth data for training, content analysis, and evaluation. In this paper, we present a multimodal video database, namely COGNIMUSE, annotated with sensory and semantic saliency, events, cross-media semantics, and emotion. The purpose of this database is manifold; it can be used for training and evaluation of event detection and summarization algorithms, for classification and recognition of audio-visual and cross-media events, as well as for emotion tracking. In order to enable comparisons with other computational models, we propose state-of-the-art algorithms, specifically a unified energy-based audio-visual framework and a method for text saliency computation, for the detection of perceptually salient events from videos. Additionally, a movie summarization system for the automatic production of summaries is presented. 
Two kinds of evaluation were performed, an objective based on the saliency annotation of the database and an extensive qualitative human evaluation of the automatically produced summaries, where we investigated what composes high-quality movie summaries, where both methods verified the appropriateness of the proposed methods. The annotation of the database and the code for the summarization system can be found at http://cognimuse.cs.ntua.gr/database.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Research related to computational modeling for machine-based understanding requires ground truth data for training, content analysis, and evaluation. In this paper, we present a multimodal video database, namely COGNIMUSE, annotated with sensory and semantic saliency, events, cross-media semantics, and emotion. The purpose of this database is manifold; it can be used for training and evaluation of event detection and summarization algorithms, for classification and recognition of audio-visual and cross-media events, as well as for emotion tracking. In order to enable comparisons with other computational models, we propose state-of-the-art algorithms, specifically a unified energy-based audio-visual framework and a method for text saliency computation, for the detection of perceptually salient events from videos. Additionally, a movie summarization system for the automatic production of summaries is presented. Two kinds of evaluation were performed, an objective based on the saliency annotation of the database and an extensive qualitative human evaluation of the automatically produced summaries, where we investigated what composes high-quality movie summaries, where both methods verified the appropriateness of the proposed methods. The annotation of the database and the code for the summarization system can be found at http://cognimuse.cs.ntua.gr/database. |
G Chalvatzaki, X S Papageorgiou, C S Tzafestas Towards a user-adaptive context-aware robotic walker with a pathological gait assessment system: First experimental study Conference IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), 2017. Abstract | BibTeX | Links: [PDF] @conference{CPT17, title = {Towards a user-adaptive context-aware robotic walker with a pathological gait assessment system: First experimental study}, author = {G Chalvatzaki and X S Papageorgiou and C S Tzafestas}, url = {http://robotics.ntua.gr/wp-content/publications/CPT17.pdf}, doi = {10.1109/IROS.2017.8206388}, year = {2017}, date = {2017-09-01}, booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, pages = {5037--5042}, abstract = {When designing a user-friendly Mobility Assistive Device (MAD) for mobility constrained people, it is important to take into account the diverse spectrum of disabilities, which results to completely different needs to be covered by the MAD for each specific user. An intelligent adaptive behavior is necessary. In this work we present experimental results, using an in house developed methodology for assessing the gait of users with different mobility status while interacting with a robotic MAD. We use data from a laser scanner, mounted on the MAD to track the legs using Particle Filters and Probabilistic Data Association (PDA-PF). The legs' states are fed to an HMM-based pathological gait cycle recognition system to compute in real-time the gait parameters that are crucial for the mobility status characterization of the user. We aim to show that a gait assessment system would be an important feedback for an intelligent MAD. Thus, we use this system to compare the gaits of the subjects using two different control settings of the MAD and we experimentally validate the ability of our system to recognize the impact of the control designs on the users' walking performance. 
The results demonstrate that a generic control scheme does not meet every patient's needs, and therefore, an Adaptive Context-Aware MAD (ACA MAD), that can understand the specific needs of the user, is important for enhancing the human-robot physical interaction.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } When designing a user-friendly Mobility Assistive Device (MAD) for mobility constrained people, it is important to take into account the diverse spectrum of disabilities, which results to completely different needs to be covered by the MAD for each specific user. An intelligent adaptive behavior is necessary. In this work we present experimental results, using an in house developed methodology for assessing the gait of users with different mobility status while interacting with a robotic MAD. We use data from a laser scanner, mounted on the MAD to track the legs using Particle Filters and Probabilistic Data Association (PDA-PF). The legs' states are fed to an HMM-based pathological gait cycle recognition system to compute in real-time the gait parameters that are crucial for the mobility status characterization of the user. We aim to show that a gait assessment system would be an important feedback for an intelligent MAD. Thus, we use this system to compare the gaits of the subjects using two different control settings of the MAD and we experimentally validate the ability of our system to recognize the impact of the control designs on the users' walking performance. The results demonstrate that a generic control scheme does not meet every patient's needs, and therefore, an Adaptive Context-Aware MAD (ACA MAD), that can understand the specific needs of the user, is important for enhancing the human-robot physical interaction. |
G Chalvatzaki, X S Papageorgiou, C S Tzafestas, P Maragos HMM-based Pathological Gait Analyzer for a User-Adaptive Intelligent Robotic Walker Conference Proc. 25th European Conf.(EUSIPCO-17) Workshop: "MultiLearn 2017 - Multimodal processing, modeling and learning for human-computer/robot interaction applications", Kos, Greece, 2017. Abstract | BibTeX | Links: [PDF] @conference{CPTM_WML17, title = {HMM-based Pathological Gait Analyzer for a User-Adaptive Intelligent Robotic Walker}, author = {G Chalvatzaki and X S Papageorgiou and C S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/CPTM_WML17.pdf}, year = {2017}, date = {2017-09-01}, booktitle = {Proc. 25th European Conf.(EUSIPCO-17) Workshop: "MultiLearn 2017 - Multimodal processing, modeling and learning for human-computer/robot interaction applications"}, address = {Kos, Greece}, abstract = {During the past decade, robotic technology has evolved considerably towards the development of cognitive robotic systems that enable close interaction with humans. Application fields of such novel robotic technologies are now wide spreading covering a variety of human assistance functionalities, aiming in particular at supporting the needs of human beings experiencing various forms of mobility or cognitive impairments. Mobility impairments are prevalent in the elderly population and constitute one of the main causes related to difficulties in performing Activities of Daily Living (ADLs) and consequent reduction of quality of life. This paper reports current research work related to the development of a pathological gait analyzer for intelligent robotic rollator aiming to be an input to a user-adaptive and context-aware robot control architecture. Specifically, we present a novel method for human leg tracking using Particle Filters and Probabilistic Data Association from a laser scanner, constituting a non-wearable and non-intrusive approach. 
The tracked positions and velocities of the user’s legs are the observables of an HMM, which provides the gait phases of the detected gait cycles. Given those phases we compute specific gait parameters, which are used for medical diagnosis. The results of our pathological gait analyzer are validated using ground truth data from a GAITRite system. The results presented in this paper demonstrate that the proposed human data analysis scheme has the potential to provide the necessary methodological (modeling, inference, and learning) framework for a cognitive behavior-based robot control system.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } During the past decade, robotic technology has evolved considerably towards the development of cognitive robotic systems that enable close interaction with humans. Application fields of such novel robotic technologies are now wide spreading covering a variety of human assistance functionalities, aiming in particular at supporting the needs of human beings experiencing various forms of mobility or cognitive impairments. Mobility impairments are prevalent in the elderly population and constitute one of the main causes related to difficulties in performing Activities of Daily Living (ADLs) and consequent reduction of quality of life. This paper reports current research work related to the development of a pathological gait analyzer for intelligent robotic rollator aiming to be an input to a user-adaptive and context-aware robot control architecture. Specifically, we present a novel method for human leg tracking using Particle Filters and Probabilistic Data Association from a laser scanner, constituting a non-wearable and non-intrusive approach. The tracked positions and velocities of the user’s legs are the observables of an HMM, which provides the gait phases of the detected gait cycles. Given those phases we compute specific gait parameters, which are used for medical diagnosis. 
The results of our pathological gait analyzer are validated using ground truth data from a GAITRite system. The results presented in this paper demonstrate that the proposed human data analysis scheme has the potential to provide the necessary methodological (modeling, inference, and learning) framework for a cognitive behavior-based robot control system. |
A C Dometios, X S Papageorgiou, A Arvanitakis, C S Tzafestas, P Maragos Real-time End-effector Motion Behavior Planning Approach Using On-line Point-cloud Data Towards a User Adaptive Assistive Bath Robot Conference 2017 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), Vancouver, Canada, 2017. Abstract | BibTeX | Links: [PDF] @conference{DPATM17, title = {Real-time End-effector Motion Behavior Planning Approach Using On-line Point-cloud Data Towards a User Adaptive Assistive Bath Robot}, author = {A C Dometios and X S Papageorgiou and A Arvanitakis and C S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/Dometios17_End-Effector_Motion_Behavior_Planning_PointCloud.pdf}, doi = {10.1109/IROS.2017.8206387}, year = {2017}, date = {2017-09-01}, booktitle = {2017 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, pages = {5031--5036}, address = {Vancouver, Canada}, abstract = {Elderly people have particular needs in performing bathing activities, since these tasks require body flexibility. Our aim is to build an assistive robotic bath system, in order to increase the independence and safety of this procedure. Towards this end, the expertise of professional carers for bathing sequences and appropriate motions has to be adopted, in order to achieve natural, physical human - robot interaction. In this paper, a real-time end-effector motion planning method for an assistive bath robot, using on-line Point-Cloud information, is proposed. The visual feedback obtained from Kinect depth sensor is employed to adapt suitable washing paths to the user’s body part motion and deformable surface. We make use of a navigation function-based controller, with guarantied globally uniformly asymptotic stability, and bijective transformations for the adaptation of the paths. 
Experiments were conducted with a rigid rectangular object for validation purposes, while a female subject took part to the experiment in order to evaluate and demonstrate the basic concepts of the proposed methodology.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Elderly people have particular needs in performing bathing activities, since these tasks require body flexibility. Our aim is to build an assistive robotic bath system, in order to increase the independence and safety of this procedure. Towards this end, the expertise of professional carers for bathing sequences and appropriate motions has to be adopted, in order to achieve natural, physical human - robot interaction. In this paper, a real-time end-effector motion planning method for an assistive bath robot, using on-line Point-Cloud information, is proposed. The visual feedback obtained from Kinect depth sensor is employed to adapt suitable washing paths to the user’s body part motion and deformable surface. We make use of a navigation function-based controller, with guarantied globally uniformly asymptotic stability, and bijective transformations for the adaptation of the paths. Experiments were conducted with a rigid rectangular object for validation purposes, while a female subject took part to the experiment in order to evaluate and demonstrate the basic concepts of the proposed methodology. |
A C Dometios, A Tsiami, A Arvanitakis, P Giannoulis, X S Papageorgiou, C S Tzafestas, P Maragos Integrated Speech-based Perception System for User Adaptive Robot Motion Planning in Assistive Bath Scenarios Conference Proc. of the 25th European Signal Processing Conference - Workshop: "MultiLearn 2017 - Multimodal processing, modeling and learning for human-computer/robot interaction applications", Kos, Greece, 2017. Abstract | BibTeX | Links: [PDF] @conference{DTAGPTM17, title = {Integrated Speech-based Perception System for User Adaptive Robot Motion Planning in Assistive Bath Scenarios}, author = {A C Dometios and A Tsiami and A Arvanitakis and P Giannoulis and X S Papageorgiou and C S Tzafestas and P Maragos}, url = {http://www.eurasip.org/Proceedings/Eusipco/Eusipco2017/wpapers/ML5.pdf}, year = {2017}, date = {2017-09-01}, booktitle = {Proc. of the 25th European Signal Processing Conference - Workshop: "MultiLearn 2017 - Multimodal processing, modeling and learning for human-computer/robot interaction applications"}, address = {Kos, Greece}, abstract = {Elderly people have augmented needs in performing bathing activities, since these tasks require body flexibility. Our aim is to build an assistive robotic bath system, in order to increase the independence and safety of this procedure. Towards this end, the expertise of professional carers for bathing sequences and appropriate motions have to be adopted, in order to achieve natural, physical human - robot interaction. The integration of the communication and verbal interaction between the user and the robot during the bathing tasks is a key issue for such a challenging assistive robotic application. In this paper, we tackle this challenge by developing a novel integrated real-time speech-based perception system, which will provide the necessary assistance to the frail senior citizens. This system can be suitable for installation and use in conventional home or hospital bathroom space. 
We employ both a speech recognition system with sub-modules to achieve a smooth and robust human-system communication and a low cost depth camera for end-effector motion planning. With a variety of spoken commands, the system can be adapted to the user’s needs and preferences. The instructed by the user washing commands are executed by a robotic manipulator, demonstrating the progress of each task. The smooth integration of all subsystems is accomplished by a modular and hierarchical decision architecture organized as a Behavior Tree. The system was experimentally tested by successful execution of scenarios from different users with different preferences.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Elderly people have augmented needs in performing bathing activities, since these tasks require body flexibility. Our aim is to build an assistive robotic bath system, in order to increase the independence and safety of this procedure. Towards this end, the expertise of professional carers for bathing sequences and appropriate motions have to be adopted, in order to achieve natural, physical human - robot interaction. The integration of the communication and verbal interaction between the user and the robot during the bathing tasks is a key issue for such a challenging assistive robotic application. In this paper, we tackle this challenge by developing a novel integrated real-time speech-based perception system, which will provide the necessary assistance to the frail senior citizens. This system can be suitable for installation and use in conventional home or hospital bathroom space. We employ both a speech recognition system with sub-modules to achieve a smooth and robust human-system communication and a low cost depth camera for end-effector motion planning. With a variety of spoken commands, the system can be adapted to the user’s needs and preferences. 
The instructed by the user washing commands are executed by a robotic manipulator, demonstrating the progress of each task. The smooth integration of all subsystems is accomplished by a modular and hierarchical decision architecture organized as a Behavior Tree. The system was experimentally tested by successful execution of scenarios from different users with different preferences. |
G. Velentzas, C. Tzafestas, M. Khamassi Bio-inspired meta-learning for active exploration during non-stationary multi-armed bandit tasks Conference Proc. IEEE Intelligent Systems Conference, London, UK, 2017. Abstract | BibTeX | Links: [PDF] @conference{BFB97, title = {Bio-inspired meta-learning for active exploration during non-stationary multi-armed bandit tasks}, author = {G. Velentzas and C. Tzafestas and M. Khamassi}, url = {http://robotics.ntua.gr/wp-content/publications/Velentzas_Intellisys2017.pdf}, doi = {10.1109/IntelliSys.2017.8324365}, year = {2017}, date = {2017-09-01}, booktitle = {Proc. IEEE Intelligent Systems Conference}, address = {London, UK}, abstract = {Fast adaptation to changes in the environment requires agents (animals, robots and simulated artefacts) to be able to dynamically tune an exploration-exploitation trade-off during learning. This trade-off usually determines a fixed proportion of exploitative choices (i.e. choice of the action that subjectively appears as best at a given moment) relative to exploratory choices (i.e. testing other actions that now appear worst but may turn out promising later). Rather than using a fixed proportion, non-stationary multi-armed bandit methods in the field of machine learning have proven that principles such as exploring actions that have not been tested for a long time can lead to performance closer to optimal - bounded regret. In parallel, researches in active exploration in the fields of robot learning and computational neuroscience of learning and decision-making have proposed alternative solutions such as transiently increasing exploration in response to drops in average performance, or attributing exploration bonuses specifically to actions associated with high uncertainty in order to gain information when choosing them. 
In this work, we compare different methods from machine learning, computational neuroscience and robot learning on a set of non-stationary stochastic multi-armed bandit tasks: abrupt shifts; best bandit becomes worst one and vice versa; multiple shifting frequencies. We find that different methods are appropriate in different scenarios. We propose a new hybrid method combining bio-inspired meta-learning, kalman filter and exploration bonuses and show that it outperforms other methods in these scenarios.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Fast adaptation to changes in the environment requires agents (animals, robots and simulated artefacts) to be able to dynamically tune an exploration-exploitation trade-off during learning. This trade-off usually determines a fixed proportion of exploitative choices (i.e. choice of the action that subjectively appears as best at a given moment) relative to exploratory choices (i.e. testing other actions that now appear worst but may turn out promising later). Rather than using a fixed proportion, non-stationary multi-armed bandit methods in the field of machine learning have proven that principles such as exploring actions that have not been tested for a long time can lead to performance closer to optimal - bounded regret. In parallel, researches in active exploration in the fields of robot learning and computational neuroscience of learning and decision-making have proposed alternative solutions such as transiently increasing exploration in response to drops in average performance, or attributing exploration bonuses specifically to actions associated with high uncertainty in order to gain information when choosing them. In this work, we compare different methods from machine learning, computational neuroscience and robot learning on a set of non-stationary stochastic multi-armed bandit tasks: abrupt shifts; best bandit becomes worst one and vice versa; multiple shifting frequencies. 
We find that different methods are appropriate in different scenarios. We propose a new hybrid method combining bio-inspired meta-learning, kalman filter and exploration bonuses and show that it outperforms other methods in these scenarios. |
G Chalvatzaki, X S Papageorgiou, C S Tzafestas, P Maragos Estimating double support in pathological gaits using an HMM-based analyzer for an intelligent robotic walker Conference IEEE International Symposium on Robot and Human Interactive Communication (RO-MAN), 2017. Abstract | BibTeX | Links: [PDF] @conference{CPTM_ROMAN17, title = {Estimating double support in pathological gaits using an HMM-based analyzer for an intelligent robotic walker}, author = {G Chalvatzaki and X S Papageorgiou and C S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/CPTM_ROMAN17.pdf}, doi = {10.1109/ROMAN.2017.8172287}, year = {2017}, date = {2017-08-01}, booktitle = {IEEE International Symposium on Robot and Human Interactive Communication (RO-MAN)}, pages = {101--106}, abstract = {For a robotic walker designed to assist mobility constrained people, it is important to take into account the different spectrum of pathological walking patterns, which result into completely different needs to be covered for each specific user. For a deployable intelligent assistant robot it is necessary to have a precise gait analysis system, providing real-time monitoring of the user and extracting specific gait parameters, which are associated with the rehabilitation progress and the risk of fall. In this paper, we present a completely non-invasive framework for the on-line analysis of pathological human gait and the recognition of specific gait phases and events. The performance of this gait analysis system is assessed, in particular, as related to the estimation of double support phases, which are typically difficult to extract reliably, especially when applying non-wearable and non-intrusive technologies. Furthermore, the duration of double support phases constitutes an important gait parameter and a critical indicator in pathological gait patterns. 
The performance of this framework is assessed using real data collected from an ensemble of elderly persons with different pathologies. The estimated gait parameters are experimentally validated using ground truth data provided by a Motion Capture system. The results obtained and presented in this paper demonstrate that the proposed human data analysis (modeling, learning and inference) framework has the potential to support efficient detection and classification of specific walking pathologies, as needed to empower a cognitive robotic mobility-assistance device with user-adaptive and context-aware functionalities.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } For a robotic walker designed to assist mobility constrained people, it is important to take into account the different spectrum of pathological walking patterns, which result into completely different needs to be covered for each specific user. For a deployable intelligent assistant robot it is necessary to have a precise gait analysis system, providing real-time monitoring of the user and extracting specific gait parameters, which are associated with the rehabilitation progress and the risk of fall. In this paper, we present a completely non-invasive framework for the on-line analysis of pathological human gait and the recognition of specific gait phases and events. The performance of this gait analysis system is assessed, in particular, as related to the estimation of double support phases, which are typically difficult to extract reliably, especially when applying non-wearable and non-intrusive technologies. Furthermore, the duration of double support phases constitutes an important gait parameter and a critical indicator in pathological gait patterns. The performance of this framework is assessed using real data collected from an ensemble of elderly persons with different pathologies. 
The estimated gait parameters are experimentally validated using ground truth data provided by a Motion Capture system. The results obtained and presented in this paper demonstrate that the proposed human data analysis (modeling, learning and inference) framework has the potential to support efficient detection and classification of specific walking pathologies, as needed to empower a cognitive robotic mobility-assistance device with user-adaptive and context-aware functionalities. |
Theodore Tsitsimis, George Velentzas, Mehdi Khamassi, Costas Tzafestas Online adaptation to human engagement perturbations in simulated human-robot interaction using hybrid reinforcement learning Conference Proc. of the 25th European Signal Processing Conference - Workshop: "MultiLearn 2017 - Multimodal processing, modeling and learning for human-computer/robot interaction applications", Kos, Greece, 2017. Abstract | BibTeX | Links: [PDF] @conference{BFB98, title = {Online adaptation to human engagement perturbations in simulated human-robot interaction using hybrid reinforcement learning}, author = {Theodore Tsitsimis and George Velentzas and Mehdi Khamassi and Costas Tzafestas}, editor = {Michael Aron}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/MultiLearn2017.pdf}, year = {2017}, date = {2017-08-01}, booktitle = {Proc. of the 25th European Signal Processing Conference - Workshop: "MultiLearn 2017 - Multimodal processing, modeling and learning for human-computer/robot interaction applications"}, address = {Kos, Greece}, abstract = {Dynamic uncontrolled human-robot interaction requires robots to be able to adapt to changes in the human’s behavior and intentions. Among relevant signals, non-verbal cues such as the human’s gaze can provide the robot with important information about the human’s current engagement in the task, and whether the robot should continue its current behavior or not. In a previous work [1] we proposed an active exploration algorithm for reinforcement learning where the reward function is the weighted sum of the human’s current engagement and variations of this engagement (so that a low but increasing engagement is rewarding). We used a structured (parameterized) continuous action space where a meta-learning algorithm is applied to simultaneously tune the exploration in discrete and continuous action space, enabling the robot to learn which discrete action is expected by the human (e.g. 
moving an object) and with which velocity of movement. In this paper we want to show the performance of the algorithm to a simulated human-robot interaction task where a practical approach is followed to estimate human engagement through visual cues of the head pose. We then measure the adaptation of the algorithm to engagement perturbations simulated as changes in the optimal action parameter and we quantify its performance for variations in perturbation duration and measurement noise.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Dynamic uncontrolled human-robot interaction requires robots to be able to adapt to changes in the human’s behavior and intentions. Among relevant signals, non-verbal cues such as the human’s gaze can provide the robot with important information about the human’s current engagement in the task, and whether the robot should continue its current behavior or not. In a previous work [1] we proposed an active exploration algorithm for reinforcement learning where the reward function is the weighted sum of the human’s current engagement and variations of this engagement (so that a low but increasing engagement is rewarding). We used a structured (parameterized) continuous action space where a meta-learning algorithm is applied to simultaneously tune the exploration in discrete and continuous action space, enabling the robot to learn which discrete action is expected by the human (e.g. moving an object) and with which velocity of movement. In this paper we want to show the performance of the algorithm to a simulated human-robot interaction task where a practical approach is followed to estimate human engagement through visual cues of the head pose. We then measure the adaptation of the algorithm to engagement perturbations simulated as changes in the optimal action parameter and we quantify its performance for variations in perturbation duration and measurement noise. |
G Chalvatzaki, X S Papageorgiou, C S Tzafestas, P Maragos Comparative experimental validation of human gait tracking algorithms for an intelligent robotic rollator Conference IEEE International Conference on Robotics and Automation (ICRA), 2017. Abstract | BibTeX | Links: [PDF]
@conference{CPTM_ICRA17,
  title     = {Comparative experimental validation of human gait tracking algorithms for an intelligent robotic rollator},
  author    = {G Chalvatzaki and X S Papageorgiou and C S Tzafestas and P Maragos},
  url       = {http://robotics.ntua.gr/wp-content/publications/CPTM_ICRA17.pdf},
  doi       = {10.1109/ICRA.2017.7989713},
  year      = {2017},
  date      = {2017-05-01},
  booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
  pages     = {6026--6031},
  abstract  = {Tracking human gait accurately and robustly constitutes a key factor for a smart robotic walker, aiming to provide assistance to patients with different mobility impairment. A context-aware assistive robot needs constant knowledge of the user's kinematic state to assess the gait status and adjust its movement properly to provide optimal assistance. In this work, we experimentally validate the performance of two gait tracking algorithms using data from elderly patients; the first algorithm employs a Kalman Filter (KF), while the second one tracks the user legs separately using two probabilistically associated Particle Filters (PFs). The algorithms are compared according to their accuracy and robustness, using data captured from real experiments, where elderly subjects performed specific walking scenarios with physical assistance from a prototype Robotic Rollator. Sensorial data were provided by a laser rangefinder mounted on the robotic platform recording the movement of the user's legs. The accuracy of the proposed algorithms is analysed and validated with respect to ground truth data provided by a Motion Capture system tracking a set of visual markers worn by the patients. The robustness of the two tracking algorithms is also analysed comparatively in a complex maneuvering scenario. Current experimental findings demonstrate the superior performance of the PFs in difficult cases of occlusions and clutter, where KF tracking often fails.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Tracking human gait accurately and robustly constitutes a key factor for a smart robotic walker, aiming to provide assistance to patients with different mobility impairment. A context-aware assistive robot needs constant knowledge of the user's kinematic state to assess the gait status and adjust its movement properly to provide optimal assistance. In this work, we experimentally validate the performance of two gait tracking algorithms using data from elderly patients; the first algorithm employs a Kalman Filter (KF), while the second one tracks the user legs separately using two probabilistically associated Particle Filters (PFs). The algorithms are compared according to their accuracy and robustness, using data captured from real experiments, where elderly subjects performed specific walking scenarios with physical assistance from a prototype Robotic Rollator. Sensorial data were provided by a laser rangefinder mounted on the robotic platform recording the movement of the user's legs. The accuracy of the proposed algorithms is analysed and validated with respect to ground truth data provided by a Motion Capture system tracking a set of visual markers worn by the patients. The robustness of the two tracking algorithms is also analysed comparatively in a complex maneuvering scenario. Current experimental findings demonstrate the superior performance of the PFs in difficult cases of occlusions and clutter, where KF tracking often fails. |
Mehdi Khamassi, George Velentzas, Theodore Tsitsimis, Costas Tzafestas Active exploration and parameterized reinforcement learning applied to a simulated human-robot interaction task Conference Proc. IEEE Int'l Conference on Robotic Computing, Taichung, Taiwan, 2017. Abstract | BibTeX | Links: [PDF]
@conference{BFB95,
  title     = {Active exploration and parameterized reinforcement learning applied to a simulated human-robot interaction task},
  author    = {Mehdi Khamassi and George Velentzas and Theodore Tsitsimis and Costas Tzafestas},
  url       = {http://robotics.ntua.gr/wp-content/publications/khamassi_IRC2017.pdf},
  doi       = {10.1109/IRC.2017.33},
  year      = {2017},
  date      = {2017-04-01},
  booktitle = {Proc. IEEE Int'l Conference on Robotic Computing},
  pages     = {28--35},
  address   = {Taichung, Taiwan},
  abstract  = {Online model-free reinforcement learning (RL) methods with continuous actions are playing a prominent role when dealing with real-world applications such as Robotics. However, when confronted to non-stationary environments, these methods crucially rely on an exploration-exploitation trade-off which is rarely dynamically and automatically adjusted to changes in the environment. Here we propose an active exploration algorithm for RL in structured (parameterized) continuous action space. This framework deals with a set of discrete actions, each of which is parameterized with continuous variables. Discrete exploration is controlled through a Boltzmann softmax function with an inverse temperature β parameter. In parallel, a Gaussian exploration is applied to the continuous action parameters. We apply a meta-learning algorithm based on the comparison between variations of short-term and long-term reward running averages to simultaneously tune β and the width of the Gaussian distribution from which continuous action parameters are drawn. We first show that this algorithm reaches state-of-the-art performance in the non-stationary multi-armed bandit paradigm, while also being generalizable to continuous actions and multi-step tasks. We then apply it to a simulated human-robot interaction task, and show that it outperforms continuous parameterized RL both without active exploration and with active exploration based on uncertainty variations measured by a Kalman-Q-learning algorithm.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Online model-free reinforcement learning (RL) methods with continuous actions are playing a prominent role when dealing with real-world applications such as Robotics. However, when confronted to non-stationary environments, these methods crucially rely on an exploration-exploitation trade-off which is rarely dynamically and automatically adjusted to changes in the environment. Here we propose an active exploration algorithm for RL in structured (parameterized) continuous action space. This framework deals with a set of discrete actions, each of which is parameterized with continuous variables. Discrete exploration is controlled through a Boltzmann softmax function with an inverse temperature β parameter. In parallel, a Gaussian exploration is applied to the continuous action parameters. We apply a meta-learning algorithm based on the comparison between variations of short-term and long-term reward running averages to simultaneously tune β and the width of the Gaussian distribution from which continuous action parameters are drawn. We first show that this algorithm reaches state-of-the-art performance in the non-stationary multi-armed bandit paradigm, while also being generalizable to continuous actions and multi-step tasks. We then apply it to a simulated human-robot interaction task, and show that it outperforms continuous parameterized RL both without active exploration and with active exploration based on uncertainty variations measured by a Kalman-Q-learning algorithm. |
A Zlatintsi, I Rodomagoulakis, V Pitsikalis, P Koutras, N Kardaris, X Papageorgiou, C Tzafestas, P Maragos Social Human-Robot Interaction for the Elderly: Two Real-life Use Cases Conference ACM/IEEE International Conference on Human-Robot Interaction (HRI), Vienna, Austria, 2017. Abstract | BibTeX | Links: [PDF]
@conference{ZRP+17,
  title     = {Social Human-Robot Interaction for the Elderly: Two Real-life Use Cases},
  author    = {A Zlatintsi and I Rodomagoulakis and V Pitsikalis and P Koutras and N Kardaris and X Papageorgiou and C Tzafestas and P Maragos},
  url       = {http://robotics.ntua.gr/wp-content/publications/Zlatintsi+_SocialHRIforTheElderly_HRI-17.pdf},
  year      = {2017},
  date      = {2017-03-01},
  booktitle = {ACM/IEEE International Conference on Human-Robot Interaction (HRI)},
  address   = {Vienna, Austria},
  abstract  = {We explore new aspects on assistive living via smart social human-robot interaction (HRI) involving automatic recognition of multimodal gestures and speech in a natural interface, providing social features in HRI. We discuss a whole framework of resources, including datasets and tools, briefly shown in two real-life use cases for elderly subjects: a multimodal interface of an assistive robotic rollator and an assistive bathing robot. We discuss these domain specific tasks, and open source tools, which can be used to build such HRI systems, as well as indicative results. Sharing such resources can open new perspectives in assistive HRI.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
We explore new aspects on assistive living via smart social human-robot interaction (HRI) involving automatic recognition of multimodal gestures and speech in a natural interface, providing social features in HRI. We discuss a whole framework of resources, including datasets and tools, briefly shown in two real-life use cases for elderly subjects: a multimodal interface of an assistive robotic rollator and an assistive bathing robot. We discuss these domain specific tasks, and open source tools, which can be used to build such HRI systems, as well as indicative results. Sharing such resources can open new perspectives in assistive HRI. |
X S Papageorgiou, G Chalvatzaki, A Dometios, C S Tzafestas, P Maragos Intelligent Assistive Robotic Systems for the Elderly: Two Real-life Use Cases Conference Proceedings of the 10th International Conference on PErvasive Technologies Related to Assistive Environments (PETRA '17), ACM, Island of Rhodes, Greece, 2017, ISBN: 978-1-4503-5227-7. Abstract | BibTeX | Links: [PDF]
@conference{PETRA2017,
  title     = {Intelligent Assistive Robotic Systems for the Elderly: Two Real-life Use Cases},
  author    = {X S Papageorgiou and G Chalvatzaki and A Dometios and C S Tzafestas and P Maragos},
  url       = {http://robotics.ntua.gr/wp-content/publications/PETRA2017.pdf},
  doi       = {10.1145/3056540.3076184},
  isbn      = {978-1-4503-5227-7},
  year      = {2017},
  date      = {2017-01-01},
  booktitle = {Proceedings of the 10th International Conference on PErvasive Technologies Related to Assistive Environments (PETRA '17)},
  pages     = {360--365},
  publisher = {ACM},
  address   = {Island of Rhodes, Greece},
  abstract  = {Mobility impairments are prevalent in the elderly population and constitute one of the main causes related to difficulties in performing Activities of Daily Living (ADLs) and consequent reduction of quality of life. When designing a user-friendly assistive device for mobility constrained people, it is important to take into account the diverse spectrum of disabilities, which results into completely different needs to be covered by the device for each specific user. An intelligent adaptive behavior is necessary for the deployment of such systems. Also, elderly people have particular needs in specific case of performing bathing activities, since these tasks require body flexibility. We explore new aspects of assistive living via intelligent assistive robotic systems involving human robot interaction in a natural interface. Our aim is to build assistive robotic systems, in order to increase the independence and safety of these procedures. Towards this end, the expertise of professional carers for walking or bathing sequences and appropriate motions have to be adopted, in order to achieve natural, physical human - robot interaction. Our goal is to report current research work related to the development of two real-life use cases of intelligent robotic systems for elderly aiming to provide user-adaptive and context-aware assistance.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Mobility impairments are prevalent in the elderly population and constitute one of the main causes related to difficulties in performing Activities of Daily Living (ADLs) and consequent reduction of quality of life. When designing a user-friendly assistive device for mobility constrained people, it is important to take into account the diverse spectrum of disabilities, which results into completely different needs to be covered by the device for each specific user. An intelligent adaptive behavior is necessary for the deployment of such systems. Also, elderly people have particular needs in specific case of performing bathing activities, since these tasks require body flexibility. We explore new aspects of assistive living via intelligent assistive robotic systems involving human robot interaction in a natural interface. Our aim is to build assistive robotic systems, in order to increase the independence and safety of these procedures. Towards this end, the expertise of professional carers for walking or bathing sequences and appropriate motions have to be adopted, in order to achieve natural, physical human - robot interaction. Our goal is to report current research work related to the development of two real-life use cases of intelligent robotic systems for elderly aiming to provide user-adaptive and context-aware assistance. |
G Karamanolakis, E Iosif, A Zlatintsi, A Pikrakis, A Potamianos Audio-based Distributional Semantic Models for Music Auto-tagging and Similarity Measurement Conference Proc. MultiLearn2017: Multimodal Processing, Modeling and Learning for Human-Computer/Robot Interaction Workshop, in conjunction with European Signal Processing Conference, Kos, Greece, 2017. Abstract | BibTeX | Links: [PDF]
@conference{KIZ+17,
  title     = {Audio-based Distributional Semantic Models for Music Auto-tagging and Similarity Measurement},
  author    = {G Karamanolakis and E Iosif and A Zlatintsi and A Pikrakis and A Potamianos},
  url       = {http://robotics.ntua.gr/wp-content/publications/Karamanolakis+_MultiLearn-17_ML7.pdf},
  year      = {2017},
  date      = {2017-01-01},
  booktitle = {Proc. MultiLearn2017: Multimodal Processing, Modeling and Learning for Human-Computer/Robot Interaction Workshop, in conjunction with European Signal Processing Conference},
  address   = {Kos, Greece},
  abstract  = {The recent development of Audio-based Distributional Semantic Models (ADSMs) enables the computation of audio and lexical vector representations in a joint acoustic-semantic space. In this work, these joint representations are applied to the problem of automatic tag generation. The predicted tags together with their corresponding acoustic representation are exploited for the construction of acoustic-semantic clip embeddings. The proposed algorithms are evaluated on the task of similarity measurement between music clips. Acoustic-semantic models are shown to outperform the state-of-the-art for this task and produce high quality tags for audio/music clips.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
The recent development of Audio-based Distributional Semantic Models (ADSMs) enables the computation of audio and lexical vector representations in a joint acoustic-semantic space. In this work, these joint representations are applied to the problem of automatic tag generation. The predicted tags together with their corresponding acoustic representation are exploited for the construction of acoustic-semantic clip embeddings. The proposed algorithms are evaluated on the task of similarity measurement between music clips. Acoustic-semantic models are shown to outperform the state-of-the-art for this task and produce high quality tags for audio/music clips. |
Vasileios Charisopoulos, Petros Maragos Morphological perceptrons: Geometry and training algorithms Conference Mathematical Morphology and Its Applications to Signal and Image Processing (ISMM 2017), 10225 , 2017, ISSN: 1611-3349. Abstract | BibTeX | Links: [PDF]
@conference{346,
  title     = {Morphological perceptrons: Geometry and training algorithms},
  author    = {Vasileios Charisopoulos and Petros Maragos},
  url       = {http://robotics.ntua.gr/wp-content/uploads/publications/CharisopoulosMaragos_MorphologPerceptGeometryTrainAlgor_ISMM17.pdf},
  doi       = {10.1007/978-3-319-57240-6_1},
  issn      = {1611-3349},
  year      = {2017},
  date      = {2017-01-01},
  booktitle = {Mathematical Morphology and Its Applications to Signal and Image Processing (ISMM 2017)},
  series    = {Lecture Notes in Computer Science},
  volume    = {10225},
  pages     = {3--15},
  abstract  = {Neural networks have traditionally relied on mostly linear models, such as the multiply-accumulate architecture of a linear perceptron that remains the dominant paradigm of neuronal computation. However, from a biological standpoint, neuron activity may as well involve inherently nonlinear and competitive operations. Mathematical morphology and minimax algebra provide the necessary background in the study of neural networks made up from these kinds of nonlinear units. This paper deals with such a model, called the morphological perceptron. We study some of its geometrical properties and introduce a training algorithm for binary classification. We point out the relationship between morphological classifiers and the recent field of tropical geometry, which enables us to obtain a precise bound on the number of linear regions of the maxout unit, a popular choice for deep neural networks introduced recently. Finally, we present some relevant numerical results.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Neural networks have traditionally relied on mostly linear models, such as the multiply-accumulate architecture of a linear perceptron that remains the dominant paradigm of neuronal computation. However, from a biological standpoint, neuron activity may as well involve inherently nonlinear and competitive operations. Mathematical morphology and minimax algebra provide the necessary background in the study of neural networks made up from these kinds of nonlinear units. This paper deals with such a model, called the morphological perceptron. We study some of its geometrical properties and introduce a training algorithm for binary classification. We point out the relationship between morphological classifiers and the recent field of tropical geometry, which enables us to obtain a precise bound on the number of linear regions of the maxout unit, a popular choice for deep neural networks introduced recently. Finally, we present some relevant numerical results. |
Mehdi Khamassi, George Velentzas, Theodore Tsitsimis, Costas Tzafestas Active exploration and parameterized reinforcement learning applied to a simulated human-robot interaction task Conference Proceedings - 2017 1st IEEE International Conference on Robotic Computing, IRC 2017, 2017, ISBN: 9781509067237. Abstract | BibTeX | Links: [PDF]
@conference{337,
  title     = {Active exploration and parameterized reinforcement learning applied to a simulated human-robot interaction task},
  author    = {Mehdi Khamassi and George Velentzas and Theodore Tsitsimis and Costas Tzafestas},
  url       = {http://ieeexplore.ieee.org/document/7926511/},
  doi       = {10.1109/IRC.2017.33},
  isbn      = {9781509067237},
  year      = {2017},
  date      = {2017-01-01},
  booktitle = {Proceedings - 2017 1st IEEE International Conference on Robotic Computing, IRC 2017},
  pages     = {28--35},
  abstract  = {{\textcopyright} 2017 IEEE. Online model-free reinforcement learning (RL) methods with continuous actions are playing a prominent role when dealing with real-world applications such as Robotics. However, when confronted to non-stationary environments, these methods crucially rely on an exploration-exploitation trade-off which is rarely dynamically and automatically adjusted to changes in the environment. Here we propose an active exploration algorithm for RL in structured (parameterized) continuous action space. This framework deals with a set of discrete actions, each of which is parameterized with continuous variables. Discrete exploration is controlled through a Boltzmann softmax function with an inverse temperature $\beta$ parameter. In parallel, a Gaussian exploration is applied to the continuous action parameters. We apply a meta-learning algorithm based on the comparison between variations of short-term and long-term reward running averages to simultaneously tune $\beta$ and the width of the Gaussian distribution from which continuous action parameters are drawn. We first show that this algorithm reaches state-of-the-art performance in the non-stationary multi-armed bandit paradigm, while also being generalizable to continuous actions and multi-step tasks. We then apply it to a simulated human-robot interaction task, and show that it outperforms continuous parameterized RL both without active exploration and with active exploration based on uncertainty variations measured by a Kalman-Q-learning algorithm.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
© 2017 IEEE. Online model-free reinforcement learning (RL) methods with continuous actions are playing a prominent role when dealing with real-world applications such as Robotics. However, when confronted to non-stationary environments, these methods crucially rely on an exploration-exploitation trade-off which is rarely dynamically and automatically adjusted to changes in the environment. Here we propose an active exploration algorithm for RL in structured (parameterized) continuous action space. This framework deals with a set of discrete actions, each of which is parameterized with continuous variables. Discrete exploration is controlled through a Boltzmann softmax function with an inverse temperature β parameter. In parallel, a Gaussian exploration is applied to the continuous action parameters. We apply a meta-learning algorithm based on the comparison between variations of short-term and long-term reward running averages to simultaneously tune β and the width of the Gaussian distribution from which continuous action parameters are drawn. We first show that this algorithm reaches state-of-the-art performance in the non-stationary multi-armed bandit paradigm, while also being generalizable to continuous actions and multi-step tasks. We then apply it to a simulated human-robot interaction task, and show that it outperforms continuous parameterized RL both without active exploration and with active exploration based on uncertainty variations measured by a Kalman-Q-learning algorithm. |
Athanasios Katsamanis, Vassilis Pitsikalis, Stavros Theodorakis, Petros Maragos Multimodal Gesture Recognition Book Chapter The Handbook of Multimodal-Multisensor Interfaces: Foundations, User Modeling, and Common Modality Combinations - Volume 1, pp. 449–487, Association for Computing Machinery and Morgan & Claypool, 2017, ISBN: 9781970001679. BibTeX | Links: [Webpage] [PDF]
@inbook{10.1145/3015783.3015796,
  title     = {Multimodal Gesture Recognition},
  author    = {Athanasios Katsamanis and Vassilis Pitsikalis and Stavros Theodorakis and Petros Maragos},
  url       = {https://dx.doi.org/10.1145/3015783.3015796 http://robotics.ntua.gr/wp-content/uploads/sites/2/2017_KPTM_MulitmodGestureRecogn_HandbookMMI_preprint.pdf},
  doi       = {10.1145/3015783.3015796},
  isbn      = {9781970001679},
  year      = {2017},
  date      = {2017-01-01},
  booktitle = {The Handbook of Multimodal-Multisensor Interfaces: Foundations, User Modeling, and Common Modality Combinations - Volume 1},
  pages     = {449--487},
  publisher = {Association for Computing Machinery and Morgan \& Claypool},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inbook}
}
|
2016 |
John N Karigiannis, Costas S Tzafestas Model-free learning on robot kinematic chains using a nested multi-agent topology Journal Article Journal of Experimental and Theoretical Artificial Intelligence, 28 (6), pp. 913–954, 2016, ISSN: 1362-3079. Abstract | BibTeX | Links: [PDF]
@article{321,
  title     = {Model-free learning on robot kinematic chains using a nested multi-agent topology},
  author    = {John N Karigiannis and Costas S Tzafestas},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Model-free-learning-on-robot-kinematic-chains-using-a-nested-multi-agent-topology.pdf},
  doi       = {10.1080/0952813X.2015.1042923},
  issn      = {1362-3079},
  year      = {2016},
  date      = {2016-01-01},
  journal   = {Journal of Experimental and Theoretical Artificial Intelligence},
  volume    = {28},
  number    = {6},
  pages     = {913--954},
  abstract  = {This paper proposes a model-free learning scheme for the developmental acquisition of robot kinematic control and dexterous manipulation skills. The approach is based on a nested-hierarchical multi-agent architecture that intuitively encapsulates the topology of robot kinematic chains, where the activity of each independent degree-of-freedom (DOF) is finally mapped onto a distinct agent. Each one of those agents progressively evolves a local kinematic control strategy in a game-theoretic sense, that is, based on a partial (local) view of the whole system topology, which is incrementally updated through a recursive communication process according to the nested-hierarchical topology. Learning is thus approached not through demonstration and training but through an autonomous self-exploration process. A fuzzy reinforcement learning scheme is employed within each agent to enable efficient exploration in a continuous state–action domain. This paper constitutes in fact a proof of concept, demonstrating that glo...},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
This paper proposes a model-free learning scheme for the developmental acquisition of robot kinematic control and dexterous manipulation skills. The approach is based on a nested-hierarchical multi-agent architecture that intuitively encapsulates the topology of robot kinematic chains, where the activity of each independent degree-of-freedom (DOF) is finally mapped onto a distinct agent. Each one of those agents progressively evolves a local kinematic control strategy in a game-theoretic sense, that is, based on a partial (local) view of the whole system topology, which is incrementally updated through a recursive communication process according to the nested-hierarchical topology. Learning is thus approached not through demonstration and training but through an autonomous self-exploration process. A fuzzy reinforcement learning scheme is employed within each agent to enable efficient exploration in a continuous state–action domain. This paper constitutes in fact a proof of concept, demonstrating that glo... |
N Kardaris, I Rodomagoulakis, V Pitsikalis, A Arvanitakis, P Maragos A Platform for Building New Human-Computer Interface Systems that Support Online Automatic Recognition of Audio-Gestural Commands Conference Proceedings of the 2016 ACM on Multimedia Conference, Amsterdam, The Netherlands, 2016. Abstract | BibTeX | Links: [PDF]
@conference{acm_kardaris_2016,
  title     = {A Platform for Building New Human-Computer Interface Systems that Support Online Automatic Recognition of Audio-Gestural Commands},
  author    = {N Kardaris and I Rodomagoulakis and V Pitsikalis and A Arvanitakis and P Maragos},
  url       = {http://robotics.ntua.gr/wp-content/publications/KRPAM_BuildingMultimodalInterfaces_ACM-MM2016.pdf},
  doi       = {10.1145/2964284.2973794},
  year      = {2016},
  date      = {2016-10-01},
  booktitle = {Proceedings of the 2016 ACM on Multimedia Conference},
  address   = {Amsterdam, The Netherlands},
  abstract  = {We introduce a new framework to build human-computer interfaces that provide online automatic audio-gestural command recognition. The overall system allows the construction of a multimodal interface that recognizes user input expressed naturally as audio commands and manual gestures, captured by sensors such as Kinect. It includes a component for acquiring multimodal user data which is used as input to a module responsible for training audio-gestural models. These models are employed by the automatic recognition component, which supports online recognition of audiovisual modalities. The overall framework is exemplified by a working system use case. This demonstrates the potential of the overall software platform, which can be employed to build other new human-computer interaction systems. Moreover, users may populate libraries of models and/or data that can be shared in the network. In this way users may reuse or extend existing systems.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
We introduce a new framework to build human-computer interfaces that provide online automatic audio-gestural command recognition. The overall system allows the construction of a multimodal interface that recognizes user input expressed naturally as audio commands and manual gestures, captured by sensors such as Kinect. It includes a component for acquiring multimodal user data which is used as input to a module responsible for training audio-gestural models. These models are employed by the automatic recognition component, which supports online recognition of audiovisual modalities. The overall framework is exemplified by a working system use case. This demonstrates the potential of the overall software platform, which can be employed to build other new human-computer interaction systems. Moreover, users may populate libraries of models and/or data that can be shared in the network. In this way users may reuse or extend existing systems. |
A Guler, N Kardaris, S Chandra, V Pitsikalis, C Werner, K Hauer, C Tzafestas, P Maragos, I Kokkinos Human Joint Angle Estimation and Gesture Recognition for Assistive Robotic Vision Conference Proc. of Workshop on Assistive Computer Vision and Robotics, European Conf. on Computer Vision (ECCV-2016), Amsterdam, The Netherlands, 2016. Abstract | BibTeX | Links: [PDF]
@conference{guler_joint_gesture_2016,
  author    = {A Guler and N Kardaris and S Chandra and V Pitsikalis and C Werner and K Hauer and C Tzafestas and P Maragos and I Kokkinos},
  title     = {Human Joint Angle Estimation and Gesture Recognition for Assistive Robotic Vision},
  booktitle = {Proc. of Workshop on Assistive Computer Vision and Robotics, European Conf. on Computer Vision (ECCV-2016)},
  address   = {Amsterdam, The Netherlands},
  year      = {2016},
  date      = {2016-10-01},
  doi       = {10.1007/978-3-319-48881-3_29},
  url       = {http://robotics.ntua.gr/wp-content/publications/PoseEstimGestureRecogn-AssistRobotVision_ACVR2016-ECCV-Workshop.pdf},
  abstract  = {We explore new directions for automatic human gesture recognition and human joint angle estimation as applied for human-robot interaction in the context of an actual challenging task of assistive living for real-life elderly subjects. Our contributions include state-of-the-art approaches for both low- and mid-level vision, as well as for higher level action and gesture recognition. The first direction investigates a deep learning based framework for the challenging task of human joint angle estimation on noisy real world RGB-D images. The second direction includes the employment of dense trajectory features for online processing of videos for automatic gesture recognition with real-time performance. Our approaches are evaluated both qualitative and quantitatively on a newly acquired dataset that is constructed on a challenging real-life scenario on assistive living for elderly subjects.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
We explore new directions for automatic human gesture recognition and human joint angle estimation as applied for human-robot interaction in the context of an actual challenging task of assistive living for real-life elderly subjects. Our contributions include state-of-the-art approaches for both low- and mid-level vision, as well as for higher level action and gesture recognition. The first direction investigates a deep learning based framework for the challenging task of human joint angle estimation on noisy real world RGB-D images. The second direction includes the employment of dense trajectory features for online processing of videos for automatic gesture recognition with real-time performance. Our approaches are evaluated both qualitative and quantitatively on a newly acquired dataset that is constructed on a challenging real-life scenario on assistive living for elderly subjects. |
N Kardaris, V Pitsikalis, E Mavroudi, P Maragos Introducing Temporal Order of Dominant Visual Word Sub-Sequences for Human Action Recognition Conference Proc. of IEEE Int'l Conf. on Image Processing (ICIP-2016), Phoenix, AZ, USA, 2016. Abstract | BibTeX | Links: [PDF]
@conference{acm_kardaris_2016b,
  author    = {N Kardaris and V Pitsikalis and E Mavroudi and P Maragos},
  title     = {Introducing Temporal Order of Dominant Visual Word Sub-Sequences for Human Action Recognition},
  booktitle = {Proc. of IEEE Int'l Conf. on Image Processing (ICIP-2016)},
  address   = {Phoenix, AZ, USA},
  year      = {2016},
  date      = {2016-09-01},
  doi       = {10.1109/ICIP.2016.7532922},
  url       = {http://robotics.ntua.gr/wp-content/publications/KRPAM_BuildingMultimodalInterfaces_ACM-MM2016.pdf},
  abstract  = {We present a novel video representation for human action recognition by considering temporal sequences of visual words. Based on state-of-the-art dense trajectories, we introduce temporal bundles of dominant, that is most frequent, visual words. These are employed to construct a complementary action representation of ordered dominant visual word sequences, that additionally incorporates fine-grained temporal information. We exploit the introduced temporal information by applying local sub-sequence alignment that quantifies the similarity between sequences. This facilitates the fusion of our representation with the bag-of-visual-words (BoVW) representation. Our approach incorporates sequential temporal structure and results in a low-dimensional representation compared to the BoVW, while still yielding a descent result when combined with it. Experiments on the KTH, Hollywood2 and the challenging HMDB51 datasets show that the proposed framework is complementary to the BoVW representation, which discards temporal order},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
We present a novel video representation for human action recognition by considering temporal sequences of visual words. Based on state-of-the-art dense trajectories, we introduce temporal bundles of dominant, that is most frequent, visual words. These are employed to construct a complementary action representation of ordered dominant visual word sequences, that additionally incorporates fine-grained temporal information. We exploit the introduced temporal information by applying local sub-sequence alignment that quantifies the similarity between sequences. This facilitates the fusion of our representation with the bag-of-visual-words (BoVW) representation. Our approach incorporates sequential temporal structure and results in a low-dimensional representation compared to the BoVW, while still yielding a descent result when combined with it. Experiments on the KTH, Hollywood2 and the challenging HMDB51 datasets show that the proposed framework is complementary to the BoVW representation, which discards temporal order |
G Karamanolakis, E Iosif, A Zlatintsi, A Pikrakis, A Potamianos Audio-Based Distributional Representations of Meaning Using a Fusion of Feature Encodings Conference Proc. Interspeech 2016, San Francisco, CA, USA, 2016. Abstract | BibTeX | Links: [Webpage] [PDF]
@conference{Karamanolakis2016,
  author    = {G Karamanolakis and E Iosif and A Zlatintsi and A Pikrakis and A Potamianos},
  title     = {Audio-Based Distributional Representations of Meaning Using a Fusion of Feature Encodings},
  booktitle = {Proc. Interspeech 2016},
  address   = {San Francisco, CA, USA},
  year      = {2016},
  date      = {2016-09-01},
  url       = {http://robotics.ntua.gr/wp-content/publications/Karamanolakis+_Interspeech16.PDF http://robotics.ntua.gr/wp-content/uploads/sites/2/karamanolakis16_interspeech.pdf},
  abstract  = {Recently a “Bag-of-Audio-Words” approach was proposed [1] for the combination of lexical features with audio clips in a multimodal semantic representation, i.e., an Audio Distributional Semantic Model (ADSM). An important step towards the creation of ADSMs is the estimation of the semantic distance between clips in the acoustic space, which is especially challenging given the diversity of audio collections. In this work, we investigate the use of different feature encodings in order to address this challenge following a two-step approach. First, an audio clip is categorized with respect to three classes, namely, music, speech and other. Next, the feature encodings are fused according to the posterior probabilities estimated in the previous step. Using a collection of audio clips annotated with tags we derive a mapping between words and audio clips. Based on this mapping and the proposed audio semantic distance, we construct an ADSM model in order to compute the distance between words (lexical semantic similarity task). The proposed model is shown to significantly outperform (23.6% relative improvement in correlation coefficient) the state-of-the-art results reported in the literature.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Recently a “Bag-of-Audio-Words” approach was proposed [1] for the combination of lexical features with audio clips in a multimodal semantic representation, i.e., an Audio Distributional Semantic Model (ADSM). An important step towards the creation of ADSMs is the estimation of the semantic distance between clips in the acoustic space, which is especially challenging given the diversity of audio collections. In this work, we investigate the use of different feature encodings in order to address this challenge following a two-step approach. First, an audio clip is categorized with respect to three classes, namely, music, speech and other. Next, the feature encodings are fused according to the posterior probabilities estimated in the previous step. Using a collection of audio clips annotated with tags we derive a mapping between words and audio clips. Based on this mapping and the proposed audio semantic distance, we construct an ADSM model in order to compute the distance between words (lexical semantic similarity task). The proposed model is shown to significantly outperform (23.6% relative improvement in correlation coefficient) the state-of-the-art results reported in the literature. |
G Panagiotaropoulou, P Koutras, A Katsamanis, P Maragos, A Zlatintsi, A Protopapas, E Karavasilis, N Smyrnis fMRI-based Perceptual Validation of a computational Model for Visual and Auditory Saliency in Videos Conference Proc. {IEEE} Int'l Conf. on Image Processing (ICIP-2016), Phoenix, AZ, USA, 2016. Abstract | BibTeX | Links: [PDF]
@conference{PKK+16,
  author    = {G Panagiotaropoulou and P Koutras and A Katsamanis and P Maragos and A Zlatintsi and A Protopapas and E Karavasilis and N Smyrnis},
  title     = {fMRI-based Perceptual Validation of a computational Model for Visual and Auditory Saliency in Videos},
  booktitle = {Proc. {IEEE} Int'l Conf. on Image Processing (ICIP-2016)},
  address   = {Phoenix, AZ, USA},
  year      = {2016},
  date      = {2016-09-01},
  doi       = {10.1109/ICIP.2016.7532447},
  url       = {http://robotics.ntua.gr/wp-content/publications/PanagiotaropoulouEtAl_fMRI-Validation-CompAVsaliencyVideos_ICIP2016.pdf},
  abstract  = {In this study, we make use of brain activation data to investigate the perceptual plausibility of a visual and an auditory model for visual and auditory saliency in video processing. These models have already been successfully employed in a number of applications. In addition, we experiment with parameters, modifications and suitable fusion schemes. As part of this work, fMRI data from complex video stimuli were collected, on which we base our analysis and results. The core part of the analysis involves the use of well-established methods for the manipulation of fMRI data and the examination of variability across brain responses of different individuals. Our results indicate a success in confirming the value of these saliency models in terms of perceptual plausibility.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
In this study, we make use of brain activation data to investigate the perceptual plausibility of a visual and an auditory model for visual and auditory saliency in video processing. These models have already been successfully employed in a number of applications. In addition, we experiment with parameters, modifications and suitable fusion schemes. As part of this work, fMRI data from complex video stimuli were collected, on which we base our analysis and results. The core part of the analysis involves the use of well-established methods for the manipulation of fMRI data and the examination of variability across brain responses of different individuals. Our results indicate a success in confirming the value of these saliency models in terms of perceptual plausibility. |
G Chalvatzaki, X S Papageorgiou, C Werner, K Hauer, C S Tzafestas, P Maragos Experimental comparison of human gait tracking algorithms: Towards a context-aware mobility assistance robotic walker Conference Mediterranean Conference on Control and Automation (MED), Athens, Greece, 2016. Abstract | BibTeX | Links: [PDF]
@conference{CPWHTM16,
  author    = {G Chalvatzaki and X S Papageorgiou and C Werner and K Hauer and C S Tzafestas and P Maragos},
  title     = {Experimental comparison of human gait tracking algorithms: Towards a context-aware mobility assistance robotic walker},
  booktitle = {Mediterranean Conference on Control and Automation (MED)},
  pages     = {719--724},
  address   = {Athens, Greece},
  year      = {2016},
  date      = {2016-06-01},
  doi       = {10.1109/MED.2016.7535962},
  url       = {http://robotics.ntua.gr/wp-content/publications/CPWHTM16.pdf},
  abstract  = {Towards a mobility assistance robot for the elderly, it is essential to develop a robust and accurate gait tracking system. Various pathologies cause mobility inabilities to the aged population, leading to different gait patterns and walking speed. In this work, we present the experimental comparison of two user leg tracking systems of a robotic assistance walker, using data collected by a laser range sensor. The first one is a Kalman Filter tracking system, while the second one proposes the use of Particle Filters. The tracking systems provide the positions and velocities of the user's legs, which are used as observations into an HMM-based gait phases recognition system. The spatiotemporal results of the HMM framework are employed for computing parameters that characterize the human motion, which subsequently can be used to assess and distinguish between possible motion disabilities. For the experimental comparison, we are using real data collected from an ensemble of different elderly persons with a number of pathologies, and ground truth data from a GaitRite System. The results presented in this work, demonstrate the applicability of the tracking systems in real test cases.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Towards a mobility assistance robot for the elderly, it is essential to develop a robust and accurate gait tracking system. Various pathologies cause mobility inabilities to the aged population, leading to different gait patterns and walking speed. In this work, we present the experimental comparison of two user leg tracking systems of a robotic assistance walker, using data collected by a laser range sensor. The first one is a Kalman Filter tracking system, while the second one proposes the use of Particle Filters. The tracking systems provide the positions and velocities of the user's legs, which are used as observations into an HMM-based gait phases recognition system. The spatiotemporal results of the HMM framework are employed for computing parameters that characterize the human motion, which subsequently can be used to assess and distinguish between possible motion disabilities. For the experimental comparison, we are using real data collected from an ensemble of different elderly persons with a number of pathologies, and ground truth data from a GaitRite System. The results presented in this work, demonstrate the applicability of the tracking systems in real test cases. |
A C Dometios, X S Papageorgiou, C S Tzafestas, P Vartholomeos Towards ICT-supported Bath Robots: Control Architecture Description and Localized Perception of User for Robot Motion Planning Conference Mediterranean Conference on Control and Automation (MED), Athens, Greece, 2016. Abstract | BibTeX | Links: [PDF]
@conference{DPTV16,
  author    = {A C Dometios and X S Papageorgiou and C S Tzafestas and P Vartholomeos},
  title     = {Towards ICT-supported Bath Robots: Control Architecture Description and Localized Perception of User for Robot Motion Planning},
  booktitle = {Mediterranean Conference on Control and Automation (MED)},
  pages     = {713--718},
  address   = {Athens, Greece},
  year      = {2016},
  date      = {2016-06-01},
  doi       = {10.1109/MED.2016.7535954},
  url       = {http://robotics.ntua.gr/wp-content/publications/Dometios16_Control_Architecture_Description_Motion_Planning.pdf},
  abstract  = {This paper describes the general control architecture and the basic implementation concepts of a bath service robotic system. The goal of this system is to support and enhance elderly’s mobility, manipulation and force exertion abilities and assist them in successfully, safely and independently completing the entire sequence of showering and drying tasks, such as properly washing their back and lower limbs. This service robotic system is based on soft-robotic arms which, together with advanced human-robot force/compliance control will form the basis for a safe physical human-robot interaction that complies with the most up-to-date safety standards. In this paper an overview of the bath robotic system components is presented, and the basic modules that contribute to the overall control architecture of the system are described. Moreover, this paper proposed an algorithm that performs efficient processing of feedback data provided by a depth sensor. This algorithm supports local shape perception and geometric characterization of user body parts and will form the basis for further implementation of surface reconstruction and robot motion planning algorithms.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
This paper describes the general control architecture and the basic implementation concepts of a bath service robotic system. The goal of this system is to support and enhance elderly’s mobility, manipulation and force exertion abilities and assist them in successfully, safely and independently completing the entire sequence of showering and drying tasks, such as properly washing their back and lower limbs. This service robotic system is based on soft-robotic arms which, together with advanced human-robot force/compliance control will form the basis for a safe physical human-robot interaction that complies with the most up-to-date safety standards. In this paper an overview of the bath robotic system components is presented, and the basic modules that contribute to the overall control architecture of the system are described. Moreover, this paper proposed an algorithm that performs efficient processing of feedback data provided by a depth sensor. This algorithm supports local shape perception and geometric characterization of user body parts and will form the basis for further implementation of surface reconstruction and robot motion planning algorithms. |
X S Papageorgiou, G Chalvatzaki, K N Lianos, C Werner, K Hauer, C S Tzafestas, P Maragos Experimental validation of human pathological gait analysis for an assisted living intelligent robotic walker Conference Proc. IEEE Int'l Conf. on Biomedical Robotics and Biomechatronics (BioRob), 2016. Abstract | BibTeX | Links: [PDF]
@conference{BIOROB2016,
  author    = {X S Papageorgiou and G Chalvatzaki and K N Lianos and C Werner and K Hauer and C S Tzafestas and P Maragos},
  title     = {Experimental validation of human pathological gait analysis for an assisted living intelligent robotic walker},
  booktitle = {Proc. IEEE Int'l Conf. on Biomedical Robotics and Biomechatronics (BioRob)},
  pages     = {1086--1091},
  year      = {2016},
  date      = {2016-06-01},
  doi       = {10.1109/BIOROB.2016.7523776},
  url       = {http://robotics.ntua.gr/wp-content/publications/BIOROB2016.pdf},
  abstract  = {A robust and effective gait analysis functionality is an essential characteristic for an assistance mobility robot dealing with elderly persons. The aforementioned functionality is crucial for dealing with mobility disabilities which are widespread in these parts of the population. In this work we present experimental validation of our in house developed system. We are using real data, collected from an ensemble of different elderly persons with a number of pathologies, and we present a validation study by using a GaitRite System. Our system, following the standard literature conventions, characterizes the human motion with a set of parameters which subsequently can be used to assess and distinguish between possible motion disabilities, using a laser range finder as its main sensor. The initial results, presented in this work, demonstrate the applicability of our framework in real test cases. Regarding such frameworks, a crucial technical question is the necessary complexity of the overall tracking system. To answer this question, we compare two approaches with different complexity levels. The first is a static rule based system acting on filtered laser data, while the second system utilizes a Hidden Markov Model for gait cycle estimation, and extraction of the gait parameters. The results demonstrate that the added complexity of the HMM system is necessary for improving the accuracy and efficacy of the system.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
A robust and effective gait analysis functionality is an essential characteristic for an assistance mobility robot dealing with elderly persons. The aforementioned functionality is crucial for dealing with mobility disabilities which are widespread in these parts of the population. In this work we present experimental validation of our in house developed system. We are using real data, collected from an ensemble of different elderly persons with a number of pathologies, and we present a validation study by using a GaitRite System. Our system, following the standard literature conventions, characterizes the human motion with a set of parameters which subsequently can be used to assess and distinguish between possible motion disabilities, using a laser range finder as its main sensor. The initial results, presented in this work, demonstrate the applicability of our framework in real test cases. Regarding such frameworks, a crucial technical question is the necessary complexity of the overall tracking system. To answer this question, we compare two approaches with different complexity levels. The first is a static rule based system acting on filtered laser data, while the second system utilizes a Hidden Markov Model for gait cycle estimation, and extraction of the gait parameters. The results demonstrate that the added complexity of the HMM system is necessary for improving the accuracy and efficacy of the system. |
A Tsiami, A Katsamanis, P Maragos, A Vatakis Towards a behaviorally-validated computational audiovisual saliency model Conference Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing, Shanghai, China, 2016. Abstract | BibTeX | Links: [PDF]
@conference{7472197,
  author    = {A Tsiami and A Katsamanis and P Maragos and A Vatakis},
  title     = {Towards a behaviorally-validated computational audiovisual saliency model},
  booktitle = {Proc. IEEE Int'l Conf. Acous., Speech, and Signal Processing},
  pages     = {2847--2851},
  address   = {Shanghai, China},
  year      = {2016},
  date      = {2016-03-01},
  doi       = {10.1109/ICASSP.2016.7472197},
  url       = {http://robotics.ntua.gr/wp-content/publications/TKMV_BehaviorComputAVSaliencyModel_ICASSP2016.pdf},
  abstract  = {Computational saliency models aim at predicting, in a bottom-up fashion, where human attention is drawn in the presented (visual, auditory or audiovisual) scene and have been proven useful in applications like robotic navigation, image compression and movie summarization. Despite the fact that well-established auditory and visual saliency models have been validated in behavioral experiments, e.g., by means of eye-tracking, there is no established computational audiovisual saliency model validated in the same way. In this work, building on biologically-inspired models of visual and auditory saliency, we present a joint audiovisual saliency model and introduce the validation approach we follow to show that it is compatible with recent findings of psychology and neuroscience regarding multimodal integration and attention. In this direction, we initially focus on the "pip and pop" effect which has been observed in behavioral experiments and indicates that visual search in sequences of cluttered images can be significantly aided by properly timed non-spatial auditory signals presented alongside the target visual stimuli.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Computational saliency models aim at predicting, in a bottom-up fashion, where human attention is drawn in the presented (visual, auditory or audiovisual) scene and have been proven useful in applications like robotic navigation, image compression and movie summarization. Despite the fact that well-established auditory and visual saliency models have been validated in behavioral experiments, e.g., by means of eye-tracking, there is no established computational audiovisual saliency model validated in the same way. In this work, building on biologically-inspired models of visual and auditory saliency, we present a joint audiovisual saliency model and introduce the validation approach we follow to show that it is compatible with recent findings of psychology and neuroscience regarding multimodal integration and attention. In this direction, we initially focus on the "pip and pop" effect which has been observed in behavioral experiments and indicates that visual search in sequences of cluttered images can be significantly aided by properly timed non-spatial auditory signals presented alongside the target visual stimuli. |
Christos G. Bampis, Petros Maragos, Alan C. Bovik Projective non-negative matrix factorization for unsupervised graph clustering Conference Proceedings - International Conference on Image Processing, ICIP, 2016-August , 2016, ISSN: 1522-4880.
@conference{328,
  author    = {Christos G. Bampis and Petros Maragos and Alan C. Bovik},
  title     = {Projective non-negative matrix factorization for unsupervised graph clustering},
  booktitle = {Proceedings - International Conference on Image Processing, ICIP},
  volume    = {2016-August},
  pages     = {1255--1258},
  year      = {2016},
  date      = {2016-01-01},
  doi       = {10.1109/ICIP.2016.7532559},
  issn      = {1522-4880},
  url       = {http://robotics.ntua.gr/wp-content/uploads/publications/BampisMaragosBovik_GRPNMF-UnsupervisGraphCluster_ICIP2016.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
Georgia Panagiotaropoulou, Petros Koutras, Athanasios Katsamanis, Petros Maragos, Athanasia Zlatintsi, Athanassios Protopapas, Efstratios Karavasilis, Nikolaos Smyrnis fMRI-based perceptual validation of a computational model for visual and auditory saliency in videos Conference Proceedings - International Conference on Image Processing, ICIP, 2016-August , 2016, ISSN: 1522-4880. Abstract | BibTeX | Links: [PDF]
@conference{332,
  author    = {Georgia Panagiotaropoulou and Petros Koutras and Athanasios Katsamanis and Petros Maragos and Athanasia Zlatintsi and Athanassios Protopapas and Efstratios Karavasilis and Nikolaos Smyrnis},
  title     = {{fMRI}-based perceptual validation of a computational model for visual and auditory saliency in videos},
  booktitle = {Proceedings - International Conference on Image Processing, ICIP},
  volume    = {2016-August},
  pages     = {699--703},
  year      = {2016},
  date      = {2016-01-01},
  doi       = {10.1109/ICIP.2016.7532447},
  issn      = {1522-4880},
  url       = {http://robotics.ntua.gr/wp-content/uploads/publications/PanagiotaropoulouEtAl_fMRI-Validation-CompAVsaliencyVideos_ICIP2016.pdf},
  abstract  = {In this study, we make use of brain activation data to investigate the perceptual plausibility of a visual and an auditory model for visual and auditory saliency in video processing. These models have already been successfully employed in a number of applications. In addition, we experiment with parameters, modifications and suitable fusion schemes. As part of this work, fMRI data from complex video stimuli were collected, on which we base our analysis and results. The core part of the analysis involves the use of well-established methods for the manipulation of fMRI data and the examination of variability across brain responses of different individuals. Our results indicate a success in confirming the value of these saliency models in terms of perceptual plausibility.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
In this study, we make use of brain activation data to investigate the perceptual plausibility of a visual and an auditory model for visual and auditory saliency in video processing. These models have already been successfully employed in a number of applications. In addition, we experiment with parameters, modifications and suitable fusion schemes. As part of this work, fMRI data from complex video stimuli were collected, on which we base our analysis and results. The core part of the analysis involves the use of well-established methods for the manipulation of fMRI data and the examination of variability across brain responses of different individuals. Our results indicate a success in confirming the value of these saliency models in terms of perceptual plausibility. |
I. Rodomagoulakis, N. Kardaris, V. Pitsikalis, A. Arvanitakis, P. Maragos A multimedia gesture dataset for human robot communication: Acquisition, tools and recognition results Conference Proceedings - International Conference on Image Processing, ICIP, 2016-August , 2016, ISSN: 1522-4880. Abstract | BibTeX | Links: [PDF]
@conference{334,
  author    = {I. Rodomagoulakis and N. Kardaris and V. Pitsikalis and A. Arvanitakis and P. Maragos},
  title     = {A multimedia gesture dataset for human robot communication: Acquisition, tools and recognition results},
  booktitle = {Proceedings - International Conference on Image Processing, ICIP},
  volume    = {2016-August},
  pages     = {3066--3070},
  year      = {2016},
  date      = {2016-01-01},
  doi       = {10.1109/ICIP.2016.7532923},
  issn      = {1522-4880},
  url       = {http://robotics.ntua.gr/wp-content/uploads/publications/RKPAM_MultimedaGestureDataset-HRI_ICIP2016.pdf},
  abstract  = {Motivated by the recent advances in human-robot interaction we present a new dataset, a suite of tools to handle it and state-of-the-art work on visual gestures and audio commands recognition. The dataset has been collected with an integrated annotation and acquisition web-interface that facilitates on-the-way temporal ground-truths for fast acquisition. The dataset includes gesture instances in which the subjects are not in strict setup positions, and contains multiple scenarios, not restricted to a single static configuration. We accompany it by a valuable suite of tools as the practical interface to acquire audio-visual data in the robotic operating system, a state-of-the-art learning pipeline to train visual gesture and audio command models, and an online gesture recognition system. Finally, we include a rich evaluation of the dataset providing rich and insightfull experimental recognition results.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Motivated by the recent advances in human-robot interaction we present a new dataset, a suite of tools to handle it and state-of-the-art work on visual gestures and audio commands recognition. The dataset has been collected with an integrated annotation and acquisition web-interface that facilitates on-the-way temporal ground-truths for fast acquisition. The dataset includes gesture instances in which the subjects are not in strict setup positions, and contains multiple scenarios, not restricted to a single static configuration. We accompany it by a valuable suite of tools as the practical interface to acquire audio-visual data in the robotic operating system, a state-of-the-art learning pipeline to train visual gesture and audio command models, and an online gesture recognition system. Finally, we include a rich evaluation of the dataset providing rich and insightfull experimental recognition results. |
Petros Maragos, Vassilis Pitsikalis, Athanasios Katsamanis, George Pavlakos, Stavros Theodorakis On Shape Recognition and Language Book Chapter Breuß, Michael; Bruckstein, Alfred; Maragos, Petros; Wuhrer, Stefanie (Ed.): Perspectives in Shape Analysis, pp. 321–344, Springer International Publishing, Cham, 2016, ISBN: 978-3-319-24726-7. Abstract | BibTeX | Links: [PDF]
@inbook{Maragos2016,
  author    = {Petros Maragos and Vassilis Pitsikalis and Athanasios Katsamanis and George Pavlakos and Stavros Theodorakis},
  editor    = {Michael Breuß and Alfred Bruckstein and Petros Maragos and Stefanie Wuhrer},
  title     = {On Shape Recognition and Language},
  booktitle = {Perspectives in Shape Analysis},
  pages     = {321--344},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2016},
  date      = {2016-01-01},
  isbn      = {978-3-319-24726-7},
  url       = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2016_MaragosEtAl_ShapeRecognitionAndLanguage_PerspectivesInShapeAnalysis_Springer-1.pdf},
  abstract  = {Shapes convey meaning. Language is efficient in expressing and structuring meaning. The main thesis of this chapter is that by integrating shape with linguistic information shape recognition can be improved in performance. It broadens the concept of shape to visual shapes that include both geometric and optical information and explores ways that additional linguistic information may help with shape recognition. Towards this goal, it briefly describes some shape categories which have the potential of better recognition via language, with emphasis on gestures and moving shapes of sign language, as well as on cross-modal relations between vision and language in videos. It also draws inspiration from psychological studies that explore connections between gestures and human languages. Afterwards, it focuses on the broad class of multimodal gestures that combine spatio-temporal visual shapes with audio information. In this area, an approach is reviewed that significantly improves multimodal gesture recognition by fusing 3D shape information from motion-position of gesturing hands/arms and spatio-temporal handshapes in color and depth visual channels with audio information in the form of acoustically recognized sequences of gesture words.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inbook}
}
Shapes convey meaning. Language is efficient in expressing and structuring meaning. The main thesis of this chapter is that by integrating shape with linguistic information shape recognition can be improved in performance. It broadens the concept of shape to visual shapes that include both geometric and optical information and explores ways that additional linguistic information may help with shape recognition. Towards this goal, it briefly describes some shape categories which have the potential of better recognition via language, with emphasis on gestures and moving shapes of sign language, as well as on cross-modal relations between vision and language in videos. It also draws inspiration from psychological studies that explore connections between gestures and human languages. Afterwards, it focuses on the broad class of multimodal gestures that combine spatio-temporal visual shapes with audio information. In this area, an approach is reviewed that significantly improves multimodal gesture recognition by fusing 3D shape information from motion-position of gesturing hands/arms and spatio-temporal handshapes in color and depth visual channels with audio information in the form of acoustically recognized sequences of gesture words. |
2015 |
Stamatios Lefkimmiatis, Anastasios Roussos, Petros Maragos, Michael Unser Structure Tensor Total Variation Journal Article SIAM Journal on Imaging Sciences, 8 (2), pp. 1090–1122, 2015, ISSN: 1936-4954. Abstract | BibTeX | Links: [Webpage] [PDF]
@article{305,
  author    = {Stamatios Lefkimmiatis and Anastasios Roussos and Petros Maragos and Michael Unser},
  title     = {Structure Tensor Total Variation},
  journal   = {SIAM Journal on Imaging Sciences},
  volume    = {8},
  number    = {2},
  pages     = {1090--1122},
  year      = {2015},
  date      = {2015-05-01},
  doi       = {10.1137/14098154X},
  issn      = {1936-4954},
  url       = {http://epubs.siam.org/doi/10.1137/14098154X http://robotics.ntua.gr/wp-content/uploads/sites/2/LefkimmiatisRoussosMaragosUnser_STV_siamjIS2015.pdf},
  abstract  = {We introduce a novel generic energy functional that we employ to solve inverse imaging problems within a variational framework. The proposed regularization family, termed as structure tensor total variation (STV), penalizes the eigenvalues of the structure tensor and is suitable for both grayscale and vector-valued images. It generalizes several existing variational penalties, including the total variation seminorm and vectorial extensions of it. Meanwhile, thanks to the structure tensor's ability to capture first-order information around a local neighborhood, the STV functionals can provide more robust measures of image variation. Further, we prove that the STV regularizers are convex while they also satisfy several invariance properties w.r.t. image transformations. These properties qualify them as ideal candidates for imaging applications. In addition, for the discrete version of the STV functionals we derive an equivalent definition that is based on the patch-based Jacobian operator, a novel linear op...},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
We introduce a novel generic energy functional that we employ to solve inverse imaging problems within a variational framework. The proposed regularization family, termed as structure tensor total variation (STV), penalizes the eigenvalues of the structure tensor and is suitable for both grayscale and vector-valued images. It generalizes several existing variational penalties, including the total variation seminorm and vectorial extensions of it. Meanwhile, thanks to the structure tensor's ability to capture first-order information around a local neighborhood, the STV functionals can provide more robust measures of image variation. Further, we prove that the STV regularizers are convex while they also satisfy several invariance properties w.r.t. image transformations. These properties qualify them as ideal candidates for imaging applications. In addition, for the discrete version of the STV functionals we derive an equivalent definition that is based on the patch-based Jacobian operator, a novel linear op... |
Vassilis Pitsikalis, Athanasios Katsamanis, Stavros Theodorakis, Petros Maragos Multimodal Gesture Recognition via Multiple Hypotheses Rescoring Journal Article Journal of Machine Learning Research, 16 (1), pp. 255-284, 2015. BibTeX | Links: [Webpage] [PDF] @article{144, title = {Multimodal Gesture Recognition via Multiple Hypotheses Rescoring}, author = {Vassilis Pitsikalis and Athanasios Katsamanis and Stavros Theodorakis and Petros Maragos}, url = {http://link.springer.com/10.1007/978-3-319-57021-1_16 http://robotics.ntua.gr/wp-content/uploads/sites/2/pitsikalis15a.pdf}, doi = {10.1007/978-3-319-57021-1_16}, year = {2015}, date = {2015-01-01}, journal = {Journal of Machine Learning Research}, volume = {16}, number = {1}, pages = {255-284}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Theodora Chaspari, Constantin Soldatos, Petros Maragos The development of the Athens Emotional States Inventory (AESI): collection, validation and automatic processing of emotionally loaded sentences Journal Article The World Journal of Biological Psychiatry, 16 (5), pp. 312–322, 2015. @article{chaspari2015development, title = {The development of the Athens Emotional States Inventory (AESI): collection, validation and automatic processing of emotionally loaded sentences}, author = {Theodora Chaspari and Constantin Soldatos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/ChaspariSoldatosMaragos_AESI_WJBP2015_postprint.pdf}, doi = {10.3109/15622975.2015.1012228}, year = {2015}, date = {2015-01-01}, journal = {The World Journal of Biological Psychiatry}, volume = {16}, number = {5}, pages = {312--322}, publisher = {Taylor & Francis}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
X S Papageorgiou, C S Tzafestas, C Vartholomeos Laschi, R Lopez ICT-Supported Bath Robots: Design Concepts Conference C_ICSR, 2015. Abstract | BibTeX | Links: [PDF] @conference{ICSR2015_1, title = {ICT-Supported Bath Robots: Design Concepts}, author = {X S Papageorgiou and C S Tzafestas and C Vartholomeos Laschi and R Lopez}, url = {http://robotics.ntua.gr/wp-content/publications/ICSR2015_1.pdf}, year = {2015}, date = {2015-10-01}, booktitle = {C_ICSR}, abstract = {This paper presents the concept and the architecture of the I-SUPPORT service robotics system. The goal of the I-SUPPORT system is to support and enhance older adults mobility, manipulation and force exertion abilities and assist them in successfully, safely and independently completing the entire sequence of showering tasks, such as properly washing their back, their upper parts, their lower limbs, their buttocks and groin, and to effectively use the towel for drying purposes. Adaptation and integration of state-of-the-art, cost-effective, soft-robotic arms will provide the hardware constituents, which, together with advanced human-robot force/compliance control will form the basis for a safe physical human-robot interaction that complies with the most up-to-date safety standards. Human behavioural, sociological, safety, ethical and acceptability aspects, as well as financial factors related to the proposed service robotics system will be thoroughly investigated and evaluated so that the I-SUPPORT end result is a close-to-market prototype, applicable to realistic living settings.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper presents the concept and the architecture of the I-SUPPORT service robotics system. 
The goal of the I-SUPPORT system is to support and enhance older adults mobility, manipulation and force exertion abilities and assist them in successfully, safely and independently completing the entire sequence of showering tasks, such as properly washing their back, their upper parts, their lower limbs, their buttocks and groin, and to effectively use the towel for drying purposes. Adaptation and integration of state-of-the-art, cost-effective, soft-robotic arms will provide the hardware constituents, which, together with advanced human-robot force/compliance control will form the basis for a safe physical human-robot interaction that complies with the most up-to-date safety standards. Human behavioural, sociological, safety, ethical and acceptability aspects, as well as financial factors related to the proposed service robotics system will be thoroughly investigated and evaluated so that the I-SUPPORT end result is a close-to-market prototype, applicable to realistic living settings. |
X S Papageorgiou, G Moustris, V Pitsikalis, G Chalvatzaki, A Dometios, N Kardaris, C S Tzafestas, P Maragos User-Oriented Cognitive Interaction and Control for an Intelligent Robotic Walker Conference 17th International Conference on Social Robotics (ICSR 2015), 2015. Abstract | BibTeX | Links: [PDF] @conference{ICSR2015_2, title = {User-Oriented Cognitive Interaction and Control for an Intelligent Robotic Walker}, author = {X S Papageorgiou and G Moustris and V Pitsikalis and G Chalvatzaki and A Dometios and N Kardaris and C S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/ICSR2015_2.pdf}, year = {2015}, date = {2015-10-01}, booktitle = {17th International Conference on Social Robotics (ICSR 2015)}, abstract = {Mobility impairments are prevalent in the elderly population and constitute one of the main causes related to difficulties in performing Activities of Daily Living (ADLs) and consequent reduction of quality of life. This paper reports current research work related to the control of an intelligent robotic rollator aiming to provide user-adaptive and context-aware walking assistance. To achieve such targets, a large spectrum of multimodal sensory processing and interactive control modules need to be developed and seamlessly integrated, that can, on one side track and analyse human motions and actions, in order to detect pathological situations and estimate user needs, while predicting at the same time the user (short-term or long-range) intentions in order to adapt robot control actions and supportive behaviours accordingly. 
User-oriented human-robot interaction and control refers to the functionalities that couple the motions, the actions and, in more general terms, the behaviours of the assistive robotic device to the user in a non-physical interaction context.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Mobility impairments are prevalent in the elderly population and constitute one of the main causes related to difficulties in performing Activities of Daily Living (ADLs) and consequent reduction of quality of life. This paper reports current research work related to the control of an intelligent robotic rollator aiming to provide user-adaptive and context-aware walking assistance. To achieve such targets, a large spectrum of multimodal sensory processing and interactive control modules need to be developed and seamlessly integrated, that can, on one side track and analyse human motions and actions, in order to detect pathological situations and estimate user needs, while predicting at the same time the user (short-term or long-range) intentions in order to adapt robot control actions and supportive behaviours accordingly. User-oriented human-robot interaction and control refers to the functionalities that couple the motions, the actions and, in more general terms, the behaviours of the assistive robotic device to the user in a non-physical interaction context. |
G Chalvatzaki, X S Papageorgiou, C S Tzafestas Gait Modelling for a Context-Aware User-Adaptive Robotic Assistant Platform Conference 2015, ISSN: 978-88-97999-63-8. Abstract | BibTeX | Links: [PDF] @conference{CPT15, title = {Gait Modelling for a Context-Aware User-Adaptive Robotic Assistant Platform}, author = {G Chalvatzaki and X S Papageorgiou and C S Tzafestas}, url = {http://robotics.ntua.gr/wp-content/publications/CPT15.pdf}, issn = {978-88-97999-63-8}, year = {2015}, date = {2015-09-01}, pages = {132-141}, abstract = {For a context-aware robotic assistant platform that follows patients with moderate mobility impairment and adapts its motion to the patient's needs, the development of an efficient leg tracker and the recognition of pathological gait are very important. In this work, we present the basic concept for the robot control architecture and analyse three essential parts of the Adaptive Context-Aware Robot Control scheme; the detection and tracking of the subject's legs, the gait modelling and classification and the computation of gait parameters for the impairment level assessment. We initially process raw laser data and estimate the legs' position and velocity with a Kalman Filter and then use this information as input for a Hidden Markov Model-based framework that detects specific gait patterns and classifies human gait into normal or pathological. We then compute gait parameters commonly used for medical diagnosis. 
The recognised gait patterns along with the gait parameters will be used for the impairment level assessment, which will activate certain control assistive actions regarding the pathological state of the patient.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } For a context-aware robotic assistant platform that follows patients with moderate mobility impairment and adapts its motion to the patient's needs, the development of an efficient leg tracker and the recognition of pathological gait are very important. In this work, we present the basic concept for the robot control architecture and analyse three essential parts of the Adaptive Context-Aware Robot Control scheme; the detection and tracking of the subject's legs, the gait modelling and classification and the computation of gait parameters for the impairment level assessment. We initially process raw laser data and estimate the legs' position and velocity with a Kalman Filter and then use this information as input for a Hidden Markov Model-based framework that detects specific gait patterns and classifies human gait into normal or pathological. We then compute gait parameters commonly used for medical diagnosis. The recognised gait patterns along with the gait parameters will be used for the impairment level assessment, which will activate certain control assistive actions regarding the pathological state of the patient. |
X S Papageorgiou, G Chalvatzaki, C S Tzafestas, P Maragos Hidden markov modeling of human pathological gait using laser range finder for an assisted living intelligent robotic walker Conference IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), 2015. Abstract | BibTeX | Links: [PDF] @conference{IROS2015, title = {Hidden markov modeling of human pathological gait using laser range finder for an assisted living intelligent robotic walker}, author = {X S Papageorgiou and G Chalvatzaki and C S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/IROS2015.pdf}, doi = {10.1109/IROS.2015.7354283}, year = {2015}, date = {2015-09-01}, booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, pages = {6342-6347}, abstract = {The precise analysis of a patient's or an elderly person's walking pattern is very important for an effective intelligent active mobility assistance robot. This walking pattern can be described by a cyclic motion, which can be modeled using the consecutive gait phases. In this paper, we present a completely non-invasive framework for analyzing and recognizing a pathological human walking gait pattern. Our framework utilizes a laser range finder sensor to detect and track the human legs, and an appropriately synthesized Hidden Markov Model (HMM) for state estimation, and recognition of the gait patterns. We demonstrate the applicability of this setup using real data, collected from an ensemble of different elderly persons with a number of pathologies. The results presented in this paper demonstrate that the proposed human data analysis scheme has the potential to provide the necessary methodological (modeling, inference, and learning) framework for a cognitive behavior-based robot control system. 
More specifically, the proposed framework has the potential to be used for the classification of specific walking pathologies, which is needed for the development of a context-aware robot mobility assistant.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } The precise analysis of a patient's or an elderly person's walking pattern is very important for an effective intelligent active mobility assistance robot. This walking pattern can be described by a cyclic motion, which can be modeled using the consecutive gait phases. In this paper, we present a completely non-invasive framework for analyzing and recognizing a pathological human walking gait pattern. Our framework utilizes a laser range finder sensor to detect and track the human legs, and an appropriately synthesized Hidden Markov Model (HMM) for state estimation, and recognition of the gait patterns. We demonstrate the applicability of this setup using real data, collected from an ensemble of different elderly persons with a number of pathologies. The results presented in this paper demonstrate that the proposed human data analysis scheme has the potential to provide the necessary methodological (modeling, inference, and learning) framework for a cognitive behavior-based robot control system. More specifically, the proposed framework has the potential to be used for the classification of specific walking pathologies, which is needed for the development of a context-aware robot mobility assistant. |
P Koutras, A Zlatintsi, E. Iosif, A Katsamanis, P Maragos, A Potamianos Predicting Audio-Visual Salient Events Based on Visual, Audio and Text Modalities for Movie Summarization Conference Proc. {IEEE} Int'l Conf. Acous., Speech, and Signal Processing, Quebec, Canada, 2015. Abstract | BibTeX | Links: [PDF] @conference{KZI+15, title = {Predicting Audio-Visual Salient Events Based on Visual, Audio and Text Modalities for Movie Summarization}, author = {P Koutras and A Zlatintsi and E. Iosif and A Katsamanis and P Maragos and A Potamianos}, url = {http://robotics.ntua.gr/wp-content/publications/KZIKMP_MovieSum2_ICIP-2015.pdf}, year = {2015}, date = {2015-09-01}, booktitle = {Proc. {IEEE} Int'l Conf. Acous., Speech, and Signal Processing}, address = {Quebec, Canada}, abstract = {In this paper, we present a new and improved synergistic approach to the problem of audio-visual salient event detection and movie summarization based on visual, audio and text modalities. Spatio-temporal visual saliency is estimated through a perceptually inspired frontend based on 3D (space, time) Gabor filters and frame-wise features are extracted from the saliency volumes. For the auditory salient event detection we extract features based on Teager-Kaiser Energy Operator, while text analysis incorporates part-of-speech tagging and affective modeling of single words on the movie subtitles. For the evaluation of the proposed system, we employ an elementary and non-parametric classification technique like KNN. Detection results are reported on the MovSum database, using objective evaluations against ground-truth denoting the perceptually salient events, and human evaluations of the movie summaries. Our evaluation verifies the appropriateness of the proposed methods compared to our baseline system. 
Finally, our newly proposed summarization algorithm produces summaries that consist of salient and meaningful events, also improving the comprehension of the semantics.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, we present a new and improved synergistic approach to the problem of audio-visual salient event detection and movie summarization based on visual, audio and text modalities. Spatio-temporal visual saliency is estimated through a perceptually inspired frontend based on 3D (space, time) Gabor filters and frame-wise features are extracted from the saliency volumes. For the auditory salient event detection we extract features based on Teager-Kaiser Energy Operator, while text analysis incorporates part-of-speech tagging and affective modeling of single words on the movie subtitles. For the evaluation of the proposed system, we employ an elementary and non-parametric classification technique like KNN. Detection results are reported on the MovSum database, using objective evaluations against ground-truth denoting the perceptually salient events, and human evaluations of the movie summaries. Our evaluation verifies the appropriateness of the proposed methods compared to our baseline system. Finally, our newly proposed summarization algorithm produces summaries that consist of salient and meaningful events, also improving the comprehension of the semantics. |
A Zlatintsi, E. Iosif, P Maragos, A Potamianos Audio Salient Event Detection and Summarization using Audio and Text Modalities Conference Nice, France, 2015. Abstract | BibTeX | Links: [PDF] @conference{ZIM+15, title = {Audio Salient Event Detection and Summarization using Audio and Text Modalities}, author = {A Zlatintsi and E. Iosif and P Maragos and A Potamianos}, url = {http://robotics.ntua.gr/wp-content/publications/ZlatintsiEtAl_AudioTextSum-EUSIPCO-2015.pdf}, year = {2015}, date = {2015-09-01}, address = {Nice, France}, abstract = {This paper investigates the problem of audio event detection and summarization, building on previous work [1, 2] on the detection of perceptually important audio events based on saliency models. We take a synergistic approach to audio summarization where saliency computation of audio streams is assisted by using the text modality as well. Auditory saliency is assessed by auditory and perceptual cues such as Teager energy, loudness and roughness; all known to correlate with attention and human hearing. Text analysis incorporates part-of-speech tagging and affective modeling. A computational method for the automatic correction of the boundaries of the selected audio events is applied creating summaries that consist not only of salient but also meaningful and semantically coherent events. A non-parametric classification technique is employed and results are reported on the MovSum movie database using objective evaluations against ground-truth designating the auditory and semantically salient events.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper investigates the problem of audio event detection and summarization, building on previous work [1, 2] on the detection of perceptually important audio events based on saliency models. We take a synergistic approach to audio summarization where saliency computation of audio streams is assisted by using the text modality as well. 
Auditory saliency is assessed by auditory and perceptual cues such as Teager energy, loudness and roughness; all known to correlate with attention and human hearing. Text analysis incorporates part-of-speech tagging and affective modeling. A computational method for the automatic correction of the boundaries of the selected audio events is applied creating summaries that consist not only of salient but also meaningful and semantically coherent events. A non-parametric classification technique is employed and results are reported on the MovSum movie database using objective evaluations against ground-truth designating the auditory and semantically salient events. |
Petros Koutras, Petros Maragos Estimation of eye gaze direction angles based on active appearance models Conference 2015 IEEE International Conference on Image Processing (ICIP), 2015, ISBN: 978-1-4799-8339-1. BibTeX | Links: [Webpage] [PDF] @conference{Koutras2015, title = {Estimation of eye gaze direction angles based on active appearance models}, author = { Petros Koutras and Petros Maragos}, url = {http://ieeexplore.ieee.org/document/7351237/ http://robotics.ntua.gr/wp-content/uploads/sites/2/KoutrasMaragos_EyeGaze_ICIP15.pdf}, doi = {10.1109/ICIP.2015.7351237}, isbn = {978-1-4799-8339-1}, year = {2015}, date = {2015-09-01}, booktitle = {2015 IEEE International Conference on Image Processing (ICIP)}, pages = {2424--2428}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
CS Tzafestas, XS Papageorgiou, GP Moustris, G Chalvatzaki, A Dometios User-Oriented Human-Robot Interaction for an Intelligent Walking Assistant Robotic Device Conference Workshop IEEE/RSJ Int'l Conf. on Intelligent Robots and Systems, Invited Session: "Cognitive Mobility Assistance Robots: Scientific Advances and Perspectives", Hamburg, Germany, Sept. 28 - Oct. 02, 2015, 2015. @conference{Tzafestas2015, title = {User-Oriented Human-Robot Interaction for an Intelligent Walking Assistant Robotic Device}, author = {CS Tzafestas and XS Papageorgiou and GP Moustris and G Chalvatzaki and A Dometios}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/IROS2015-Workshop-Proceedings_Cognitive-Mobility-Assistance_final.pdf}, year = {2015}, date = {2015-09-01}, booktitle = {Workshop IEEE/RSJ Int'l Conf. on Intelligent Robots and Systems, Invited Session: "Cognitive Mobility Assistance Robots: Scientific Advances and Perspectives", Hamburg, Germany, Sept. 28 - Oct. 02, 2015}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
A Zlatintsi, P Koutras, N Efthymiou, P Maragos, A Potamianos, K Pastra Quality Evaluation of Computational Models for Movie Summarization Conference Costa Navarino, Messinia, Greece, 2015. Abstract | BibTeX | Links: [PDF] @conference{ZKE+15, title = {Quality Evaluation of Computational Models for Movie Summarization}, author = {A Zlatintsi and P Koutras and N Efthymiou and P Maragos and A Potamianos and K Pastra}, url = {http://robotics.ntua.gr/wp-content/publications/ZlatintsiEtAl_MovieSumEval-QoMEX2015.pdf}, year = {2015}, date = {2015-05-01}, address = {Costa Navarino, Messinia, Greece}, abstract = {In this paper we present a movie summarization system and we investigate what composes high quality movie summaries in terms of user experience evaluation. We propose state-of-the-art audio, visual and text techniques for the detection of perceptually salient events from movies. The evaluation of such computational models is usually based on the comparison of the similarity between the system-detected events and some ground-truth data. For this reason, we have developed the MovSum movie database, which includes sensory and semantic saliency annotation as well as cross-media relations, for objective evaluations. The automatically produced movie summaries were qualitatively evaluated, in an extensive human evaluation, in terms of informativeness and enjoyability accomplishing very high ratings up to 80% and 90%, respectively, which verifies the appropriateness of the proposed methods.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper we present a movie summarization system and we investigate what composes high quality movie summaries in terms of user experience evaluation. We propose state-of-the-art audio, visual and text techniques for the detection of perceptually salient events from movies. 
The evaluation of such computational models is usually based on the comparison of the similarity between the system-detected events and some ground-truth data. For this reason, we have developed the MovSum movie database, which includes sensory and semantic saliency annotation as well as cross-media relations, for objective evaluations. The automatically produced movie summaries were qualitatively evaluated, in an extensive human evaluation, in terms of informativeness and enjoyability accomplishing very high ratings up to 80% and 90%, respectively, which verifies the appropriateness of the proposed methods. |
Christos G Bampis, Petros Maragos UNIFYING THE RANDOM WALKER ALGORITHM AND THE SIR MODEL FOR GRAPH CLUSTERING AND IMAGE SEGMENTATION Conference Icip 2015, 2 (3), 2015, ISBN: 9781479983391. @conference{319, title = {UNIFYING THE RANDOM WALKER ALGORITHM AND THE SIR MODEL FOR GRAPH CLUSTERING AND IMAGE SEGMENTATION}, author = { Christos G Bampis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/BampisMaragos_NormalizedRandomWalk-GraphCluster-ImageSegment_ICIP2015_crf.pdf}, isbn = {9781479983391}, year = {2015}, date = {2015-01-01}, booktitle = {Icip 2015}, volume = {2}, number = {3}, pages = {2265--2269}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Panagiotis Giannoulis, Alessio Brutti, Marco Matassoni, Alberto Abad, Athanasios Katsamanis, Miguel Matos, Gerasimos Potamianos, Petros Maragos, Fondazione Bruno Kessler MULTI-ROOM SPEECH ACTIVITY DETECTION USING A DISTRIBUTED MICROPHONE NETWORK IN DOMESTIC ENVIRONMENTS Conference Proc. European Signal Processing Conf. (EUSIPCO-2015), Nice, France, Sep. 2015, 2015, ISBN: 9780992862633. @conference{306, title = {MULTI-ROOM SPEECH ACTIVITY DETECTION USING A DISTRIBUTED MICROPHONE NETWORK IN DOMESTIC ENVIRONMENTS}, author = { Panagiotis Giannoulis and Alessio Brutti and Marco Matassoni and Alberto Abad and Athanasios Katsamanis and Miguel Matos and Gerasimos Potamianos and Petros Maragos and Fondazione Bruno Kessler}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/GiannoulisEtAl_MultiRoomSpeechActivityDetection_EUSIPCO2015_crf.pdf}, isbn = {9780992862633}, year = {2015}, date = {2015-01-01}, booktitle = {Proc. European Signal Processing Conf. (EUSIPCO-2015), Nice, France, Sep. 2015}, pages = {1281--1285}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P. Koutras, A. Zlatintsi, E. Iosif, A. Katsamanis, P. Maragos, A. Potamianos Predicting audio-visual salient events based on visual, audio and text modalities for movie summarization Conference Proceedings - International Conference on Image Processing, ICIP, 2015-December , 2015, ISSN: 15224880. @conference{307, title = {Predicting audio-visual salient events based on visual, audio and text modalities for movie summarization}, author = { P. Koutras and A. Zlatintsi and E. Iosif and A. Katsamanis and P. Maragos and A. Potamianos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KZIKMP_MovieSum2_ICIP-2015.pdf}, doi = {10.1109/ICIP.2015.7351630}, issn = {15224880}, year = {2015}, date = {2015-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, volume = {2015-December}, pages = {4361--4365}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Maragos, V Pitsikalis, A Katsamanis, N Kardaris, E Mavroudi, I Rodomagoulakis, A Tsiami Multimodal Sensory Processing for Human Action Recognition in Mobility Assistive Robotics Conference Proc. IROS-2015 Workshop on Cognitive Mobility Assistance Robots, Hamburg, Germany, Sep. 2015, 2015. @conference{320, title = {Multimodal Sensory Processing for Human Action Recognition in Mobility Assistive Robotics}, author = { P Maragos and V Pitsikalis and A Katsamanis and N Kardaris and E Mavroudi and I Rodomagoulakis and A Tsiami}, url = {MaragosEtAl_MultiSensoryHumanActionRecogn-Robotics_IROS2015-Workshop.pdf}, year = {2015}, date = {2015-01-01}, booktitle = {Proc. IROS-2015 Workshop on Cognitive Mobility Assistance Robots, Hamburg, Germany, Sep. 2015}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Z I Skordilis, A Tsiami, P Maragos, G Potamianos, L Spelgatti, R Sannino Multichannel Speech Enhancement Using Mems Microphones Conference IEEE International Conference on Acoustics, Speech and Signal Processing, 2015, ISBN: 978-1-4673-6997-8. @conference{163, title = {Multichannel Speech Enhancement Using Mems Microphones}, author = { Z I Skordilis and A Tsiami and P Maragos and G Potamianos and L Spelgatti and R Sannino}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/SkorTsiamMarPotSpelSan_MEMS-MCSE_ICASSP2015.pdf}, doi = {10.1109/ICASSP.2015.7178467}, isbn = {978-1-4673-6997-8}, year = {2015}, date = {2015-01-01}, booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing}, pages = {2--6}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
2014 |
Sheraz Khan, Athanasios Dometios, Chris Verginis, Costas Tzafestas, Dirk Wollherr, Martin Buss RMAP: A rectangular cuboid approximation framework for 3D environment mapping Journal Article Autonomous Robots, 37 (3), pp. 261–277, 2014, ISSN: 09295593. Abstract | BibTeX | Links: [PDF] @article{23n, title = {RMAP: A rectangular cuboid approximation framework for 3D environment mapping}, author = {Sheraz Khan and Athanasios Dometios and Chris Verginis and Costas Tzafestas and Dirk Wollherr and Martin Buss}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/s10514-014-9387-y.pdf}, doi = {10.1007/s10514-014-9387-y}, issn = {09295593}, year = {2014}, date = {2014-02-01}, journal = {Autonomous Robots}, volume = {37}, number = {3}, pages = {261--277}, abstract = {This paper presents a rectangular cuboid approximation framework (RMAP) for 3D mapping. The goal of RMAP is to provide computational and memory efficient environment representations for 3D robotic mapping using axis aligned rectangular cuboids (RC). This paper focuses on two aspects of the RMAP framework: (i) An occupancy grid approach and (ii) A RC approximation of 3D environments based on point cloud density. The RMAP occupancy grid is based on the Rtree data structure which is composed of a hierarchy of RC. The proposed approach is capable of generating probabilistic 3D representations with multiresolution capabilities. It reduces the memory complexity in large scale 3D occupancy grids by avoiding explicit modelling of free space. In contrast to point cloud and fixed resolution cell representations based on beam end point observations, an approximation approach using point cloud density is presented. The proposed approach generates variable sized RC approximations that are memory efficient for axis aligned surfaces. Evaluation of the RMAP occupancy grid and approximation approach based on computational and memory complexity on different datasets shows the effectiveness of this framework for 3D mapping. 
{\textcopyright} 2014 The Author(s).}, keywords = {}, pubstate = {published}, tppubtype = {article} } This paper presents a rectangular cuboid approximation framework (RMAP) for 3D mapping. The goal of RMAP is to provide computational and memory efficient environment representations for 3D robotic mapping using axis aligned rectangular cuboids (RC). This paper focuses on two aspects of the RMAP framework: (i) An occupancy grid approach and (ii) A RC approximation of 3D environments based on point cloud density. The RMAP occupancy grid is based on the Rtree data structure which is composed of a hierarchy of RC. The proposed approach is capable of generating probabilistic 3D representations with multiresolution capabilities. It reduces the memory complexity in large scale 3D occupancy grids by avoiding explicit modelling of free space. In contrast to point cloud and fixed resolution cell representations based on beam end point observations, an approximation approach using point cloud density is presented. The proposed approach generates variable sized RC approximations that are memory efficient for axis aligned surfaces. Evaluation of the RMAP occupancy grid and approximation approach based on computational and memory complexity on different datasets shows the effectiveness of this framework for 3D mapping. © 2014 The Author(s). |
Epameinondas Antonakos, Vassilis Pitsikalis, Petros Maragos Classification of extreme facial events in sign language videos Journal Article Eurasip Journal on Image and Video Processing, 2014 , 2014, ISSN: 16875281. Abstract | BibTeX | Links: [PDF] @article{143, title = {Classification of extreme facial events in sign language videos}, author = {Epameinondas Antonakos and Vassilis Pitsikalis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/1687-5281-2014-14.pdf}, doi = {10.1186/1687-5281-2014-14}, issn = {16875281}, year = {2014}, date = {2014-01-01}, journal = {Eurasip Journal on Image and Video Processing}, volume = {2014}, abstract = {We propose a new approach for Extreme States Classification (ESC) on feature spaces of facial cues in sign language (SL) videos. The method is built upon Active Appearance Model (AAM) face tracking and feature extraction of global and local AAMs. ESC is applied on various facial cues---as, for instance, pose rotations, head movements and eye blinking---leading to the detection of extreme states such as left/right, up/down and open/closed. Given the importance of such facial events in SL analysis, we apply ESC to detect visual events on SL videos, including both American (ASL) and Greek (GSL) corpora, yielding promising qualitative and quantitative results. Further, we show the potential of ESC for assistive annotation tools and demonstrate a link of the detections with indicative higher-level linguistic events. Given the lack of facial annotated data and the fact that manual annotations are highly time-consuming, ESC results indicate that the framework can have significant impact on SL processing and analysis. {\textcopyright} 2014 Antonakos et al.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We propose a new approach for Extreme States Classification (ESC) on feature spaces of facial cues in sign language (SL) videos. 
The method is built upon Active Appearance Model (AAM) face tracking and feature extraction of global and local AAMs. ESC is applied on various facial cues—as, for instance, pose rotations, head movements and eye blinking—leading to the detection of extreme states such as left/right, up/down and open/closed. Given the importance of such facial events in SL analysis, we apply ESC to detect visual events on SL videos, including both American (ASL) and Greek (GSL) corpora, yielding promising qualitative and quantitative results. Further, we show the potential of ESC for assistive annotation tools and demonstrate a link of the detections with indicative higher-level linguistic events. Given the lack of facial annotated data and the fact that manual annotations are highly time-consuming, ESC results indicate that the framework can have significant impact on SL processing and analysis. © 2014 Antonakos et al. |
S Khan, A Dometios, C Verginis, C Tzafestas, D Wollherr, M Buss RMAP: a Rectangular Cuboid Approximation Framework for 3D Environment Mapping Journal Article Autonomous Robots, 37 (3), pp. 261–277, 2014, ISSN: 1573-7527. Abstract | BibTeX | Links: [PDF] @article{KDVTWB14, title = {RMAP: a Rectangular Cuboid Approximation Framework for 3D Environment Mapping}, author = {S Khan and A Dometios and C Verginis and C Tzafestas and D Wollherr and M Buss}, url = {http://robotics.ntua.gr/wp-content/publications/Khan14_RMAP_Rectangular_Cuboid_Approxim.pdf}, doi = {10.1007/s10514-014-9387-y}, issn = {1573-7527}, year = {2014}, date = {2014-01-01}, journal = {Autonomous Robots}, volume = {37}, number = {3}, pages = {261--277}, publisher = {Springer}, abstract = {This paper presents a rectangular cuboid approximation framework (RMAP) for 3D mapping. The goal of RMAP is to provide computational and memory efficient environment representations for 3D robotic mapping using axis aligned rectangular cuboids (RC). This paper focuses on two aspects of the RMAP framework: (i) An occupancy grid approach and (ii) A RC approximation of 3D environments based on point cloud density. The RMAP occupancy grid is based on the Rtree data structure which is composed of a hierarchy of RC. The proposed approach is capable of generating probabilistic 3D representations with multiresolution capabilities. It reduces the memory complexity in large scale 3D occupancy grids by avoiding explicit modelling of free space. In contrast to point cloud and fixed resolution cell representations based on beam end point observations, an approximation approach using point cloud density is presented. The proposed approach generates variable sized RC approximations that are memory efficient for axis aligned surfaces. 
Evaluation of the RMAP occupancy grid and approximation approach based on computational and memory complexity on different datasets shows the effectiveness of this framework for 3D mapping.}, keywords = {}, pubstate = {published}, tppubtype = {article} } This paper presents a rectangular cuboid approximation framework (RMAP) for 3D mapping. The goal of RMAP is to provide computational and memory efficient environment representations for 3D robotic mapping using axis aligned rectangular cuboids (RC). This paper focuses on two aspects of the RMAP framework: (i) An occupancy grid approach and (ii) A RC approximation of 3D environments based on point cloud density. The RMAP occupancy grid is based on the Rtree data structure which is composed of a hierarchy of RC. The proposed approach is capable of generating probabilistic 3D representations with multiresolution capabilities. It reduces the memory complexity in large scale 3D occupancy grids by avoiding explicit modelling of free space. In contrast to point cloud and fixed resolution cell representations based on beam end point observations, an approximation approach using point cloud density is presented. The proposed approach generates variable sized RC approximations that are memory efficient for axis aligned surfaces. Evaluation of the RMAP occupancy grid and approximation approach based on computational and memory complexity on different datasets shows the effectiveness of this framework for 3D mapping. |
S Theodorakis, V Pitsikalis, P Maragos Dynamic-static unsupervised sequentiality, statistical subunits and lexicon for sign language recognition Journal Article Image and Vision Computing, 32 , pp. 533–549, 2014. @article{Theodorakis2014, title = {Dynamic-static unsupervised sequentiality, statistical subunits and lexicon for sign language recognition}, author = {S Theodorakis and V Pitsikalis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2014_TheodorakisPitsikalisMaragos_DynamicStaticSU-SignRecognition_IVC.pdf}, year = {2014}, date = {2014-01-01}, journal = {Image and Vision Computing}, volume = {32}, pages = {533--549}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
A Zlatintsi, P Maragos Comparison of Different Representations Based on Nonlinear Features for Music Genre Classification Conference Proc. European Signal Processing Conference, Lisbon, Portugal, 2014. Abstract | BibTeX | Links: [PDF] @conference{ZlMa14, title = {Comparison of Different Representations Based on Nonlinear Features for Music Genre Classification}, author = {A Zlatintsi and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/ZlatintsiMaragos_MGC_EUSIPCO14_Lisbon_proc.pdf}, year = {2014}, date = {2014-09-01}, booktitle = {Proc. European Signal Processing Conference}, address = {Lisbon, Portugal}, abstract = {In this paper, we examine the descriptiveness and recognition properties of different feature representations for the analysis of musical signals, aiming in the exploration of their micro- and macro-structures, for the task of music genre classification. We explore nonlinear methods, such as the AM-FM model and ideas from fractal theory, so as to model the time-varying harmonic structure of musical signals and the geometrical complexity of the music waveform. The different feature representations’ efficacy is compared regarding their recognition properties for the specific task. The proposed features are evaluated against and in combination with Mel frequency cepstral coefficients (MFCC), using both static and dynamic classifiers, accomplishing an error reduction of 28%, illustrating that they can capture important aspects of music.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, we examine the descriptiveness and recognition properties of different feature representations for the analysis of musical signals, aiming in the exploration of their micro- and macro-structures, for the task of music genre classification. 
We explore nonlinear methods, such as the AM-FM model and ideas from fractal theory, so as to model the time-varying harmonic structure of musical signals and the geometrical complexity of the music waveform. The different feature representations’ efficacy is compared regarding their recognition properties for the specific task. The proposed features are evaluated against and in combination with Mel frequency cepstral coefficients (MFCC), using both static and dynamic classifiers, accomplishing an error reduction of 28%, illustrating that they can capture important aspects of music. |
A Tsiami, A Katsamanis, P Maragos, G Potamianos Experiments in acoustic source localization using sparse arrays in adverse indoors environments Conference Proc. European Signal Processing Conference, Lisbon, Portugal, 2014. Abstract | BibTeX | Links: [PDF] @conference{tsiami2014localization, title = {Experiments in acoustic source localization using sparse arrays in adverse indoors environments}, author = {A Tsiami and A Katsamanis and P Maragos and G Potamianos}, url = {http://robotics.ntua.gr/wp-content/publications/Tsiami+_AcousticSourceLocalization_EUSIPCO2014.pdf}, year = {2014}, date = {2014-09-01}, booktitle = {Proc. European Signal Processing Conference}, pages = {2390-2394}, address = {Lisbon, Portugal}, abstract = {In this paper we experiment with 2-D source localization in smart homes under adverse conditions using sparse distributed microphone arrays. We propose some improvements to deal with problems due to high reverberation, noise and use of a limited number of microphones. These consist of a pre-filtering stage for dereverberation and an iterative procedure that aims to increase accuracy. Experiments carried out in relatively large databases with both simulated and real recordings of sources in various positions indicate that the proposed method exhibits a better performance compared to others under challenging conditions while also being computationally efficient. It is demonstrated that although reverberation degrades localization performance, this degradation can be compensated by identifying the reliable microphone pairs and disposing of the outliers.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper we experiment with 2-D source localization in smart homes under adverse conditions using sparse distributed microphone arrays. We propose some improvements to deal with problems due to high reverberation, noise and use of a limited number of microphones. 
These consist of a pre-filtering stage for dereverberation and an iterative procedure that aims to increase accuracy. Experiments carried out in relatively large databases with both simulated and real recordings of sources in various positions indicate that the proposed method exhibits a better performance compared to others under challenging conditions while also being computationally efficient. It is demonstrated that although reverberation degrades localization performance, this degradation can be compensated by identifying the reliable microphone pairs and disposing of the outliers. |
Antigoni Tsiami, Isidoros Rodomagoulakis, Panagiotis Giannoulis, Athanasios Katsamanis, Gerasimos Potamianos, Petros Maragos ATHENA: A Greek Multi-Sensory Database for Home Automation Control Conference Proc. Int'l Conf. on Speech Communication and Technology (INTERSPEECH), Singapore, 2014. Abstract | BibTeX | Links: [PDF] @conference{tsiami2014athena, title = {ATHENA: A Greek Multi-Sensory Database for Home Automation Control}, author = {Antigoni Tsiami and Isidoros Rodomagoulakis and Panagiotis Giannoulis and Athanasios Katsamanis and Gerasimos Potamianos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/Tsiami+_AthenaDatabase_INTERSPEECH2014.pdf}, year = {2014}, date = {2014-09-01}, booktitle = {Proc. Int'l Conf. on Speech Communication and Technology (INTERSPEECH)}, pages = {1608--1612}, address = {Singapore}, abstract = {In this paper we present a Greek speech database with real multi-modal data in a smart home two-room environment. In total, 20 speakers were recorded in 240 one-minute long sessions. The recordings include utterances of activation keywords and commands for home automation control, but also phonetically rich sentences and conversational speech. Audio, speaker movements and gestures were captured by 20 condenser microphones installed on the walls and ceiling, 6 MEMS microphones, 2 close-talk microphones and one Kinect camera. The new publicly available database exhibits adverse noise conditions because of background noises and acoustic events performed during the recordings to better approximate a realistic everyday home scenario. Thus, it is suitable for experimentation on voice activity and event detection, source localization, speech enhancement and far-field speech recognition. 
We present the details of the corpus as well as baseline results on multi-channel voice activity detection and spoken command recognition.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper we present a Greek speech database with real multi-modal data in a smart home two-room environment. In total, 20 speakers were recorded in 240 one-minute long sessions. The recordings include utterances of activation keywords and commands for home automation control, but also phonetically rich sentences and conversational speech. Audio, speaker movements and gestures were captured by 20 condenser microphones installed on the walls and ceiling, 6 MEMS microphones, 2 close-talk microphones and one Kinect camera. The new publicly available database exhibits adverse noise conditions because of background noises and acoustic events performed during the recordings to better approximate a realistic everyday home scenario. Thus, it is suitable for experimentation on voice activity and event detection, source localization, speech enhancement and far-field speech recognition. We present the details of the corpus as well as baseline results on multi-channel voice activity detection and spoken command recognition. |
X S Papageorgiou, G Chalvatzaki, C S Tzafestas, P Maragos Hidden Markov modeling of human normal gait using laser range finder for a mobility assistance robot Conference IEEE International Conference on Robotics and Automation (ICRA), 2014, ISSN: 1050-4729. Abstract | BibTeX | Links: [PDF] @conference{ICRA2014, title = {Hidden Markov modeling of human normal gait using laser range finder for a mobility assistance robot}, author = {X S Papageorgiou and G Chalvatzaki and C S Tzafestas and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/ICRA2014.pdf}, doi = {10.1109/ICRA.2014.6906899}, issn = {1050-4729}, year = {2014}, date = {2014-05-01}, booktitle = {IEEE International Conference on Robotics and Automation (ICRA)}, pages = {482-487}, abstract = {For an effective intelligent active mobility assistance robot, the walking pattern of a patient or an elderly person has to be analyzed precisely. A well-known fact is that the walking patterns are gaits, that is, cyclic patterns with several consecutive phases. These cyclic motions can be modeled using the consecutive gait phases. In this paper, we present a completely non-invasive framework for analyzing a normal human walking gait pattern. Our framework utilizes a laser range finder sensor to collect the data, a combination of filters to preprocess these data, and an appropriately synthesized Hidden Markov Model (HMM) for state estimation, and recognition of the gait data. We demonstrate the applicability of this setup using real data, collected from an ensemble of different persons. The results presented in this paper demonstrate that the proposed human data analysis scheme has the potential to provide the necessary methodological (modeling, inference, and learning) framework for a cognitive behavior-based robot control system. 
More specifically, the proposed framework has the potential to be used for the recognition of abnormal gait patterns and the subsequent classification of specific walking pathologies, which is needed for the development of a context-aware robot mobility assistant.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } For an effective intelligent active mobility assistance robot, the walking pattern of a patient or an elderly person has to be analyzed precisely. A well-known fact is that the walking patterns are gaits, that is, cyclic patterns with several consecutive phases. These cyclic motions can be modeled using the consecutive gait phases. In this paper, we present a completely non-invasive framework for analyzing a normal human walking gait pattern. Our framework utilizes a laser range finder sensor to collect the data, a combination of filters to preprocess these data, and an appropriately synthesized Hidden Markov Model (HMM) for state estimation, and recognition of the gait data. We demonstrate the applicability of this setup using real data, collected from an ensemble of different persons. The results presented in this paper demonstrate that the proposed human data analysis scheme has the potential to provide the necessary methodological (modeling, inference, and learning) framework for a cognitive behavior-based robot control system. More specifically, the proposed framework has the potential to be used for the recognition of abnormal gait patterns and the subsequent classification of specific walking pathologies, which is needed for the development of a context-aware robot mobility assistant. |
X S Papageorgiou, C S Tzafestas, P Maragos, G Pavlakos, G Chalvatzaki, G Moustris, I Kokkinos, A Peer, B Stanczyk, E -S Fotinea, E Efthimiou Advances in Intelligent Mobility Assistance Robot Integrating Multimodal Sensory Processing Conference J_HCII, Springer International Publishing, Cham, 2014, ISBN: 978-3-319-07446-7. Abstract | BibTeX | Links: [PDF] @conference{HCII2014, title = {Advances in Intelligent Mobility Assistance Robot Integrating Multimodal Sensory Processing}, author = {X S Papageorgiou and C S Tzafestas and P Maragos and G Pavlakos and G Chalvatzaki and G Moustris and I Kokkinos and A Peer and B Stanczyk and E -S Fotinea and E Efthimiou}, editor = {C Stephanidis and M Antona}, url = {http://robotics.ntua.gr/wp-content/publications/HCII2014.pdf}, doi = {10.1007/978-3-319-07446-7_66}, isbn = {978-3-319-07446-7}, year = {2014}, date = {2014-01-01}, booktitle = {J_HCII}, pages = {692--703}, publisher = {Springer International Publishing}, address = {Cham}, abstract = {Mobility disabilities are prevalent in our ageing society and impede activities important for the independent living of elderly people and their quality of life. The goal of this work is to support human mobility and thus enforce fitness and vitality by developing intelligent robotic platforms designed to provide user-centred and natural support for ambulating in indoor environments. We envision the design of cognitive mobile robotic systems that can monitor and understand specific forms of human activity, in order to deduce what the human needs are, in terms of mobility. 
The goal is to provide user and context adaptive active support and ambulation assistance to elderly users, and generally to individuals with specific forms of moderate to mild walking impairment.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Mobility disabilities are prevalent in our ageing society and impede activities important for the independent living of elderly people and their quality of life. The goal of this work is to support human mobility and thus enforce fitness and vitality by developing intelligent robotic platforms designed to provide user-centred and natural support for ambulating in indoor environments. We envision the design of cognitive mobile robotic systems that can monitor and understand specific forms of human activity, in order to deduce what the human needs are, in terms of mobility. The goal is to provide user and context adaptive active support and ambulation assistance to elderly users, and generally to individuals with specific forms of moderate to mild walking impairment. |
Theodora Chaspari, Dimitrios Dimitriadis, Petros Maragos Emotion classification of speech using modulation features Conference European Signal Processing Conference, 2014, ISSN: 22195491. Abstract | BibTeX | Links: [PDF] @conference{170, title = {Emotion classification of speech using modulation features}, author = { Theodora Chaspari and Dimitrios Dimitriadis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/ChaspariDimitriadisMaragos_EmotionRecognitionSpeech_EUSIPCO2014_cr.pdf}, issn = {22195491}, year = {2014}, date = {2014-01-01}, booktitle = {European Signal Processing Conference}, pages = {1552--1556}, abstract = {Automatic classification of a speaker's affective state is one of the major challenges in signal processing community, since it can improve Human-Computer interaction and give insights into the nature of emotions from psychology perspective. The amplitude and frequency control of sound production influences strongly the affective voice content. In this paper, we take advantage of the inherent speech modulations and propose the use of instant amplitude- and frequency-derived features for efficient emotion recognition. Our results indicate that these features can further increase the performance of the widely-used spectral-prosodic information, achieving improvements on two emotional databases, the Berlin Database of Emotional Speech and the recently collected Athens Emotional States Inventory.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Automatic classification of a speaker's affective state is one of the major challenges in signal processing community, since it can improve Human-Computer interaction and give insights into the nature of emotions from psychology perspective. The amplitude and frequency control of sound production influences strongly the affective voice content. 
In this paper, we take advantage of the inherent speech modulations and propose the use of instant amplitude- and frequency-derived features for efficient emotion recognition. Our results indicate that these features can further increase the performance of the widely-used spectral-prosodic information, achieving improvements on two emotional databases, the Berlin Database of Emotional Speech and the recently collected Athens Emotional States Inventory. |
Panagiotis Giannoulis, Gerasimos Potamianos, Athanasios Katsamanis, Petros Maragos Multi-microphone fusion for detection of speech and acoustic events in smart spaces Conference European Signal Processing Conference, 2014, ISSN: 22195491. Abstract | BibTeX | Links: [PDF] @conference{168, title = {Multi-microphone fusion for detection of speech and acoustic events in smart spaces}, author = { Panagiotis Giannoulis and Gerasimos Potamianos and Athanasios Katsamanis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/GiannoulisEtAl_MultimicrFusionDetectionSpeechEventsSmartspaces_EUSIPCO2014.pdf}, issn = {22195491}, year = {2014}, date = {2014-01-01}, booktitle = {European Signal Processing Conference}, pages = {2375--2379}, abstract = {In this paper, we examine the challenging problem of detecting acoustic events and voice activity in smart indoors environments, equipped with multiple microphones. In particular, we focus on channel combination strategies, aiming to take advantage of the multiple microphones installed in the smart space, capturing the potentially noisy acoustic scene from the far-field. We propose various such approaches that can be formulated as fusion at the signal, feature, or at the decision level, as well as combinations of the above, also including multi-channel training. We apply our methods on two multi-microphone databases: (a) one recorded inside a small meeting room, containing twelve classes of isolated acoustic events; and (b) a speech corpus containing interfering noise sources, simulated inside a smart home with multiple rooms. 
Our multi-channel approaches demonstrate significant improvements, reaching relative error reductions over a single-channel baseline of 9.3% and 44.8% in the two datasets, respectively.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, we examine the challenging problem of detecting acoustic events and voice activity in smart indoors environments, equipped with multiple microphones. In particular, we focus on channel combination strategies, aiming to take advantage of the multiple microphones installed in the smart space, capturing the potentially noisy acoustic scene from the far-field. We propose various such approaches that can be formulated as fusion at the signal, feature, or at the decision level, as well as combinations of the above, also including multi-channel training. We apply our methods on two multi-microphone databases: (a) one recorded inside a small meeting room, containing twelve classes of isolated acoustic events; and (b) a speech corpus containing interfering noise sources, simulated inside a smart home with multiple rooms. Our multi-channel approaches demonstrate significant improvements, reaching relative error reductions over a single-channel baseline of 9.3% and 44.8% in the two datasets, respectively. |
A. Katsamanis, I. Rodomagoulakis, G. Potamianos, P. Maragos, A. Tsiami Robust far-field spoken command recognition for home automation combining adaptation and multichannel processing Conference ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings, 2014, ISSN: 15206149. Abstract | BibTeX | Links: [PDF] @conference{171, title = {Robust far-field spoken command recognition for home automation combining adaptation and multichannel processing}, author = { A. Katsamanis and I. Rodomagoulakis and G. Potamianos and P. Maragos and A. Tsiami}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KatsamanisEtAl_MultichannelASR_DIRHA_icassp2014.pdf}, doi = {10.1109/ICASSP.2014.6854664}, issn = {15206149}, year = {2014}, date = {2014-01-01}, booktitle = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings}, pages = {5547--5551}, abstract = {The paper presents our approach to speech-controlled home automation. We are focusing on the detection and recognition of spoken commands preceded by a key-phrase as recorded in a voice-enabled apartment by a set of multiple microphones installed in the rooms. For both problems we investigate robust modeling, environmental adaptation and multichannel processing to cope with a) insufficient training data and b) the far-field effects and noise in the apartment. The proposed integrated scheme is evaluated in a challenging and highly realistic corpus of simulated audio recordings and achieves F-measure close to 0.70 for key-phrase spotting and word accuracy close to 98% for the command recognition task.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } The paper presents our approach to speech-controlled home automation. We are focusing on the detection and recognition of spoken commands preceded by a key-phrase as recorded in a voice-enabled apartment by a set of multiple microphones installed in the rooms. 
For both problems we investigate robust modeling, environmental adaptation and multichannel processing to cope with a) insufficient training data and b) the far-field effects and noise in the apartment. The proposed integrated scheme is evaluated in a challenging and highly realistic corpus of simulated audio recordings and achieves F-measure close to 0.70 for key-phrase spotting and word accuracy close to 98% for the command recognition task. |
Kevis Maninis, Petros Koutras, Petros Maragos Advances on Action Recognition in Videos Using an Interest Point Detector Based on Multiband Spatio-Temporal Energies Conference 2014 IEEE International Conference on Image Processing, ICIP 2014, 2014, ISBN: 9781479957514. @conference{164, title = {Advances on Action Recognition in Videos Using an Interest Point Detector Based on Multiband Spatio-Temporal Energies}, author = { Kevis Maninis and Petros Koutras and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/ManinisKoutrasMaragos_Action_ICIP2014.pdf}, isbn = {9781479957514}, year = {2014}, date = {2014-01-01}, booktitle = {2014 IEEE International Conference on Image Processing, ICIP 2014}, pages = {1490--1494}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Georgios Pavlakos, Stavros Theodorakis, Vassilis Pitsikalis, Athanasios Katsamanis, Petros Maragos Kinect-based multimodal gesture recognition using a two-pass fusion scheme Conference 2014 IEEE International Conference on Image Processing, ICIP 2014, 2014, ISBN: 9781479957514. Abstract | BibTeX | Links: [PDF] @conference{165, title = {Kinect-based multimodal gesture recognition using a two-pass fusion scheme}, author = { Georgios Pavlakos and Stavros Theodorakis and Vassilis Pitsikalis and Athanasios Katsamanis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PTPΚΜ_MultimodalGestureRecogn2PassFusion_ICIP2014.pdf}, doi = {10.1109/ICIP.2014.7025299}, isbn = {9781479957514}, year = {2014}, date = {2014-01-01}, booktitle = {2014 IEEE International Conference on Image Processing, ICIP 2014}, pages = {1495--1499}, abstract = {We present a new framework for multimodal gesture recognition that is based on a two-pass fusion scheme. In this, we deal with a demanding Kinect-based multimodal dataset, which was introduced in a recent gesture recognition challenge. We employ multiple modalities, i.e., visual cues, such as colour and depth images, as well as audio, and we specifically extract feature descriptors of the hands' movement, handshape, and audio spectral properties. Based on these features, we statistically train separate unimodal gesture-word models, namely hidden Markov models, explicitly accounting for the dynamics of each modality. Multimodal recognition of unknown gesture sequences is achieved by combining these models in a late, two-pass fusion scheme that exploits a set of unimodally generated n-best recognition hypotheses. 
The proposed scheme achieves 88.2% gesture recognition accuracy in the Kinect-based multimodal dataset, outperforming all recently published approaches on the same challenging multimodal gesture recognition task.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We present a new framework for multimodal gesture recognition that is based on a two-pass fusion scheme. In this, we deal with a demanding Kinect-based multimodal dataset, which was introduced in a recent gesture recognition challenge. We employ multiple modalities, i.e., visual cues, such as colour and depth images, as well as audio, and we specifically extract feature descriptors of the hands' movement, handshape, and audio spectral properties. Based on these features, we statistically train separate unimodal gesture-word models, namely hidden Markov models, explicitly accounting for the dynamics of each modality. Multimodal recognition of unknown gesture sequences is achieved by combining these models in a late, two-pass fusion scheme that exploits a set of unimodally generated n-best recognition hypotheses. The proposed scheme achieves 88.2% gesture recognition accuracy in the Kinect-based multimodal dataset, outperforming all recently published approaches on the same challenging multimodal gesture recognition task. |
2013 |
Anastasios Roussos, Stavros Theodorakis, Vassilis Pitsikalis, Petros Maragos Dynamic Affine-Invariant Shape-Appearance Handshape Features and Classification in Sign Language Videos Journal Article Journal of Machine Learning Research, 14 , pp. 1627–1663, 2013. BibTeX | Links: [Webpage] [PDF] @article{142, title = {Dynamic Affine-Invariant Shape-Appearance Handshape Features and Classification in Sign Language Videos}, author = {Anastasios Roussos and Stavros Theodorakis and Vassilis Pitsikalis and Petros Maragos}, url = {http://link.springer.com/10.1007/978-3-319-57021-1_8 http://robotics.ntua.gr/wp-content/uploads/sites/2/roussos13a.pdf}, doi = {10.1007/978-3-319-57021-1_8}, year = {2013}, date = {2013-06-01}, journal = {Journal of Machine Learning Research}, volume = {14}, pages = {1627--1663}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
A Zlatintsi, P Maragos Multiscale Fractal Analysis of Musical Instrument Signals with Application to Recognition Journal Article IEEE Trans. Audio, Speech, and Language Processing, 21 (4), pp. 737–748, 2013. @article{Zlatintsi2013, title = {Multiscale Fractal Analysis of Musical Instrument Signals with Application to Recognition}, author = {A Zlatintsi and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2013_ZlatintsiMaragos_MultiscaleFractalAnalMusicInstrumSignalsApplicRecogn_ieeeTASL.pdf}, doi = {10.1109/TASL.2012.2231073}, year = {2013}, date = {2013-04-01}, journal = {IEEE Trans. Audio, Speech, and Language Processing}, volume = {21}, number = {4}, pages = {737--748}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Costas S Tzafestas, Spyros Velanas Telehaptic perception of delayed stiffness using adaptive impedance control: Experimental psychophysical analysis Journal Article Presence: Teleoperators and Virtual Environments, 22 (4), pp. 323–344, 2013, ISSN: 15313263. Abstract | BibTeX | Links: [PDF] @article{Tzafestas2013, title = {Telehaptic perception of delayed stiffness using adaptive impedance control: Experimental psychophysical analysis}, author = {Costas S Tzafestas and Spyros Velanas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/tzafestas2013.pdf}, doi = {10.1162/PRES_a_00162}, issn = {15313263}, year = {2013}, date = {2013-01-01}, journal = {Presence: Teleoperators and Virtual Environments}, volume = {22}, number = {4}, pages = {323--344}, abstract = {Telehaptics is the science of transmitting touch-related sensations over computer networks. With respect to robot teleoperation, telehaptics emphasizes more on reliably reproducing physical properties of a remote environment, as mediated over a network through the use of appropriate haptic interfacing technologies. One of the main factors that can cause degradation of the quality of a telehaptic system is the presence of time delays. Inspired by concepts such as impedance-reflection and model-mediated telemanipulation, an adaptive impedance control scheme has been proposed aiming to mitigate some of the problems caused by network delays in a telehaptic system. This paper presents an experimental analysis, which has been conducted to assess the actual performance of the proposed telehaptic scheme in terms of both control and human perception objectives. Firstly, a set of comparative numerical experiments is presented aiming to analyze stability and characterize transparency of the telehaptic system under large time delays. The results show the superior performance of the proposed adaptive impedance scheme as compared to direct force-reflecting teleoperation. 
Then, a series of psychophysical experiments is described, to evaluate the performance of the telehaptic system with respect to human perception of remote (delayed) stiffness. An analysis of the obtained results shows that the proposed adaptive scheme significantly improves telehaptic perception of linear stiffness in the presence of network delays, maintaining perceptual thresholds close to the ones obtained in the case of direct, nondelayed stimuli. A comparative experimental evaluation of psychometric transparency confirms the superior robustness with regard to time delay of the adaptive impedance telehaptic scheme as compared to state-of-the-art position/force transparentizing methods.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Telehaptics is the science of transmitting touch-related sensations over computer networks. With respect to robot teleoperation, telehaptics emphasizes more on reliably reproducing physical properties of a remote environment, as mediated over a network through the use of appropriate haptic interfacing technologies. One of the main factors that can cause degradation of the quality of a telehaptic system is the presence of time delays. Inspired by concepts such as impedance-reflection and model-mediated telemanipulation, an adaptive impedance control scheme has been proposed aiming to mitigate some of the problems caused by network delays in a telehaptic system. This paper presents an experimental analysis, which has been conducted to assess the actual performance of the proposed telehaptic scheme in terms of both control and human perception objectives. Firstly, a set of comparative numerical experiments is presented aiming to analyze stability and characterize transparency of the telehaptic system under large time delays. The results show the superior performance of the proposed adaptive impedance scheme as compared to direct force-reflecting teleoperation. 
Then, a series of psychophysical experiments is described, to evaluate the performance of the telehaptic system with respect to human perception of remote (delayed) stiffness. An analysis of the obtained results shows that the proposed adaptive scheme significantly improves telehaptic perception of linear stiffness in the presence of network delays, maintaining perceptual thresholds close to the ones obtained in the case of direct, nondelayed stimuli. A comparative experimental evaluation of psychometric transparency confirms the superior robustness with regard to time delay of the adaptive impedance telehaptic scheme as compared to state-of-the-art position/force transparentizing methods. |
Georgios Evangelopoulos, Athanasia Zlatintsi, Alexandros Potamianos, Petros Maragos, Konstantinos Rapantzikos, Georgios Skoumas, Yannis Avrithis Multimodal saliency and fusion for movie summarization based on aural, visual, and textual attention Journal Article IEEE Transactions on Multimedia, 15 (7), pp. 1553–1568, 2013, ISSN: 15209210. Abstract | BibTeX | Links: [PDF] @article{141, title = {Multimodal saliency and fusion for movie summarization based on aural, visual, and textual attention}, author = {Georgios Evangelopoulos and Athanasia Zlatintsi and Alexandros Potamianos and Petros Maragos and Konstantinos Rapantzikos and Georgios Skoumas and Yannis Avrithis}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/EZPMRSA_MultimodalSaliencyFusionMovieSumAVTattention_ieeetMM13.pdf}, doi = {10.1109/TMM.2013.2267205}, issn = {15209210}, year = {2013}, date = {2013-01-01}, journal = {IEEE Transactions on Multimedia}, volume = {15}, number = {7}, pages = {1553--1568}, abstract = {Multimodal streams of sensory information are naturally parsed and integrated by humans using signal-level feature extraction and higher level cognitive processes. Detection of attention-invoking audiovisual segments is formulated in this work on the basis of saliency models for the audio, visual, and textual information conveyed in a video stream. Aural or auditory saliency is assessed by cues that quantify multifrequency waveform modulations, extracted through nonlinear operators and energy tracking. Visual saliency is measured through a spatiotemporal attention model driven by intensity, color, and orientation. Textual or linguistic saliency is extracted from part-of-speech tagging on the subtitles information available with most movie distributions. 
The individual saliency streams, obtained from modality-depended cues, are integrated in a multimodal saliency curve, modeling the time-varying perceptual importance of the composite video stream and signifying prevailing sensory events. The multimodal saliency representation forms the basis of a generic, bottom-up video summarization algorithm. Different fusion schemes are evaluated on a movie database of multimodal saliency annotations with comparative results provided across modalities. The produced summaries, based on low-level features and content-independent fusion and selection, are of subjectively high aesthetic and informative quality.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Multimodal streams of sensory information are naturally parsed and integrated by humans using signal-level feature extraction and higher level cognitive processes. Detection of attention-invoking audiovisual segments is formulated in this work on the basis of saliency models for the audio, visual, and textual information conveyed in a video stream. Aural or auditory saliency is assessed by cues that quantify multifrequency waveform modulations, extracted through nonlinear operators and energy tracking. Visual saliency is measured through a spatiotemporal attention model driven by intensity, color, and orientation. Textual or linguistic saliency is extracted from part-of-speech tagging on the subtitles information available with most movie distributions. The individual saliency streams, obtained from modality-depended cues, are integrated in a multimodal saliency curve, modeling the time-varying perceptual importance of the composite video stream and signifying prevailing sensory events. The multimodal saliency representation forms the basis of a generic, bottom-up video summarization algorithm. Different fusion schemes are evaluated on a movie database of multimodal saliency annotations with comparative results provided across modalities. 
The produced summaries, based on low-level features and content-independent fusion and selection, are of subjectively high aesthetic and informative quality. |
G. P. Moustris, A. I. Mantelos, C. S. Tzafestas Enhancing surgical accuracy using virtual fixtures and motion compensation in robotic beating heart surgery Conference 2013 21st Mediterranean Conference on Control and Automation, MED 2013 - Conference Proceedings, 2013, ISSN: 1050-4729. Abstract | BibTeX | Links: [PDF] @conference{Moustris2013, title = {Enhancing surgical accuracy using virtual fixtures and motion compensation in robotic beating heart surgery}, author = { G. P. Moustris and A. I. Mantelos and C. S. Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Enhancing_surgical_accuracy_using_virtual_fixtures_and_motion_compensation_in_robotic_beating_heart_surgery.pdf}, doi = {10.1109/MED.2013.6608880}, issn = {1050-4729}, year = {2013}, date = {2013-06-01}, booktitle = {2013 21st Mediterranean Conference on Control and Automation, MED 2013 - Conference Proceedings}, pages = {1254--1260}, abstract = {This paper proposes a novel technique for applying virtual fixtures in a changing environment. The main targeted application is robotic beating heart surgery, which enables the surgeon to operate directly on a beating heart. Using a motion compensation framework, the motion of the heart surface is stabilized in a virtual space, which is presented to the surgeon to operate in. Consequently, the fixture is implemented in this static space, bypassing problems of dynamic fixtures such as position update, placement and force transients. Randomized experiments were performed using a trained surgeon comparing our approach to simple motion compensation and no compensation at all. The positive effect of the fixture in surgical accuracy for a tracking task is also discussed. {\textcopyright} 2013 IEEE.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper proposes a novel technique for applying virtual fixtures in a changing environment. 
The main targeted application is robotic beating heart surgery, which enables the surgeon to operate directly on a beating heart. Using a motion compensation framework, the motion of the heart surface is stabilized in a virtual space, which is presented to the surgeon to operate in. Consequently, the fixture is implemented in this static space, bypassing problems of dynamic fixtures such as position update, placement and force transients. Randomized experiments were performed using a trained surgeon comparing our approach to simple motion compensation and no compensation at all. The positive effect of the fixture in surgical accuracy for a tracking task is also discussed. © 2013 IEEE. |
G.P. Moustris, A.I. Mantelos, C.S. Tzafestas Active motion compensation in robotic cardiac surgery Conference 2013 European Control Conference, ECC 2013, 2013, ISBN: 9783033039629. Abstract | BibTeX | Links: [PDF] @conference{Moustris2013b, title = {Active motion compensation in robotic cardiac surgery}, author = { G.P. Moustris and A.I. Mantelos and C.S. Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Active_motion_compensation_in_robotic_cardiac_surgery.pdf}, isbn = {9783033039629}, year = {2013}, date = {2013-06-01}, booktitle = {2013 European Control Conference, ECC 2013}, abstract = {Motion compensation is a prominent application in robotic beating heart surgery, with significant potential benefits for both surgeons and patients. In this paper we investigate an activate assistance control scheme on a simple tracking task, which helps the surgeon guide the robot on a predefined reference. The control is implemented on top of a shared control system, which serves as a basis for implementing higher level controllers. Experiments with a trained surgeon are also presented, which show the positive effect of the approach. {\textcopyright} 2013 EUCA.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Motion compensation is a prominent application in robotic beating heart surgery, with significant potential benefits for both surgeons and patients. In this paper we investigate an activate assistance control scheme on a simple tracking task, which helps the surgeon guide the robot on a predefined reference. The control is implemented on top of a shared control system, which serves as a basis for implementing higher level controllers. Experiments with a trained surgeon are also presented, which show the positive effect of the approach. © 2013 EUCA. |
Nikolaos Karianakis, Petros Maragos An integrated system for digital restoration of prehistoric theran wall paintings Conference 2013 18th International Conference on Digital Signal Processing, DSP 2013, 2013, ISBN: 9781467358057. @conference{174, title = {An integrated system for digital restoration of prehistoric theran wall paintings}, author = { Nikolaos Karianakis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KarianakisMaragos_IntegratedSystemDigitalRestorationPrehistoricTheranWallpaintings_DSP2013.pdf}, doi = {10.1109/ICDSP.2013.6622838}, isbn = {9781467358057}, year = {2013}, date = {2013-01-01}, booktitle = {2013 18th International Conference on Digital Signal Processing, DSP 2013}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Stamatios Lefkimmiatis, Anastasios Roussos, Michael Unser, Petros Maragos Supplementary Material for the SSVM-2013 paper entitled “Convex Generalizations of Total Variation based on the Structure Tensor with Applications to Inverse Problems” Conference Scale Space and Variational Methods in Computer Vision (SSVM-2013), pp.48-60, 2013, 2013. @conference{172, title = {Supplementary Material for the SSVM-2013 paper entitled “Convex Generalizations of Total Variation based on the Structure Tensor with Applications to Inverse Problems”}, author = { Stamatios Lefkimmiatis and Anastasios Roussos and Michael Unser and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/LRUM_ConvexGeneralizationsTotalVariationStructureTensorInverseProblems_SSVM2013.pdf}, year = {2013}, date = {2013-01-01}, booktitle = {Scale Space and Variational Methods in Computer Vision (SSVM-2013), pp.48-60, 2013}, pages = {1--4}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
I. Rodomagoulakis, P. Giannoulis, Z. I. Skordilis, P. Maragos, G. Potamianos Experiments on far-field multichannel speech processing in smart homes Conference 2013 18th International Conference on Digital Signal Processing, DSP 2013, 2013, ISBN: 9781467358057. @conference{175, title = {Experiments on far-field multichannel speech processing in smart homes}, author = { I. Rodomagoulakis and P. Giannoulis and Z. I. Skordilis and P. Maragos and G. Potamianos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/RGSMP_ExperimsFarfieldMultichannelSpeechProcessSmartHomes_DSP2013.pdf}, doi = {10.1109/ICDSP.2013.6622707}, isbn = {9781467358057}, year = {2013}, date = {2013-01-01}, booktitle = {2013 18th International Conference on Digital Signal Processing, DSP 2013}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Petros Maragos, Kimon Drakopoulos Segmentation and Skeletonization on Arbitrary Graphs Using Multiscale Morphology and Active Contours Book Chapter Breuß, Michael; Bruckstein, Alfred; Maragos, Petros (Ed.): Innovations for Shape Analysis: Models and Algorithms, pp. 53–75, Springer Berlin Heidelberg, Berlin, Heidelberg, 2013, ISBN: 978-3-642-34141-0. Abstract | BibTeX | Links: [Webpage] [PDF] @inbook{Maragos2013b, title = {Segmentation and Skeletonization on Arbitrary Graphs Using Multiscale Morphology and Active Contours}, author = {Petros Maragos and Kimon Drakopoulos}, editor = {Michael Breuß and Alfred Bruckstein and Petros Maragos}, url = {https://doi.org/10.1007/978-3-642-34141-0_3 http://robotics.ntua.gr/wp-content/uploads/sites/2/978-3-642-34141-0_3.pdf}, doi = {10.1007/978-3-642-34141-0_3}, isbn = {978-3-642-34141-0}, year = {2013}, date = {2013-01-01}, booktitle = {Innovations for Shape Analysis: Models and Algorithms}, pages = {53--75}, publisher = {Springer Berlin Heidelberg}, address = {Berlin, Heidelberg}, abstract = {In this chapter we focus on formulating and implementing on abstract domains such as arbitrary graphs popular methods and techniques developed for image analysis, in particular multiscale morphology and active contours. To this goal we extend existing work on graph morphology to multiscale dilation and erosion and implement them recursively using level sets of functions defined on the graph's nodes. We propose approximations to the calculation of the gradient and the divergence of vector functions defined on graphs and use these approximations to apply the technique of geodesic active contours for object detection on graphs via segmentation. 
Finally, using these novel ideas, we propose a method for multiscale shape skeletonization on arbitrary graphs.}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } In this chapter we focus on formulating and implementing on abstract domains such as arbitrary graphs popular methods and techniques developed for image analysis, in particular multiscale morphology and active contours. To this goal we extend existing work on graph morphology to multiscale dilation and erosion and implement them recursively using level sets of functions defined on the graph's nodes. We propose approximations to the calculation of the gradient and the divergence of vector functions defined on graphs and use these approximations to apply the technique of geodesic active contours for object detection on graphs via segmentation. Finally, using these novel ideas, we propose a method for multiscale shape skeletonization on arbitrary graphs. |
P Maragos Representations for Morphological Image Operators and Analogies with Linear Operators Book Chapter Hawkes, P W (Ed.): Advances in Imaging and Electron Physics, 177 , pp. 45–187, Elsevier Acad. Press, 2013. @inbook{Maragos2013AIEP, title = {Representations for Morphological Image Operators and Analogies with Linear Operators}, author = {P Maragos}, editor = {P W Hawkes}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2013_Maragos_OperatorRepresentations_chapter_AIEP_reprint.pdf}, doi = {10.1016/B978-0-12-407702-7.00002-4}, year = {2013}, date = {2013-01-01}, booktitle = {Advances in Imaging and Electron Physics}, volume = {177}, pages = {45--187}, publisher = {Elsevier Acad. Press}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } |
A Zlatintsi Music Signal Processing and Applications in Recognition PhD Thesis School of ECE, NTUA, 2013. Abstract | BibTeX | Links: [PDF] @phdthesis{Zlatintsi13, title = {Music Signal Processing and Applications in Recognition}, author = {A Zlatintsi}, url = {http://robotics.ntua.gr/wp-content/publications/Zlatintsi_PhDThesis_Dec2013_EMP.pdf}, year = {2013}, date = {2013-12-01}, school = {School of ECE, NTUA}, abstract = {This thesis lies in the area of signal processing and analysis of music signals using computational methods for the extraction of effective representations for automatic recognition. We explore and develop efficient algorithms using nonlinear methods for the analysis of the structure of music signals, which is of importance for their modeling. Our main research directions deal with the analysis of the structure and the characteristics of musical instruments in order to gain insight about their function and properties. We study the characteristics of the different genres of music. Finally, we evaluate the effectiveness of the proposed nonlinear models for the detection of perceptually important music and audio events. The approach we follow contributes to state-of-the-art technologies related to automatic computer-based recognition of musical signals and audio summarization, which nowadays are essential in everyday life. Because of the vast amount of music, audio and multimedia data in the web and our personal computers, the use of this study could be shown in applications such as automatic genre classification, automatic recognition of music’s basic structures, such as musical instruments, and audio content analysis for music and audio summarization. The above mentioned applications require robust solutions to information processing problems. Toward this goal, the development of efficient digital signal processing methods and the extraction of relevant features is of importance. 
In this thesis we propose such methods and algorithms for feature extraction with interesting results that render the descriptors of direct applicability. The proposed methods are applied on classification experiments illustrating that they can capture important aspects of music, such as the micro-variations of their structure. Descriptors based on macro-structures may reduce the complexity of the classification system, since satisfactory results can be achieved using simpler statistical models. Finally, the introduction of a ‘‘music’’ filterbank appears to be promising for automatic genre classification.}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } This thesis lies in the area of signal processing and analysis of music signals using computational methods for the extraction of effective representations for automatic recognition. We explore and develop efficient algorithms using nonlinear methods for the analysis of the structure of music signals, which is of importance for their modeling. Our main research directions deal with the analysis of the structure and the characteristics of musical instruments in order to gain insight about their function and properties. We study the characteristics of the different genres of music. Finally, we evaluate the effectiveness of the proposed nonlinear models for the detection of perceptually important music and audio events. The approach we follow contributes to state-of-the-art technologies related to automatic computer-based recognition of musical signals and audio summarization, which nowadays are essential in everyday life. Because of the vast amount of music, audio and multimedia data in the web and our personal computers, the use of this study could be shown in applications such as automatic genre classification, automatic recognition of music’s basic structures, such as musical instruments, and audio content analysis for music and audio summarization. 
The above mentioned applications require robust solutions to information processing problems. Toward this goal, the development of efficient digital signal processing methods and the extraction of relevant features is of importance. In this thesis we propose such methods and algorithms for feature extraction with interesting results that render the descriptors of direct applicability. The proposed methods are applied on classification experiments illustrating that they can capture important aspects of music, such as the micro-variations of their structure. Descriptors based on macro-structures may reduce the complexity of the classification system, since satisfactory results can be achieved using simpler statistical models. Finally, the introduction of a ‘‘music’’ filterbank appears to be promising for automatic genre classification. |
2012 |
Argiro Vatakis, Petros Maragos, Isidoros Rodomagoulakis, Charles Spence Assessing the effect of physical differences in the articulation of consonants and vowels on audiovisual temporal perception Journal Article Frontiers in Integrative Neuroscience, 6 , 2012, ISSN: 1662-5145. Abstract | BibTeX | Links: [PDF] [Webpage] @article{138, title = {Assessing the effect of physical differences in the articulation of consonants and vowels on audiovisual temporal perception}, author = {Argiro Vatakis and Petros Maragos and Isidoros Rodomagoulakis and Charles Spence}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/VMRS_ArticulConsonVowelAudioVisualTimePerception_fiin2012.pdf http://journal.frontiersin.org/article/10.3389/fnint.2012.00071/abstract}, doi = {10.3389/fnint.2012.00071}, issn = {1662-5145}, year = {2012}, date = {2012-01-01}, journal = {Frontiers in Integrative Neuroscience}, volume = {6}, abstract = {We investigated how the physical differences associated with the articulation of speech affect the temporal aspects of audiovisual speech perception. Video clips of consonants and vowels uttered by three different speakers were presented. The video clips were analyzed using an auditory-visual signal saliency model in order to compare signal saliency and behavioral data. Participants made temporal order judgments (TOJs) regarding which speech-stream (auditory or visual) had been presented first. The sensitivity of participants' TOJs and the point of subjective simultaneity (PSS) were analyzed as a function of the place, manner of articulation, and voicing for consonants, and the height/backness of the tongue and lip-roundedness for vowels. We expected that in the case of the place of articulation and roundedness, where the visual-speech signal is more salient, temporal perception of speech would be modulated by the visual-speech signal. No such effect was expected for the manner of articulation or height. 
The results demonstrate that for place and manner of articulation, participants' temporal percept was affected (although not always significantly) by highly-salient speech-signals with the visual-signals requiring smaller visual-leads at the PSS. This was not the case when height was evaluated. These findings suggest that in the case of audiovisual speech perception, a highly salient visual-speech signal may lead to higher probabilities regarding the identity of the auditory-signal that modulate the temporal window of multisensory integration of the speech-stimulus.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We investigated how the physical differences associated with the articulation of speech affect the temporal aspects of audiovisual speech perception. Video clips of consonants and vowels uttered by three different speakers were presented. The video clips were analyzed using an auditory-visual signal saliency model in order to compare signal saliency and behavioral data. Participants made temporal order judgments (TOJs) regarding which speech-stream (auditory or visual) had been presented first. The sensitivity of participants' TOJs and the point of subjective simultaneity (PSS) were analyzed as a function of the place, manner of articulation, and voicing for consonants, and the height/backness of the tongue and lip-roundedness for vowels. We expected that in the case of the place of articulation and roundedness, where the visual-speech signal is more salient, temporal perception of speech would be modulated by the visual-speech signal. No such effect was expected for the manner of articulation or height. The results demonstrate that for place and manner of articulation, participants' temporal percept was affected (although not always significantly) by highly-salient speech-signals with the visual-signals requiring smaller visual-leads at the PSS. This was not the case when height was evaluated. 
These findings suggest that in the case of audiovisual speech perception, a highly salient visual-speech signal may lead to higher probabilities regarding the identity of the auditory-signal that modulate the temporal window of multisensory integration of the speech-stimulus. |
Kimon Drakopoulos, Petros Maragos Active contours on graphs: Multiscale morphology and graphcuts Journal Article IEEE Journal on Selected Topics in Signal Processing, 6 (7), pp. 780–794, 2012, ISSN: 19324553. @article{139, title = {Active contours on graphs: Multiscale morphology and graphcuts}, author = {Kimon Drakopoulos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/DrakopoulosMaragos_ACs-on-Graphs-MultiscaleMorf-Graphcuts_ieeejSTSP2012.pdf}, doi = {10.1109/JSTSP.2012.2213675}, issn = {19324553}, year = {2012}, date = {2012-01-01}, journal = {IEEE Journal on Selected Topics in Signal Processing}, volume = {6}, number = {7}, pages = {780--794}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Sheraz Khan, Nikos Mitsou, Dirk Wollherr, Costas Tzafestas An optimization approach for 3D environment mapping using normal vector uncertainty Conference 2012 12th International Conference on Control Automation Robotics & Vision (ICARCV), 2012 (December), 2012, ISBN: 978-1-4673-1872-3. Abstract | BibTeX | Links: [Webpage] [PDF] @conference{Khan2012, title = {An optimization approach for 3D environment mapping using normal vector uncertainty}, author = { Sheraz Khan and Nikos Mitsou and Dirk Wollherr and Costas Tzafestas}, url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=6485267 http://robotics.ntua.gr/wp-content/uploads/sites/2/khan2012.pdf}, doi = {10.1109/ICARCV.2012.6485267}, isbn = {978-1-4673-1872-3}, year = {2012}, date = {2012-12-01}, booktitle = {2012 12th International Conference on Control Automation Robotics \& Vision (ICARCV)}, volume = {2012}, number = {December}, pages = {841--846}, abstract = {In this paper a novel approach for 3D environment mapping using registered robot poses is presented. The proposed algorithm focuses on improving the quality of robot generated 3D maps by incorporating the uncertainty of 3D points and propagating it into the normal vectors of surfaces. The uncertainty of normal vectors is an indicator of the quality of the detected surface. A controlled random search algorithm is applied to optimize a non-convex function of uncertain normal vectors and number of clusters in order to find the optimal threshold parameter for the segmentation process. This approach leads to an improved cluster coherence and thus better maps.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper a novel approach for 3D environment mapping using registered robot poses is presented. The proposed algorithm focuses on improving the quality of robot generated 3D maps by incorporating the uncertainty of 3D points and propagating it into the normal vectors of surfaces. 
The uncertainty of normal vectors is an indicator of the quality of the detected surface. A controlled random search algorithm is applied to optimize a non-convex function of uncertain normal vectors and number of clusters in order to find the optimal threshold parameter for the segmentation process. This approach leads to an improved cluster coherence and thus better maps. |
Spyros V. Velanas, Costas S. Tzafestas Model-mediated telehaptic perception of delayed curvature Conference Proceedings - IEEE International Workshop on Robot and Human Interactive Communication, 2012, ISBN: 9781467346054. Abstract | BibTeX | Links: [PDF] @conference{Velanas2012, title = {Model-mediated telehaptic perception of delayed curvature}, author = { Spyros V. Velanas and Costas S. Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/velanas2012.pdf}, doi = {10.1109/ROMAN.2012.6343871}, isbn = {9781467346054}, year = {2012}, date = {2012-09-01}, booktitle = {Proceedings - IEEE International Workshop on Robot and Human Interactive Communication}, pages = {941--947}, abstract = {This paper proposes a model-mediated telemanipulation scheme, focusing on the kinaesthetic perception of specific geometric properties of the remote environment in the presence of time delay. The basic idea is inspired from previous work on impedance-reflection teleoperation, which is here extended to incorporate the construction of a two-dimensional local geometric model. This model incorporates the local curvature of the remote environment, estimated online using a recursive least squares (RLS) method, which is then used to reconstruct a virtual surface model at the master site for haptic display. A series of experiments has been conducted, where each subject manipulated the haptic master to kinaesthetically explore the surface of a remote (virtual) environment. The analysis of the obtained experimental results, in terms of telehaptic discrimination of curvature, shows the effectiveness of the proposed model-mediated scheme at mitigating some of the adverse effects of time delay in the communication loop. 
\textcopyright 2012 IEEE.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper proposes a model-mediated telemanipulation scheme, focusing on the kinaesthetic perception of specific geometric properties of the remote environment in the presence of time delay. The basic idea is inspired from previous work on impedance-reflection teleoperation, which is here extended to incorporate the construction of a two-dimensional local geometric model. This model incorporates the local curvature of the remote environment, estimated online using a recursive least squares (RLS) method, which is then used to reconstruct a virtual surface model at the master site for haptic display. A series of experiments has been conducted, where each subject manipulated the haptic master to kinaesthetically explore the surface of a remote (virtual) environment. The analysis of the obtained experimental results, in terms of telehaptic discrimination of curvature, shows the effectiveness of the proposed model-mediated scheme at mitigating some of the adverse effects of time delay in the communication loop. © 2012 IEEE. |
A Zlatintsi, P Maragos AM-FM Modulation Features for Music Instrument Signal Analysis and Recognition Conference Proc. European Signal Processing Conference, Bucharest, Romania, 2012. Abstract | BibTeX | Links: [PDF] @conference{ZlMa12, title = {AM-FM Modulation Features for Music Instrument Signal Analysis and Recognition}, author = {A Zlatintsi and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/ZlatintsiMaragos_MusicalInstrumentsAMFM_EUSIPCO2012.pdf}, year = {2012}, date = {2012-08-01}, booktitle = {Proc. European Signal Processing Conference}, address = {Bucharest, Romania}, abstract = {In this paper, we explore a nonlinear AM-FM model to extract alternative features for music instrument recognition tasks. Amplitude and frequency micro-modulations are measured in musical signals and are employed to model the existing information. The features used are the multiband mean instantaneous amplitude (mean-IAM) and mean instantaneous frequency (mean-IFM) modulation. The instantaneous features are estimated using the multiband Gabor Energy Separation Algorithm (Gabor-ESA). An alternative method, the iterative-ESA is also explored; and initial experimentation shows that it could be used to estimate the harmonic content of a tone. The Gabor-ESA is evaluated against and in combination with Mel frequency cepstrum coefficients (MFCCs) using both static and dynamic classifiers. The method used in this paper has proven to be able to extract the fine-structured modulations of music signals; further, it has shown to be promising for recognition tasks accomplishing an error rate reduction up to 60% for the best recognition case combined with MFCCs.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, we explore a nonlinear AM-FM model to extract alternative features for music instrument recognition tasks. Amplitude and frequency micro-modulations are measured in musical signals and are employed to model the existing information. 
The features used are the multiband mean instantaneous amplitude (mean-IAM) and mean instantaneous frequency (mean-IFM) modulation. The instantaneous features are estimated using the multiband Gabor Energy Separation Algorithm (Gabor-ESA). An alternative method, the iterative-ESA is also explored; and initial experimentation shows that it could be used to estimate the harmonic content of a tone. The Gabor-ESA is evaluated against and in combination with Mel frequency cepstrum coefficients (MFCCs) using both static and dynamic classifiers. The method used in this paper has proven to be able to extract the fine-structured modulations of music signals; further, it has shown to be promising for recognition tasks accomplishing an error rate reduction up to 60% for the best recognition case combined with MFCCs. |
A Zlatintsi, P Maragos, A Potamianos, G Evangelopoulos A Saliency-Based Approach to Audio Event Detection and Summarization Conference Proc. European Signal Processing Conference, Bucharest, Romania, 2012. Abstract | BibTeX | Links: [PDF] @conference{ZMP+12, title = {A Saliency-Based Approach to Audio Event Detection and Summarization}, author = {A Zlatintsi and P Maragos and A Potamianos and G Evangelopoulos}, url = {http://robotics.ntua.gr/wp-content/publications/ZlatintsiMaragos+_SaliencyBasedAudioSummarization_EUSIPCO2012.pdf}, year = {2012}, date = {2012-08-01}, booktitle = {Proc. European Signal Processing Conference}, address = {Bucharest, Romania}, abstract = {In this paper, we approach the problem of audio summarization by saliency computation of audio streams, exploring the potential of a modulation model for the detection of perceptually important audio events based on saliency models, along with various fusion schemes for their combination. The fusion schemes include linear, adaptive and nonlinear methods. A machine learning approach, where training of the features is performed, was also applied for the purpose of comparison with the proposed technique. For the evaluation of the algorithm we use audio data taken from movies and we show that nonlinear fusion schemes perform best. The results are reported on the MovSum database, using objective evaluations (against ground-truth denoting the perceptually important audio events). 
Analysis of the selected audio segments is also performed against a labeled database in respect to audio categories, while a method for fine-tuning of the selected audio events is proposed.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, we approach the problem of audio summarization by saliency computation of audio streams, exploring the potential of a modulation model for the detection of perceptually important audio events based on saliency models, along with various fusion schemes for their combination. The fusion schemes include linear, adaptive and nonlinear methods. A machine learning approach, where training of the features is performed, was also applied for the purpose of comparison with the proposed technique. For the evaluation of the algorithm we use audio data taken from movies and we show that nonlinear fusion schemes perform best. The results are reported on the MovSum database, using objective evaluations (against ground-truth denoting the perceptually important audio events). Analysis of the selected audio segments is also performed against a labeled database in respect to audio categories, while a method for fine-tuning of the selected audio events is proposed. |
John N. Karigiannis, Costas S. Tzafestas Robustness and generalization of model-free learning for robot kinematic control using a nested-hierarchical multi-agent topology Conference Proceedings of the IEEE RAS and EMBS International Conference on Biomedical Robotics and Biomechatronics, 2012, ISSN: 21551774. Abstract | BibTeX | Links: [PDF] @conference{Karigiannis2012, title = {Robustness and generalization of model-free learning for robot kinematic control using a nested-hierarchical multi-agent topology}, author = { John N. Karigiannis and Costas S. Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/karigiannis2012.pdf}, doi = {10.1109/BioRob.2012.6290276}, issn = {21551774}, year = {2012}, date = {2012-06-01}, booktitle = {Proceedings of the IEEE RAS and EMBS International Conference on Biomedical Robotics and Biomechatronics}, pages = {1140--1147}, abstract = {This paper focuses on evaluating the robustness and knowledge generalization properties of a model-free learning mechanism, applied for the kinematic control of robot manipulation chains based on a nested-hierarchical multi-agent architecture. In the proposed topology, the agents correspond to independent degrees-of-freedom (DOF) of the system, managing to gain experience over the task that they collaboratively perform by continuously exploring and exploiting their state-to-action mapping space. Each agent forms a local (partial) view of the global system state and task progress, through a recursive learning process. By organizing the agents in a nested topology, the goal is to facilitate modular scaling to more complex kinematic topologies, with loose control coupling among the agents. Reinforcement learning is applied within each agent, to evolve a local state-to-action mapping in a continuous domain, thus leading to a system that exhibits developmental properties. 
This work addresses problem settings in the domain of kinematic control of dexterous-redundant robot manipulation systems. The numerical experiments performed consider the case of a single-linkage open kinematic chain, presenting kinematic redundancies given the desired task-goal. The focal issue in these experiments is to assess the capacity of the proposed multi-agent system to progressively and autonomously acquire cooperative sensorimotor skills through a self-learning process, that is, without the use of any explicit model-based planning strategy. In this paper, generalization and robustness properties of the overall multi-agent system are explored. Furthermore, the proposed framework is evaluated in constrained motion tasks, both in static and non-static environments. The computational cost of the proposed multi-agent architecture is also assessed.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper focuses on evaluating the robustness and knowledge generalization properties of a model-free learning mechanism, applied for the kinematic control of robot manipulation chains based on a nested-hierarchical multi-agent architecture. In the proposed topology, the agents correspond to independent degrees-of-freedom (DOF) of the system, managing to gain experience over the task that they collaboratively perform by continuously exploring and exploiting their state-to-action mapping space. Each agent forms a local (partial) view of the global system state and task progress, through a recursive learning process. By organizing the agents in a nested topology, the goal is to facilitate modular scaling to more complex kinematic topologies, with loose control coupling among the agents. Reinforcement learning is applied within each agent, to evolve a local state-to-action mapping in a continuous domain, thus leading to a system that exhibits developmental properties. 
This work addresses problem settings in the domain of kinematic control of dexterous-redundant robot manipulation systems. The numerical experiments performed consider the case of a single-linkage open kinematic chain, presenting kinematic redundancies given the desired task-goal. The focal issue in these experiments is to assess the capacity of the proposed multi-agent system to progressively and autonomously acquire cooperative sensorimotor skills through a self-learning process, that is, without the use of any explicit model-based planning strategy. In this paper, generalization and robustness properties of the overall multi-agent system are explored. Furthermore, the proposed framework is evaluated in constrained motion tasks, both in static and non-static environments. The computational cost of the proposed multi-agent architecture is also assessed. |
Epameinondas Antonakos, Vassilis Pitsikalis, Isidoros Rodomagoulakis, Petros Maragos Unsupervised classification of extreme facial events using active appearance models tracking for sign language videos Conference Proceedings - International Conference on Image Processing, ICIP, 2012, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{178, title = {Unsupervised classification of extreme facial events using active appearance models tracking for sign language videos}, author = { Epameinondas Antonakos and Vassilis Pitsikalis and Isidoros Rodomagoulakis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/APRM_UnsupervisClassifExtremeFacialEventsAAM-SignLangVideos_ICIP2012.pdf}, doi = {10.1109/ICIP.2012.6467133}, issn = {15224880}, year = {2012}, date = {2012-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {1409--1412}, abstract = {We propose an Unsupervised method for Extreme States Classification (UnESC) on feature spaces of facial cues of interest. The method is built upon Active Appearance Models (AAM) face tracking and on feature extraction of Global and Local AAMs. UnESC is applied primarily on facial pose, but is shown to be extendable for the case of local models on the eyes and mouth. Given the importance of facial events in Sign Languages we apply the UnESC on videos from two sign language corpora, both American (ASL) and Greek (GSL) yielding promising qualitative and quantitative results. Apart from the detection of extreme facial states, the proposed UnESC also has impact for SL corpora lacking any facial annotations.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We propose an Unsupervised method for Extreme States Classification (UnESC) on feature spaces of facial cues of interest. The method is built upon Active Appearance Models (AAM) face tracking and on feature extraction of Global and Local AAMs. 
UnESC is applied primarily on facial pose, but is shown to be extendable for the case of local models on the eyes and mouth. Given the importance of facial events in Sign Languages we apply the UnESC on videos from two sign language corpora, both American (ASL) and Greek (GSL) yielding promising qualitative and quantitative results. Apart from the detection of extreme facial states, the proposed UnESC also has impact for SL corpora lacking any facial annotations. |
Christos Georgakis, Petros Maragos, Georgios Evangelopoulos, Dimitrios Dimitriadis Dominant spatio-temporal modulations and energy tracking in videos: Application to interest point detection for action recognition Conference Proceedings - International Conference on Image Processing, ICIP, 2012, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{176, title = {Dominant spatio-temporal modulations and energy tracking in videos: Application to interest point detection for action recognition}, author = { Christos Georgakis and Petros Maragos and Georgios Evangelopoulos and Dimitrios Dimitriadis}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/GMED_SpatioTemporModulationsEnergyTrackVideos-InterestPointDetectActionRecogn_ICIP2012.pdf}, doi = {10.1109/ICIP.2012.6466966}, issn = {15224880}, year = {2012}, date = {2012-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {741--744}, abstract = {The presence of multiband amplitude and frequency modulations (AM-FM) in wideband signals, such as textured images or speech, has led to the development of efficient multicomponent modulation models for low-level image and sound analysis. Moreover, compact yet descriptive representations have emerged by tracking, through non-linear energy operators, the dominant model components across time, space or frequency. In this paper, we propose a generalization of such approaches in the 3D spatio-temporal domain and explore the benefits of incorporating the Dominant Component Analysis scheme for interest point detection in videos for action recognition. Within this framework, actions are implicitly considered as manifestations of spatio-temporal oscillations in the dynamic visual stream. Multiband filtering and energy operators are applied to track the source energy in both spatial and temporal frequency bands. 
A new measure for extracting keypoint locations is formulated as the temporal dominant energy computed over the locally dominant modulation components, in terms of spatial modulation energy, of the input video frames. Theoretical formulation is supported by evaluation and comparisons in human action classification, which demonstrate the potential of the proposed detector.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } The presence of multiband amplitude and frequency modulations (AM-FM) in wideband signals, such as textured images or speech, has led to the development of efficient multicomponent modulation models for low-level image and sound analysis. Moreover, compact yet descriptive representations have emerged by tracking, through non-linear energy operators, the dominant model components across time, space or frequency. In this paper, we propose a generalization of such approaches in the 3D spatio-temporal domain and explore the benefits of incorporating the Dominant Component Analysis scheme for interest point detection in videos for action recognition. Within this framework, actions are implicitly considered as manifestations of spatio-temporal oscillations in the dynamic visual stream. Multiband filtering and energy operators are applied to track the source energy in both spatial and temporal frequency bands. A new measure for extracting keypoint locations is formulated as the temporal dominant energy computed over the locally dominant modulation components, in terms of spatial modulation energy, of the input video frames. Theoretical formulation is supported by evaluation and comparisons in human action classification, which demonstrate the potential of the proposed detector. |
Nikos Mitsou, Roderick de Nijs, David Lenz, Johannes Frimberger, Dirk Wollherr, Kolja Kühnlenz, Costas Tzafestas Gesture and Sign Language in Human-Computer Interaction and Embodied Communication Conference Spatial Cognition VIII, 7206 , 2012, ISSN: 03029743. Abstract | BibTeX | Links: [Webpage] @conference{31b, title = {Gesture and Sign Language in Human-Computer Interaction and Embodied Communication}, author = { Nikos Mitsou and Roderick de Nijs and David Lenz and Johannes Frimberger and Dirk Wollherr and Kolja Kühnlenz and Costas Tzafestas}, url = {http://link.springer.com/10.1007/978-3-642-34182-3}, doi = {10.1007/978-3-642-34182-3}, issn = {03029743}, year = {2012}, date = {2012-01-01}, booktitle = {Spatial Cognition VIII}, volume = {7206}, pages = {54--73}, abstract = {We extend and develop an existing virtual agent system to generate communicative gestures for different embodiments (i.e. virtual or physical agents). This paper presents our ongoing work on an implementation of this system for the NAO humanoid robot. From a specification of multi-modal behaviors encoded with the behavior markup language, BML, the system synchronizes and realizes the verbal and nonverbal behaviors on the robot.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We extend and develop an existing virtual agent system to generate communicative gestures for different embodiments (i.e. virtual or physical agents). This paper presents our ongoing work on an implementation of this system for the NAO humanoid robot. From a specification of multi-modal behaviors encoded with the behavior markup language, BML, the system synchronizes and realizes the verbal and nonverbal behaviors on the robot. |
Anastasios Roussos, Stavros Theodorakis, Vassilis Pitsikalis, Petros Maragos Hand tracking and affine shape-appearance handshape sub-units in continuous sign language recognition Conference Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), 6553 LNCS (PART 1), 2012, ISSN: 03029743. Abstract | BibTeX | Links: [PDF] @conference{189, title = {Hand tracking and affine shape-appearance handshape sub-units in continuous sign language recognition}, author = { Anastasios Roussos and Stavros Theodorakis and Vassilis Pitsikalis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/RoussosTheodorakisPitsikalisMaragos_HandTrackingAffineSAHandshapeSUsCSLR_ECCV10-SGA.pdf}, doi = {10.1007/978-3-642-35749-7_20}, issn = {03029743}, year = {2012}, date = {2012-01-01}, booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {6553 LNCS}, number = {PART 1}, pages = {258--272}, abstract = {We propose and investigate a framework that utilizes novel aspects concerning probabilistic and morphological visual processing for the segmentation, tracking and handshape modeling of the hands, which is used as front-end for sign language video analysis. Our ultimate goal is to explore the automatic Handshape Sub-Unit (HSU) construction and moreover the exploitation of the overall system in automatic sign language recognition (ASLR). We employ probabilistic skin color detection followed by the proposed morphological algorithms and related shape filtering for fast and reliable segmentation of hands and head. This is then fed to our hand tracking system which emphasizes robust handling of occlusions based on forward-backward prediction and incorporation of probabilistic constraints. 
The tracking is exploited by an Affine-invariant Modeling of hand Shape-Appearance images, offering a compact and descriptive representation of the hand configurations. We further propose that the handshape features extracted via the fitting of this model are utilized to construct in an unsupervised way basic HSUs. We first provide intuitive results on the HSU to sign mapping and further quantitatively evaluate the integrated system and the constructed HSUs on ASLR experiments at the sub-unit and sign level. These are conducted on continuous SL data from the BU400 corpus and investigate the effect of the involved parameters. The experiments indicate the effectiveness of the overall approach and especially for the modeling of handshapes when incorporated in the HSU-based framework showing promising results.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We propose and investigate a framework that utilizes novel aspects concerning probabilistic and morphological visual processing for the segmentation, tracking and handshape modeling of the hands, which is used as front-end for sign language video analysis. Our ultimate goal is to explore the automatic Handshape Sub-Unit (HSU) construction and moreover the exploitation of the overall system in automatic sign language recognition (ASLR). We employ probabilistic skin color detection followed by the proposed morphological algorithms and related shape filtering for fast and reliable segmentation of hands and head. This is then fed to our hand tracking system which emphasizes robust handling of occlusions based on forward-backward prediction and incorporation of probabilistic constraints. The tracking is exploited by an Affine-invariant Modeling of hand Shape-Appearance images, offering a compact and descriptive representation of the hand configurations. We further propose that the handshape features extracted via the fitting of this model are utilized to construct in an unsupervised way basic HSUs. 
We first provide intuitive results on the HSU to sign mapping and further quantitatively evaluate the integrated system and the constructed HSUs on ASLR experiments at the sub-unit and sign level. These are conducted on continuous SL data from the BU400 corpus and investigate the effect of the involved parameters. The experiments indicate the effectiveness of the overall approach and especially for the modeling of handshapes when incorporated in the HSU-based framework showing promising results. |
Sotirios Stasinopoulos, Petros Maragos Human action recognition using Histographic methods and hidden Markov models for visual martial arts applications Conference Image Processing (ICIP), 2012 19th IEEE International Conference on, 2012. @conference{179, title = {Human action recognition using Histographic methods and hidden Markov models for visual martial arts applications}, author = { Sotirios Stasinopoulos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/StasinopoulosMaragos_ActionRecognHistogrHMMsVisualMartialArts_icip2012.pdf}, year = {2012}, date = {2012-01-01}, booktitle = {Image Processing (ICIP), 2012 19th IEEE International Conference on}, pages = {745--748}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Stavros Theodorakis, Vassilis Pitsikalis, Isidoros Rodomagoulakis, Petros Maragos RECOGNITION WITH RAW CANONICAL PHONETIC MOVEMENT AND HANDSHAPE SUBUNITS ON VIDEOS OF CONTINUOUS SIGN LANGUAGE Conference Proc. IEEE Int'l Conf. on Image Processing, (Icip), 2012, ISBN: 9781467325332. @conference{177, title = {RECOGNITION WITH RAW CANONICAL PHONETIC MOVEMENT AND HANDSHAPE SUBUNITS ON VIDEOS OF CONTINUOUS SIGN LANGUAGE}, author = { Stavros Theodorakis and Vassilis Pitsikalis and Isidoros Rodomagoulakis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/TPM_ContSignLangRecognRawCanonPhonSubunits_ICIP2012.pdf}, isbn = {9781467325332}, year = {2012}, date = {2012-01-01}, booktitle = {Proc. IEEE Int'l Conf. on Image Processing}, number = {Icip}, pages = {1413--1416}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
2011 |
Dimitrios Dimitriadis, Petros Maragos, Alexandros Potamianos On the effects of filterbank design and energy computation on robust speech recognition Journal Article IEEE Transactions on Audio, Speech and Language Processing, 19 (6), pp. 1504–1516, 2011, ISSN: 15587916. Abstract | BibTeX | Links: [PDF] @article{137, title = {On the effects of filterbank design and energy computation on robust speech recognition}, author = {Dimitrios Dimitriadis and Petros Maragos and Alexandros Potamianos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/DimitriadisMaragosPotamianos_Effects-Filterbank-Design-Energy-Computation-Robust-Speech-Recognition_ieeeTASLP_aug11.pdf}, doi = {10.1109/TASL.2010.2092766}, issn = {15587916}, year = {2011}, date = {2011-01-01}, journal = {IEEE Transactions on Audio, Speech and Language Processing}, volume = {19}, number = {6}, pages = {1504--1516}, abstract = {In this paper, we examine how energy computation and filterbank design contribute to the overall front-end robustness, especially when the investigated features are applied to noisy speech signals, in mismatched training-testing conditions. In prior work (“Auditory Teager energy cepstrum coefficients for robust speech recognition,” D. Dimitriadis, P. Maragos, and A. Potamianos, in Proc. Eurospeech'05, Sep. 2005), a novel feature set called “Teager energy cepstrum coefficients” (TECCs) has been proposed, employing a dense, smooth filterbank and alternative energy computation schemes. TECCs were shown to be more robust to noise and exhibit improved performance compared to the widely used Mel frequency cepstral coefficients (MFCCs). In this paper, we attempt to interpret these results using a combined theoretical and experimental analysis framework. 
Specifically, we investigate in detail the connection between the filterbank design, i.e., the filter shape and bandwidth, the energy estimation scheme and the automatic speech recognition (ASR) performance under a variety of additive and/or convolutional noise conditions. For this purpose: 1) the performance of filterbanks using triangular, Gabor, and Gammatone filters with various bandwidths and filter positions are examined under different noisy speech recognition tasks, and 2) the squared amplitude and Teager–Kaiser energy operators are compared as two alternative approaches of computing the signal energy. Our end-goal is to understand how to select the most efficient filterbank and energy computation scheme that are maximally robust under both clean and noisy recording conditions. Theoretical and experimental results show that: 1) the filter bandwidth is one of the most important factors affecting speech recognition performance in noise, while the shape of the filter is of secondary importance, and 2) the Teager–Kaiser operator outperforms (on the average and for most noise types) the squared amplitude energy computation scheme for speech recognition in noisy conditions, especially, for large filter bandwidths. Experimental results show that selecting the appropriate filterbank and energy computation scheme can lead to significant error rate reduction over both MFCC and perceptual linear prediction (PLP) features for a variety of speech recognition tasks. 
A relative error rate reduction of up to $\sim 30\%$ for MFCCs and $\sim 39\%$ for PLPs is shown for the Aurora-3 Spanish Task.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this paper, we examine how energy computation and filterbank design contribute to the overall front-end robustness, especially when the investigated features are applied to noisy speech signals, in mismatched training-testing conditions. In prior work (“Auditory Teager energy cepstrum coefficients for robust speech recognition,” D. Dimitriadis, P. Maragos, and A. Potamianos, in Proc. Eurospeech'05, Sep. 2005), a novel feature set called “Teager energy cepstrum coefficients” (TECCs) has been proposed, employing a dense, smooth filterbank and alternative energy computation schemes. TECCs were shown to be more robust to noise and exhibit improved performance compared to the widely used Mel frequency cepstral coefficients (MFCCs). In this paper, we attempt to interpret these results using a combined theoretical and experimental analysis framework. Specifically, we investigate in detail the connection between the filterbank design, i.e., the filter shape and bandwidth, the energy estimation scheme and the automatic speech recognition (ASR) performance under a variety of additive and/or convolutional noise conditions. 
For this purpose: 1) the performance of filterbanks using triangular, Gabor, and Gammatone filters with various bandwidths and filter positions are examined under different noisy speech recognition tasks, and 2) the squared amplitude and Teager–Kaiser energy operators are compared as two alternative approaches of computing the signal energy. Our end-goal is to understand how to select the most efficient filterbank and energy computation scheme that are maximally robust under both clean and noisy recording conditions. Theoretical and experimental results show that: 1) the filter bandwidth is one of the most important factors affecting speech recognition performance in noise, while the shape of the filter is of secondary importance, and 2) the Teager–Kaiser operator outperforms (on the average and for most noise types) the squared amplitude energy computation scheme for speech recognition in noisy conditions, especially, for large filter bandwidths. Experimental results show that selecting the appropriate filterbank and energy computation scheme can lead to significant error rate reduction over both MFCC and perceptual linear prediction (PLP) features for a variety of speech recognition tasks. A relative error rate reduction of up to $\sim 30\%$ for MFCCs and $\sim 39\%$ for PLPs is shown for the Aurora-3 Spanish Task. |
Nikos Mitsou, Irene Ntoutsi, Dirk Wollherr, Costas Tzafestas, Hans Peter Kriegel Revealing cluster formation over huge volatile robotic data Conference Proceedings - IEEE International Conference on Data Mining, ICDM, 2011, ISSN: 15504786. Abstract | BibTeX | Links: [PDF] @conference{Mitsou2011, title = {Revealing cluster formation over huge volatile robotic data}, author = { Nikos Mitsou and Irene Ntoutsi and Dirk Wollherr and Costas Tzafestas and Hans Peter Kriegel}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/mitsou2011.pdf}, doi = {10.1109/ICDMW.2011.147}, issn = {15504786}, year = {2011}, date = {2011-12-01}, booktitle = {Proceedings - IEEE International Conference on Data Mining, ICDM}, pages = {450--457}, abstract = {In this paper, we propose a driven by the robotics field method for revealing global clusters over a fast, huge and volatile stream of robotic data. The stream comes from a mobile robot which autonomously navigates in an unknown environment perceiving it through its sensors. The sensor data arrives fast, is huge and evolves quickly over time as the robot explores the environment and observes new objects or new parts of already observed objects. To deal with the nature of data, we propose a grid-based algorithm that updates the grid structure and adjusts the so far built clusters online. Our method is capable of detecting object formations over time based on the partial observations of the robot at each time point. Experiments on real data verify the usefulness and efficiency of our method.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, we propose a driven by the robotics field method for revealing global clusters over a fast, huge and volatile stream of robotic data. The stream comes from a mobile robot which autonomously navigates in an unknown environment perceiving it through its sensors. 
The sensor data arrives fast, is huge and evolves quickly over time as the robot explores the environment and observes new objects or new parts of already observed objects. To deal with the nature of data, we propose a grid-based algorithm that updates the grid structure and adjusts the so far built clusters online. Our method is capable of detecting object formations over time based on the partial observations of the robot at each time point. Experiments on real data verify the usefulness and efficiency of our method. |
A Zlatintsi, P Maragos Musical Instruments Signal Analysis and Recognition Using Fractal Features Conference Proc. European Signal Processing Conference, Barcelona, Spain, 2011. Abstract | BibTeX | Links: [PDF] @conference{ZlMa11, title = {Musical Instruments Signal Analysis and Recognition Using Fractal Features}, author = {A Zlatintsi and P Maragos}, url = {http://robotics.ntua.gr/wp-content/publications/ZlatintsiMaragos_MusicalInstrumentsMFD_EUSIPCO2011.pdf}, year = {2011}, date = {2011-08-01}, booktitle = {Proc. European Signal Processing Conference}, address = {Barcelona, Spain}, abstract = {Analyzing the structure of music signals at multiple time scales is of importance both for modeling music signals and their automatic computer-based recognition. In this paper we propose the multiscale fractal dimension profile as a descriptor useful to quantify the multiscale complexity of the music waveform. We have experimentally found that this descriptor can discriminate several aspects among different music instruments. We compare the descriptiveness of our features against that of Mel frequency cepstral coefficients (MFCCs) using both static and dynamic classifiers, such as Gaussian mixture models (GMMs) and hidden Markov models (HMMs). The methods and features proposed in this paper are promising for music signal analysis and of direct applicability in large-scale music classification tasks.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Analyzing the structure of music signals at multiple time scales is of importance both for modeling music signals and their automatic computer-based recognition. In this paper we propose the multiscale fractal dimension profile as a descriptor useful to quantify the multiscale complexity of the music waveform. We have experimentally found that this descriptor can discriminate several aspects among different music instruments. 
We compare the descriptiveness of our features against that of Mel frequency cepstral coefficients (MFCCs) using both static and dynamic classifiers, such as Gaussian mixture models (GMMs) and hidden Markov models (HMMs). The methods and features proposed in this paper are promising for music signal analysis and of direct applicability in large-scale music classification tasks. |
N Malandrakis, A Potamianos, G Evangelopoulos, A Zlatintsi A Supervised Approach to Movie Emotion Tracking Conference Proc. {IEEE} Int'l Conf. Acous., Speech, and Signal Processing, Prague, Czech Republic, 2011. Abstract | BibTeX | Links: [PDF] @conference{MPE+11, title = {A Supervised Approach to Movie Emotion Tracking}, author = {N Malandrakis and A Potamianos and G Evangelopoulos and A Zlatintsi}, url = {http://robotics.ntua.gr/wp-content/publications/Malandrakis+_movie_emotion_ICASSP11.pdf}, year = {2011}, date = {2011-05-01}, booktitle = {Proc. {IEEE} Int'l Conf. Acous., Speech, and Signal Processing}, address = {Prague, Czech Republic}, abstract = {In this paper, we present experiments on continuous time, continuous scale affective movie content recognition (emotion tracking). A major obstacle for emotion research has been the lack of appropriately annotated databases, limiting the potential for supervised algorithms. To that end we develop and present a database of movie affect, annotated in continuous time, on a continuous valence-arousal scale. Supervised learning methods are proposed to model the continuous affective response using hidden Markov Models (independent) in each dimension. These models classify each video frame into one of seven discrete categories (in each dimension); the discrete-valued curves are then converted to continuous values via spline interpolation. A variety of audio-visual features are investigated and an optimal feature set is selected. The potential of the method is experimentally verified on twelve 30-minute movie clips with good precision at a macroscopic level.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, we present experiments on continuous time, continuous scale affective movie content recognition (emotion tracking). A major obstacle for emotion research has been the lack of appropriately annotated databases, limiting the potential for supervised algorithms. 
To that end we develop and present a database of movie affect, annotated in continuous time, on a continuous valence-arousal scale. Supervised learning methods are proposed to model the continuous affective response using hidden Markov Models (independent) in each dimension. These models classify each video frame into one of seven discrete categories (in each dimension); the discrete-valued curves are then converted to continuous values via spline interpolation. A variety of audio-visual features are investigated and an optimal feature set is selected. The potential of the method is experimentally verified on twelve 30-minute movie clips with good precision at a macroscopic level. |
I Rodomagoulakis, S Theodorakis, V Pitsikalis, P Maragos Experiments on global and local active appearance models for analysis of sign language facial expressions Conference 9th International Gesture …, 2011. BibTeX | Links: [Webpage] [PDF] @conference{Rodomagoulakis2011, title = {Experiments on global and local active appearance models for analysis of sign language facial expressions}, author = { I Rodomagoulakis and S Theodorakis and V Pitsikalis and P Maragos}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.414.7847&rep=rep1&type=pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/RodomagoulakisTheodorakisPitsikalisMaragos_ExperimsGlobalLocalActiveAppearanceModels-AnalSignLanguageFacialExpressions_GW2011.pdf}, year = {2011}, date = {2011-05-01}, booktitle = {9th International Gesture \ldots}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
S Theodorakis, V Pitsikalis, P Maragos Advances in Dynamic-Static Integration of Manual Cues for Sign Language Recognition Conference … Gesture Workshop (GW 2011): Gesture in …, 2011. BibTeX | Links: [Webpage] [PDF] @conference{Theodorakis2011, title = {Advances in Dynamic-Static Integration of Manual Cues for Sign Language Recognition}, author = { S Theodorakis and V Pitsikalis and P Maragos}, url = {http://scholar.google.com.tr/scholar?start=250&q=%22sign+language+recognition%22&hl=en&as_sdt=0,5&as_ylo=2011#0 http://robotics.ntua.gr/wp-content/uploads/sites/2/TheodorakisPitsikalisMaragos_AdvancesDynamicStaticIntegrationManualCuesSignLanguageRecognition_GW2011.pdf}, year = {2011}, date = {2011-05-01}, booktitle = {\ldots Gesture Workshop (GW 2011): Gesture in \ldots}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Athanasia Zlatintsi, Petros Maragos MUSICAL INSTRUMENTS SIGNAL ANALYSIS AND RECOGNITION USING FRACTAL FEATURES Conference Proc. 19th European Signal Processing Conference (EUSIPCO-2011), (Eusipco), 2011. @conference{182, title = {MUSICAL INSTRUMENTS SIGNAL ANALYSIS AND RECOGNITION USING FRACTAL FEATURES}, author = { Athanasia Zlatintsi and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/ZlatintsiMaragos_MusicalInstrumentsMFD_EUSIPCO2011.pdf}, year = {2011}, date = {2011-01-01}, booktitle = {Proc. 19th European Signal Processing Conference (EUSIPCO-2011)}, number = {Eusipco}, pages = {684--688}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Vassilis Pitsikalis, Stavros Theodorakis, Christian Vogler, Petros Maragos Advances in phonetics-based sub-unit modeling for transcription alignment and sign language recognition Conference IEEE Computer Society Conference on Computer Vision and Pattern Recognition Workshops, 2011, ISSN: 21607508. Abstract | BibTeX | Links: [PDF] @conference{183, title = {Advances in phonetics-based sub-unit modeling for transcription alignment and sign language recognition}, author = { Vassilis Pitsikalis and Stavros Theodorakis and Christian Vogler and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PitsikalisTheodorakisVoglerMaragos_Advances-Phonetics-based-SubUnit-Modeling-Transcription-Alignmentand-SignLanguage-Recognition_CVPR2011Workshop.pdf}, doi = {10.1109/CVPRW.2011.5981681}, issn = {21607508}, year = {2011}, date = {2011-01-01}, booktitle = {IEEE Computer Society Conference on Computer Vision and Pattern Recognition Workshops}, abstract = {We explore novel directions for incorporating phonetic transcriptions into sub-unit based statistical models for sign language recognition. First, we employ a new symbolic processing approach for converting sign language annotations, based on HamNoSys symbols, into structured sequences of labels according to the Posture-Detention-Transition-Steady Shift phonetic model. Next, we exploit these labels, and their correspondence with visual features to construct phonetics-based statistical sub-unit models. We also align these sequences, via the statistical sub-unit construction and decoding, to the visual data to extract time boundary information that they would lack otherwise. The resulting phonetic sub-units offer new perspectives for sign language analysis, phonetic modeling, and automatic recognition. 
We evaluate this approach via sign language recognition experiments on an extended Lemmas Corpus of Greek Sign Language, which results not only in improved performance compared to pure data-driven approaches, but also in meaningful phonetic sub-unit models that can be further exploited in interdisciplinary sign language analysis.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We explore novel directions for incorporating phonetic transcriptions into sub-unit based statistical models for sign language recognition. First, we employ a new symbolic processing approach for converting sign language annotations, based on HamNoSys symbols, into structured sequences of labels according to the Posture-Detention-Transition-Steady Shift phonetic model. Next, we exploit these labels, and their correspondence with visual features to construct phonetics-based statistical sub-unit models. We also align these sequences, via the statistical sub-unit construction and decoding, to the visual data to extract time boundary information that they would lack otherwise. The resulting phonetic sub-units offer new perspectives for sign language analysis, phonetic modeling, and automatic recognition. We evaluate this approach via sign language recognition experiments on an extended Lemmas Corpus of Greek Sign Language, which results not only in improved performance compared to pure data-driven approaches, but also in meaningful phonetic sub-unit models that can be further exploited in interdisciplinary sign language analysis. |
2010 |
John N. Karigiannis, Theodoros I. Rekatsinas, Costas S. Tzafestas Fuzzy rule based neuro-dynamic programming for mobile robot skill acquisition on the basis of a nested multi-agent architecture Conference 2010 IEEE International Conference on Robotics and Biomimetics, ROBIO 2010, 2010, ISBN: 9781424493173. Abstract | BibTeX | Links: [PDF] @conference{Karigiannis2010, title = {Fuzzy rule based neuro-dynamic programming for mobile robot skill acquisition on the basis of a nested multi-agent architecture}, author = { John N. Karigiannis and Theodoros I. Rekatsinas and Costas S. Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/karigiannis2010.pdf}, doi = {10.1109/ROBIO.2010.5723346}, isbn = {9781424493173}, year = {2010}, date = {2010-12-01}, booktitle = {2010 IEEE International Conference on Robotics and Biomimetics, ROBIO 2010}, pages = {312--319}, abstract = {Biologically inspired architectures that mimic the organizational structure of living organisms and in general frameworks that will improve the design of intelligent robots attract significant attention from the research community. Self-organization problems, intrinsic behaviors as well as effective learning and skill transfer processes in the context of robotic systems have been significantly investigated by researchers. Our work presents a new framework of developmental skill learning process by introducing a hierarchical nested multi-agent architecture. A neuro-dynamic learning mechanism employing function approximators in a fuzzified state-space is utilized, leading to a collaborative control scheme among the distributed agents engaged in a continuous space, which enables the multi-agent system to learn, over a period of time, how to perform sequences of continuous actions in a cooperative manner without any prior task model. 
The agents comprising the system manage to gain experience over the task that they collaboratively perform by continuously exploring and exploiting their state-to-action mapping space. For the specific problem setting, the proposed theoretical framework is employed in the case of two simulated e-Puck robots performing a collaborative box-pushing task. This task involves active cooperation between the robots in order to jointly push an object on a plane to a specified goal location. We should note that 1) there are no contact points specified for the two e-Pucks and 2) the shape of the object is indifferent. The actuated wheels of the mobile robots are considered as the independent agents that have to build up cooperative skills over time, in order for the robot to demonstrate intelligent behavior. Our goal in this experimental study is to evaluate both the proposed hierarchical multi-agent architecture, as well as the methodological control framework. Such a hierarchical multi-agent approach is envisioned to be highly scalable for the control of complex biologically inspired robot locomotion systems.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Biologically inspired architectures that mimic the organizational structure of living organisms and in general frameworks that will improve the design of intelligent robots attract significant attention from the research community. Self-organization problems, intrinsic behaviors as well as effective learning and skill transfer processes in the context of robotic systems have been significantly investigated by researchers. Our work presents a new framework of developmental skill learning process by introducing a hierarchical nested multi-agent architecture. 
A neuro-dynamic learning mechanism employing function approximators in a fuzzified state-space is utilized, leading to a collaborative control scheme among the distributed agents engaged in a continuous space, which enables the multi-agent system to learn, over a period of time, how to perform sequences of continuous actions in a cooperative manner without any prior task model. The agents comprising the system manage to gain experience over the task that they collaboratively perform by continuously exploring and exploiting their state-to-action mapping space. For the specific problem setting, the proposed theoretical framework is employed in the case of two simulated e-Puck robots performing a collaborative box-pushing task. This task involves active cooperation between the robots in order to jointly push an object on a plane to a specified goal location. We should note that 1) there are no contact points specified for the two e-Pucks and 2) the shape of the object is indifferent. The actuated wheels of the mobile robots are considered as the independent agents that have to build up cooperative skills over time, in order for the robot to demonstrate intelligent behavior. Our goal in this experimental study is to evaluate both the proposed hierarchical multi-agent architecture, as well as the methodological control framework. Such a hierarchical multi-agent approach is envisioned to be highly scalable for the control of complex biologically inspired robot locomotion systems. |
Spyros V Velanas, Costas S Tzafestas Human Telehaptic Perception of Stiffness using an Adaptive Impedance Reflection Bilateral Teleoperation Control Scheme Conference In: Proceedings of the IEEE International Workshop on Robot and Human Interactive Communication, ROMAN-2010, 2010, ISBN: 9781424479894. @conference{Velanas2010, title = {Human Telehaptic Perception of Stiffness using an Adaptive Impedance Reflection Bilateral Teleoperation Control Scheme}, author = { Spyros V Velanas and Costas S Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/velanas2010.pdf}, isbn = {9781424479894}, year = {2010}, date = {2010-09-01}, booktitle = {In: Proceedings of the IEEE International Workshop on Robot and Human Interactive Communication, ROMAN-2010}, pages = {21--26}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Vassilis Pitsikalis, Stavros Theodorakis, Petros Maragos Data-Driven Sub-Units and Modeling Structure for Continuous Sign Language Recognition with Multiple Cues Conference Proceedings of the Language Resources and Evaluation Conference Workshop on the Representation and Processing of Sign Languages : Corpora and Sign Languages Technologies, 2010. @conference{Pitsikalis2010, title = {Data-Driven Sub-Units and Modeling Structure for Continuous Sign Language Recognition with Multiple Cues}, author = { Vassilis Pitsikalis and Stavros Theodorakis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Data-Driven_Sub-Units_and_Modeling_Structure_for_C.pdf}, year = {2010}, date = {2010-05-01}, booktitle = {Proceedings of the Language Resources and Evaluation Conference Workshop on the Representation and Processing of Sign Languages : Corpora and Sign Languages Technologies}, pages = {196--203}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Ioannis Gkioulekas, Georgios Evangelopoulos, Petros Maragos Spatial Bayesian surprise for image saliency and quality assessment Conference Proceedings - International Conference on Image Processing, ICIP, 2010, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{187, title = {Spatial Bayesian surprise for image saliency and quality assessment}, author = { Ioannis Gkioulekas and Georgios Evangelopoulos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/GkioulekasEvangMaragos_SpatialBayesianSurpriseImageSaliency_ICIP10.pdf}, doi = {10.1109/ICIP.2010.5650991}, issn = {15224880}, year = {2010}, date = {2010-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {1081--1084}, abstract = {We propose an alternative interpretation of Bayesian surprise in the spatial domain, to account for saliency arising from contrast in image context. Our saliency formulation is integrated in three different application scenaria, with considerable improvements in performance: 1) visual attention prediction, validated using eye- and mouse-tracking data, 2) region of interest detection, to improve scale selection and localization, 3) image quality assessment to achieve better agreement with subjective human evaluations.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We propose an alternative interpretation of Bayesian surprise in the spatial domain, to account for saliency arising from contrast in image context. Our saliency formulation is integrated in three different application scenaria, with considerable improvements in performance: 1) visual attention prediction, validated using eye- and mouse-tracking data, 2) region of interest detection, to improve scale selection and localization, 3) image quality assessment to achieve better agreement with subjective human evaluations. |
John N Karigiannis, Theodoros I Rekatsinas, Costas S Tzafestas Hierarchical Multi-Agent Architecture employing TD($\lambda$) Learning with Function Approximators for Robot Skill Acquisition Conference Architecture, 2010. @conference{36b, title = {Hierarchical Multi-Agent Architecture employing TD($\lambda$) Learning with Function Approximators for Robot Skill Acquisition}, author = { John N Karigiannis and Theodoros I Rekatsinas and Costas S Tzafestas}, year = {2010}, date = {2010-01-01}, booktitle = {Architecture}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Anastasios Roussos, Petros Maragos Tensor-based image diffusions derived from generalizations of the total variation and beltrami functionals Conference Proceedings - International Conference on Image Processing, ICIP, 2010, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{188, title = {Tensor-based image diffusions derived from generalizations of the total variation and beltrami functionals}, author = { Anastasios Roussos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/RoussosMaragos_TensorBasedImageDiffusions_ICIP10.pdf}, doi = {10.1109/ICIP.2010.5653241}, issn = {15224880}, year = {2010}, date = {2010-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {4141--4144}, abstract = {We introduce a novel functional for vector-valued images that generalizes several variational methods, such as the Total Variation and Beltrami Functionals. This functional is based on the structure tensor that describes the geometry of image structures within the neighborhood of each point. We first generalize the Beltrami functional based on the image patches and using embeddings in high dimensional spaces. Proceeding to the most general form of the proposed functional, we prove that its minimization leads to a nonlinear anisotropic diffusion that is regularized, in the sense that its diffusion tensor contains convolutions with a kernel. Using this result we propose two novel diffusion methods, the Generalized Beltrami Flow and the Tensor Total Variation. These methods combine the advantages of the variational approaches with those of the tensor-based diffusion approaches.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We introduce a novel functional for vector-valued images that generalizes several variational methods, such as the Total Variation and Beltrami Functionals. 
This functional is based on the structure tensor that describes the geometry of image structures within the neighborhood of each point. We first generalize the Beltrami functional based on the image patches and using embeddings in high dimensional spaces. Proceeding to the most general form of the proposed functional, we prove that its minimization leads to a nonlinear anisotropic diffusion that is regularized, in the sense that its diffusion tensor contains convolutions with a kernel. Using this result we propose two novel diffusion methods, the Generalized Beltrami Flow and the Tensor Total Variation. These methods combine the advantages of the variational approaches with those of the tensor-based diffusion approaches. |
Anastasios Roussos, Stavros Theodorakis, Vassilis Pitsikalis, Petros Maragos Affine-invariant modeling of shape-appearance images applied on sign language handshape classification Conference Proceedings - International Conference on Image Processing, ICIP, 2010, ISSN: 15224880. @conference{186, title = {Affine-invariant modeling of shape-appearance images applied on sign language handshape classification}, author = { Anastasios Roussos and Stavros Theodorakis and Vassilis Pitsikalis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/Roussos+_AffineInvariantModelingSLHandshapeClassification_ICIP10.pdf}, doi = {10.1109/ICIP.2010.5651358}, issn = {15224880}, year = {2010}, date = {2010-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {1417--1420}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Stavros Theodorakis, Vassilis Pitsikalis, Petros Maragos Model-level data-driven sub-units for signs in videos of continuous sign language Conference ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings, 2010, ISSN: 15206149. Abstract | BibTeX | Links: [PDF] @conference{191, title = {Model-level data-driven sub-units for signs in videos of continuous sign language}, author = { Stavros Theodorakis and Vassilis Pitsikalis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/TheodorakisPitsikalisMaragos_ModelDatadrivenSubunitsSignsVideoContSignLang_ICASSP2010.pdf}, doi = {10.1109/ICASSP.2010.5495875}, issn = {15206149}, year = {2010}, date = {2010-01-01}, booktitle = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings}, pages = {2262--2265}, abstract = {We investigate the issue of sign language automatic phonetic sub-unit modeling, that is completely data driven and without any prior phonetic information. A first step of visual processing leads to simple and effective region-based visual features. Prior to the sub-unit modeling we propose to employ a pronunciation clustering step with respect to each sign. Afterwards, for each sign and pronunciation group we find the time segmentation at the hidden Markov model (HMM) level. The models employed refer to movements as a sequence of dominant hand positions. The constructed segments are exploited explicitly at the model level via hierarchical clustering of HMMs and lead to the data-driven movement sub-unit construction. The constructed movement sub-units are evaluated in qualitative analysis experiments on data from the Boston University (BU)-400 American Sign Language corpus showing promising results. 
\textcopyright{} 2010 IEEE.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We investigate the issue of sign language automatic phonetic sub-unit modeling, that is completely data driven and without any prior phonetic information. A first step of visual processing leads to simple and effective region-based visual features. Prior to the sub-unit modeling we propose to employ a pronunciation clustering step with respect to each sign. Afterwards, for each sign and pronunciation group we find the time segmentation at the hidden Markov model (HMM) level. The models employed refer to movements as a sequence of dominant hand positions. The constructed segments are exploited explicitly at the model level via hierarchical clustering of HMMs and lead to the data-driven movement sub-unit construction. The constructed movement sub-units are evaluated in qualitative analysis experiments on data from the Boston University (BU)-400 American Sign Language corpus showing promising results. © 2010 IEEE. |
2009 |
Iasonas Kokkinos, Petros Maragos Synergy between object recognition and image segmentation using the expectation-maximization algorithm Journal Article IEEE Transactions on Pattern Analysis and Machine Intelligence, 31 (8), pp. 1486–1501, 2009, ISSN: 01628828. Abstract | BibTeX | Links: [PDF] @article{135, title = {Synergy between object recognition and image segmentation using the expectation-maximization algorithm}, author = {Iasonas Kokkinos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KokkinosMaragos_SynergyBetweenObjectRecognitionAndImageSegmentation_ieeetPAMI09.pdf}, doi = {10.1109/TPAMI.2008.158}, issn = {01628828}, year = {2009}, date = {2009-01-01}, journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, volume = {31}, number = {8}, pages = {1486--1501}, abstract = {In this work, we formulate the interaction between image segmentation and object recognition in the framework of the Expectation-Maximization (EM) algorithm. We consider segmentation as the assignment of image observations to object hypotheses and phrase it as the E-step, while the M-step amounts to fitting the object models to the observations. These two tasks are performed iteratively, thereby simultaneously segmenting an image and reconstructing it in terms of objects. We model objects using Active Appearance Models (AAMs) as they capture both shape and appearance variation. During the E-step, the fidelity of the AAM predictions to the image is used to decide about assigning observations to the object. For this, we propose two top-down segmentation algorithms. The first starts with an oversegmentation of the image and then softly assigns image segments to objects, as in the common setting of EM. The second uses curve evolution to minimize a criterion derived from the variational interpretation of EM and introduces AAMs as shape priors. 
For the M-step, we derive AAM fitting equations that accommodate segmentation information, thereby allowing for the automated treatment of occlusions. Apart from top-down segmentation results, we provide systematic experiments on object detection that validate the merits of our joint segmentation and recognition approach.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this work, we formulate the interaction between image segmentation and object recognition in the framework of the Expectation-Maximization (EM) algorithm. We consider segmentation as the assignment of image observations to object hypotheses and phrase it as the E-step, while the M-step amounts to fitting the object models to the observations. These two tasks are performed iteratively, thereby simultaneously segmenting an image and reconstructing it in terms of objects. We model objects using Active Appearance Models (AAMs) as they capture both shape and appearance variation. During the E-step, the fidelity of the AAM predictions to the image is used to decide about assigning observations to the object. For this, we propose two top-down segmentation algorithms. The first starts with an oversegmentation of the image and then softly assigns image segments to objects, as in the common setting of EM. The second uses curve evolution to minimize a criterion derived from the variational interpretation of EM and introduces AAMs as shape priors. For the M-step, we derive AAM fitting equations that accommodate segmentation information, thereby allowing for the automated treatment of occlusions. Apart from top-down segmentation results, we provide systematic experiments on object detection that validate the merits of our joint segmentation and recognition approach. |
Vassilis Pitsikalis, Petros Maragos Analysis and classification of speech signals by generalized fractal dimension features Journal Article Speech Communication, 51 (12), pp. 1206–1223, 2009, ISSN: 01676393. Abstract | BibTeX | Links: [PDF] @article{136, title = {Analysis and classification of speech signals by generalized fractal dimension features}, author = {Vassilis Pitsikalis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PitsikalisMaragos_AnalysisClassificationfSpeechFractalDimFeat_SpeechCommunication09.pdf}, doi = {10.1016/j.specom.2009.06.005}, issn = {01676393}, year = {2009}, date = {2009-01-01}, journal = {Speech Communication}, volume = {51}, number = {12}, pages = {1206--1223}, abstract = {We explore nonlinear signal processing methods inspired by dynamical systems and fractal theory in order to analyze and characterize speech sounds. A speech signal is at first embedded in a multidimensional phase-space and further employed for the estimation of measurements related to the fractal dimensions. Our goals are to compute these raw measurements in the practical cases of speech signals, to further utilize them for the extraction of simple descriptive features and to address issues on the efficacy of the proposed features to characterize speech sounds. We observe that distinct feature vector elements obtain values or show statistical trends that on average depend on general characteristics such as the voicing, the manner and the place of articulation of broad phoneme classes. Moreover the way that the statistical parameters of the features are altered as an effect of the variation of phonetic characteristics seem to follow some roughly formed patterns. We also discuss some qualitative aspects concerning the linear phoneme-wise correlation between the fractal features and the commonly employed mel-frequency cepstral coefficients (MFCCs) demonstrating phonetic cases of maximal and minimal correlation. 
In the same context we also investigate the fractal features' spectral content, in terms of the most and least correlated components with the MFCC. Further the proposed methods are examined under the light of indicative phoneme classification experiments. These quantify the efficacy of the features to characterize broad classes of speech sounds. The results are shown to be comparable for some classification scenarios with the corresponding ones of the MFCC features. \textcopyright{} 2009 Elsevier B.V. All rights reserved.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We explore nonlinear signal processing methods inspired by dynamical systems and fractal theory in order to analyze and characterize speech sounds. A speech signal is at first embedded in a multidimensional phase-space and further employed for the estimation of measurements related to the fractal dimensions. Our goals are to compute these raw measurements in the practical cases of speech signals, to further utilize them for the extraction of simple descriptive features and to address issues on the efficacy of the proposed features to characterize speech sounds. We observe that distinct feature vector elements obtain values or show statistical trends that on average depend on general characteristics such as the voicing, the manner and the place of articulation of broad phoneme classes. Moreover the way that the statistical parameters of the features are altered as an effect of the variation of phonetic characteristics seem to follow some roughly formed patterns. We also discuss some qualitative aspects concerning the linear phoneme-wise correlation between the fractal features and the commonly employed mel-frequency cepstral coefficients (MFCCs) demonstrating phonetic cases of maximal and minimal correlation. In the same context we also investigate the fractal features' spectral content, in terms of the most and least correlated components with the MFCC. 
Further the proposed methods are examined under the light of indicative phoneme classification experiments. These quantify the efficacy of the features to characterize broad classes of speech sounds. The results are shown to be comparable for some classification scenarios with the corresponding ones of the MFCC features. © 2009 Elsevier B.V. All rights reserved. |
George Papandreou, Athanassios Katsamanis, Vassilis Pitsikalis, Petros Maragos Adaptive multimodal fusion by uncertainty compensation with application to audiovisual speech recognition Journal Article IEEE Transactions on Audio, Speech and Language Processing, 17 (3), pp. 423–435, 2009, ISSN: 15587916. Abstract | BibTeX | Links: [PDF] [Webpage] @article{131, title = {Adaptive multimodal fusion by uncertainty compensation with application to audiovisual speech recognition}, author = {George Papandreou and Athanassios Katsamanis and Vassilis Pitsikalis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/PapandreouKatsamanisPitsikalisMaragos_MultimodalFusionUncertaintyCompensationAvasr_ieee-j-aslp09.pdf http://www.scopus.com/inward/record.url?eid=2-s2.0-44949227080&partnerID=40&md5=6edf7efa047e4239c0ea003cf525bf63}, doi = {10.1109/TASL.2008.2011515}, issn = {15587916}, year = {2009}, date = {2009-01-01}, journal = {IEEE Transactions on Audio, Speech and Language Processing}, volume = {17}, number = {3}, pages = {423--435}, abstract = {While the accuracy of feature measurements heavily depends on changing environmental conditions, studying the consequences of this fact in pattern recognition tasks has received relatively little attention to date. In this paper, we explicitly take feature measurement uncertainty into account and show how multimodal classification and learning rules should be adjusted to compensate for its effects. Our approach is particularly fruitful in multimodal fusion scenarios, such as audiovisual speech recognition, where multiple streams of complementary time-evolving features are integrated. For such applications, provided that the measurement noise uncertainty for each feature stream can be estimated, the proposed framework leads to highly adaptive multimodal fusion rules which are easy and efficient to implement. 
Our technique is widely applicable and can be transparently integrated with either synchronous or asynchronous multimodal sequence integration architectures. We further show that multimodal fusion methods relying on stream weights can naturally emerge from our scheme under certain assumptions; this connection provides valuable insights into the adaptivity properties of our multimodal uncertainty compensation approach. We show how these ideas can be practically applied for audiovisual speech recognition. In this context, we propose improved techniques for person-independent visual feature extraction and uncertainty estimation with active appearance models, and also discuss how enhanced audio features along with their uncertainty estimates can be effectively computed. We demonstrate the efficacy of our approach in audiovisual speech recognition experiments on the CUAVE database using either synchronous or asynchronous multimodal integration models.}, keywords = {}, pubstate = {published}, tppubtype = {article} } While the accuracy of feature measurements heavily depends on changing environmental conditions, studying the consequences of this fact in pattern recognition tasks has received relatively little attention to date. In this paper, we explicitly take feature measurement uncertainty into account and show how multimodal classification and learning rules should be adjusted to compensate for its effects. Our approach is particularly fruitful in multimodal fusion scenarios, such as audiovisual speech recognition, where multiple streams of complementary time-evolving features are integrated. For such applications, provided that the measurement noise uncertainty for each feature stream can be estimated, the proposed framework leads to highly adaptive multimodal fusion rules which are easy and efficient to implement. 
Our technique is widely applicable and can be transparently integrated with either synchronous or asynchronous multimodal sequence integration architectures. We further show that multimodal fusion methods relying on stream weights can naturally emerge from our scheme under certain assumptions; this connection provides valuable insights into the adaptivity properties of our multimodal uncertainty compensation approach. We show how these ideas can be practically applied for audiovisual speech recognition. In this context, we propose improved techniques for person-independent visual feature extraction and uncertainty estimation with active appearance models, and also discuss how enhanced audio features along with their uncertainty estimates can be effectively computed. We demonstrate the efficacy of our approach in audiovisual speech recognition experiments on the CUAVE database using either synchronous or asynchronous multimodal integration models. |
I Kokkinos, G Evangelopoulos, P Maragos Texture Analysis and Segmentation Using Modulation Features, Generative Models, and Weighted Curve Evolution Journal Article IEEE Transactions on Pattern Analysis and Machine Intelligence, 31 (1), pp. 142–157, 2009, ISSN: 0162-8828. Abstract | BibTeX | Links: [PDF] @article{4447672, title = {Texture Analysis and Segmentation Using Modulation Features, Generative Models, and Weighted Curve Evolution}, author = {I Kokkinos and G Evangelopoulos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/KokkinosEvangelopoulosMaragos_TextureAnalSegmModulGenerativeCurvEvol_ieeetPAMI2009.pdf}, doi = {10.1109/TPAMI.2008.33}, issn = {0162-8828}, year = {2009}, date = {2009-01-01}, journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, volume = {31}, number = {1}, pages = {142--157}, abstract = {In this work we approach the analysis and segmentation of natural textured images by combining ideas from image analysis and probabilistic modeling. We rely on AM-FM texture models and specifically on the Dominant Component Analysis (DCA) paradigm for feature extraction. This method provides a low-dimensional, dense and smooth descriptor, capturing essential aspects of texture, namely scale, orientation, and contrast. Our contributions are at three levels of the texture analysis and segmentation problems: First, at the feature extraction stage we propose a regularized demodulation algorithm that provides more robust texture features and explore the merits of modifying the channel selection criterion of DCA. Second, we propose a probabilistic interpretation of DCA and Gabor filtering in general, in terms of Local Generative Models. Extending this point of view to edge detection facilitates the estimation of posterior probabilities for the edge and texture classes. 
Third, we propose the weighted curve evolution scheme that enhances the Region Competition/ Geodesic Active Regions methods by allowing for the locally adaptive fusion of heterogeneous cues. Our segmentation results are evaluated on the Berkeley Segmentation Benchmark, and compare favorably to current state-of-the-art methods.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this work we approach the analysis and segmentation of natural textured images by combining ideas from image analysis and probabilistic modeling. We rely on AM-FM texture models and specifically on the Dominant Component Analysis (DCA) paradigm for feature extraction. This method provides a low-dimensional, dense and smooth descriptor, capturing essential aspects of texture, namely scale, orientation, and contrast. Our contributions are at three levels of the texture analysis and segmentation problems: First, at the feature extraction stage we propose a regularized demodulation algorithm that provides more robust texture features and explore the merits of modifying the channel selection criterion of DCA. Second, we propose a probabilistic interpretation of DCA and Gabor filtering in general, in terms of Local Generative Models. Extending this point of view to edge detection facilitates the estimation of posterior probabilities for the edge and texture classes. Third, we propose the weighted curve evolution scheme that enhances the Region Competition/ Geodesic Active Regions methods by allowing for the locally adaptive fusion of heterogeneous cues. Our segmentation results are evaluated on the Berkeley Segmentation Benchmark, and compare favorably to current state-of-the-art methods. |
Athanassios Katsamanis, George Papandreou, Petros Maragos Face active appearance modeling and speech acoustic information to recover articulation Journal Article IEEE Transactions on Audio, Speech and Language Processing, 17 (3), pp. 411–422, 2009, ISSN: 15587916. Abstract | BibTeX | Links: [PDF] @article{130, title = {Face active appearance modeling and speech acoustic information to recover articulation}, author = {Athanassios Katsamanis and George Papandreou and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KatsamanisPapandreouMaragos_AudiovisualSpeechInversion_ieee-j-aslp09.pdf}, doi = {10.1109/TASL.2008.2008740}, issn = {15587916}, year = {2009}, date = {2009-01-01}, journal = {IEEE Transactions on Audio, Speech and Language Processing}, volume = {17}, number = {3}, pages = {411--422}, abstract = {We are interested in recovering aspects of vocal tract's geometry and dynamics from speech, a problem referred to as speech inversion. Traditional audio-only speech inversion techniques are inherently ill-posed since the same speech acoustics can be produced by multiple articulatory configurations. To alleviate the ill-posedness of the audio-only inversion process, we propose an inversion scheme which also exploits visual information from the speaker's face. The complex audiovisual-to-articulatory mapping is approximated by an adaptive piecewise linear model. Model switching is governed by a Markovian discrete process which captures articulatory dynamic information. Each constituent linear mapping is effectively estimated via canonical correlation analysis. In the described multimodal context, we investigate alternative fusion schemes which allow interaction between the audio and visual modalities at various synchronization levels. 
For facial analysis, we employ active appearance models (AAMs) and demonstrate fully automatic face tracking and visual feature extraction. Using the AAM features in conjunction with audio features such as Mel frequency cepstral coefficients (MFCCs) or line spectral frequencies (LSFs) leads to effective estimation of the trajectories followed by certain points of interest in the speech production system. We report experiments on the QSMT and MOCHA databases which contain audio, video, and electromagnetic articulography data recorded in parallel. The results show that exploiting both audio and visual modalities in a multistream hidden Markov model based scheme clearly improves performance relative to either audio or visual-only estimation.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We are interested in recovering aspects of vocal tract's geometry and dynamics from speech, a problem referred to as speech inversion. Traditional audio-only speech inversion techniques are inherently ill-posed since the same speech acoustics can be produced by multiple articulatory configurations. To alleviate the ill-posedness of the audio-only inversion process, we propose an inversion scheme which also exploits visual information from the speaker's face. The complex audiovisual-to-articulatory mapping is approximated by an adaptive piecewise linear model. Model switching is governed by a Markovian discrete process which captures articulatory dynamic information. Each constituent linear mapping is effectively estimated via canonical correlation analysis. 
In the described multimodal context, we investigate alternative fusion schemes which allow interaction between the audio and visual modalities at various synchronization levels. For facial analysis, we employ active appearance models (AAMs) and demonstrate fully automatic face tracking and visual feature extraction. Using the AAM features in conjunction with audio features such as Mel frequency cepstral coefficients (MFCCs) or line spectral frequencies (LSFs) leads to effective estimation of the trajectories followed by certain points of interest in the speech production system. We report experiments on the QSMT and MOCHA databases which contain audio, video, and electromagnetic articulography data recorded in parallel. The results show that exploiting both audio and visual modalities in a multistream hidden Markov model based scheme clearly improves performance relative to either audio or visual-only estimation. |
Dimitrios Dimitriadis, Alexandros Potamianos, Petros Maragos A comparison of the squared energy and teager-kaiser operators for short-term energy estimation in additive noise Journal Article IEEE Transactions on Signal Processing, 57 (7), pp. 2569–2581, 2009, ISSN: 1053587X. Abstract | BibTeX | Links: [PDF] @article{132, title = {A comparison of the squared energy and teager-kaiser operators for short-term energy estimation in additive noise}, author = {Dimitrios Dimitriadis and Alexandros Potamianos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/DimitriadisPotamianosMaragos_ComparisonSquaredAmpl-TKOper-EnergyEstimation_ieeetSP2008.pdf}, doi = {10.1109/TSP.2009.2019299}, issn = {1053587X}, year = {2009}, date = {2009-01-01}, journal = {IEEE Transactions on Signal Processing}, volume = {57}, number = {7}, pages = {2569--2581}, abstract = {Time-frequency distributions that evaluate the signal's energy content both in the time and frequency domains are indispensable signal processing tools, especially, for nonstationary signals. Various short-time energy computation schemes are used in practice, including the mean squared amplitude and Teager-Kaiser energy approaches. Herein, we focus primarily on the short- and medium-term properties of these two energy estimation schemes, as well as, on their performance in the presence of additive noise. To facilitate this analysis and generalize the approach, we use a harmonic noise model to approximate the noise component. The error analysis is conducted both in the continuous- and discrete-time domains, deriving similar conclusions. The estimation errors are measured in terms of normalized deviations from the expected signal energy and are shown to greatly depend on both the signals' spectral content and the analysis window length. 
When medium- and long-term analysis windows are employed, the Teager-Kaiser energy operator is proven superior to the common squared energy operator, provided that the spectral content of the noise is more lowpass than the corresponding signal content, and vice versa. However, for shorter window lengths, the Teager-Kaiser operator always outperforms the squared energy operator. The theoretical results are experimentally verified for synthetic signals. Finally, the performance of the proposed energy operators is evaluated for short-term analysis of noisy speech signals and the implications for speech processing applications are outlined.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Time-frequency distributions that evaluate the signal's energy content both in the time and frequency domains are indispensable signal processing tools, especially, for nonstationary signals. Various short-time energy computation schemes are used in practice, including the mean squared amplitude and Teager-Kaiser energy approaches. Herein, we focus primarily on the short- and medium-term properties of these two energy estimation schemes, as well as, on their performance in the presence of additive noise. To facilitate this analysis and generalize the approach, we use a harmonic noise model to approximate the noise component. The error analysis is conducted both in the continuous- and discrete-time domains, deriving similar conclusions. The estimation errors are measured in terms of normalized deviations from the expected signal energy and are shown to greatly depend on both the signals' spectral content and the analysis window length. When medium- and long-term analysis windows are employed, the Teager-Kaiser energy operator is proven superior to the common squared energy operator, provided that the spectral content of the noise is more lowpass than the corresponding signal content, and vice versa. 
However, for shorter window lengths, the Teager-Kaiser operator always outperforms the squared energy operator. The theoretical results are experimentally verified for synthetic signals. Finally, the performance of the proposed energy operators is evaluated for short-term analysis of noisy speech signals and the implications for speech processing applications are outlined. |
Anastasios Roussos, Petros Maragos Reversible interpolation of vectorial images by an anisotropic diffusion-projection PDE Journal Article International Journal of Computer Vision, 84 (2), pp. 130–145, 2009, ISSN: 09205691. Abstract | BibTeX | Links: [PDF] @article{133, title = {Reversible interpolation of vectorial images by an anisotropic diffusion-projection PDE}, author = {Anastasios Roussos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/RoussosMaragos_InterpolAnisoDiffProjPDE_IJCV09.pdf}, doi = {10.1007/s11263-008-0132-x}, issn = {09205691}, year = {2009}, date = {2009-01-01}, journal = {International Journal of Computer Vision}, volume = {84}, number = {2}, pages = {130--145}, abstract = {In this paper, a nonlinear model for the interpolation of vector-valued images is proposed. This model is based on an anisotropic diffusion PDE and performs an interpolation that is reversible. The interpolation solution is restricted to the subspace of functions that can recover the discrete input image, after an appropriate smoothing and sampling. The proposed nonlinear diffusion flow lies on this subspace while its strength and anisotropy adapt to the local variations and geometry of image structures. The derived method effectively reconstructs the real image structures and yields a satisfactory interpolation result. Compared to classic and other existing PDE-based interpolation methods, our proposed method seems to increase the accuracy of the result and to reduce the undesirable artifacts, such as blurring, ringing, block effects and edge distortion. We present extensive experimental results that demonstrate the potential of the method as applied to graylevel and color images.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this paper, a nonlinear model for the interpolation of vector-valued images is proposed. 
This model is based on an anisotropic diffusion PDE and performs an interpolation that is reversible. The interpolation solution is restricted to the subspace of functions that can recover the discrete input image, after an appropriate smoothing and sampling. The proposed nonlinear diffusion flow lies on this subspace while its strength and anisotropy adapt to the local variations and geometry of image structures. The derived method effectively reconstructs the real image structures and yields a satisfactory interpolation result. Compared to classic and other existing PDE-based interpolation methods, our proposed method seems to increase the accuracy of the result and to reduce the undesirable artifacts, such as blurring, ringing, block effects and edge distortion. We present extensive experimental results that demonstrate the potential of the method as applied to graylevel and color images. |
Stamatios Lefkimmiatis, Petros Maragos, George Papandreou Bayesian inference on multiscale models for poisson intensity estimation: Applications to photon-limited image denoising Journal Article IEEE Transactions on Image Processing, 18 (8), pp. 1724–1741, 2009, ISSN: 10577149. Abstract | BibTeX | Links: [PDF] @article{134, title = {Bayesian inference on multiscale models for poisson intensity estimation: Applications to photon-limited image denoising}, author = {Stamatios Lefkimmiatis and Petros Maragos and George Papandreou}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/LefkimmiatisMaragosPapandreou_BayesianMultiscalePoissonIntensityEstimation_ieee-j-ip09.pdf}, doi = {10.1109/TIP.2009.2022008}, issn = {10577149}, year = {2009}, date = {2009-01-01}, journal = {IEEE Transactions on Image Processing}, volume = {18}, number = {8}, pages = {1724--1741}, abstract = {We present an improved statistical model for analyzing Poisson processes, with applications to photon-limited imaging. We build on previous work, adopting a multiscale representation of the Poisson process in which the ratios of the underlying Poisson intensities (rates) in adjacent scales are modeled as mixtures of conjugate parametric distributions. 
Our main contributions include: 1) a rigorous and robust regularized expectation-maximization (EM) algorithm for maximum-likelihood estimation of the rate-ratio density parameters directly from the noisy observed Poisson data (counts); 2) extension of the method to work under a multiscale hidden Markov tree model (HMT) which couples the mixture label assignments in consecutive scales, thus modeling interscale coefficient dependencies in the vicinity of image edges; 3) exploration of a 2-D recursive quad-tree image representation, involving Dirichlet-mixture rate-ratio densities, instead of the conventional separable binary-tree image representation involving beta-mixture rate-ratio densities; and 4) a novel multiscale image representation, which we term Poisson-Haar decomposition, that better models the image edge structure, thus yielding improved performance. Experimental results on standard images with artificially simulated Poisson noise and on real photon-limited images demonstrate the effectiveness of the proposed techniques.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We present an improved statistical model for analyzing Poisson processes, with applications to photon-limited imaging. We build on previous work, adopting a multiscale representation of the Poisson process in which the ratios of the underlying Poisson intensities (rates) in adjacent scales are modeled as mixtures of conjugate parametric distributions. 
Our main contributions include: 1) a rigorous and robust regularized expectation-maximization (EM) algorithm for maximum-likelihood estimation of the rate-ratio density parameters directly from the noisy observed Poisson data (counts); 2) extension of the method to work under a multiscale hidden Markov tree model (HMT) which couples the mixture label assignments in consecutive scales, thus modeling interscale coefficient dependencies in the vicinity of image edges; 3) exploration of a 2-D recursive quad-tree image representation, involving Dirichlet-mixture rate-ratio densities, instead of the conventional separable binary-tree image representation involving beta-mixture rate-ratio densities; and 4) a novel multiscale image representation, which we term Poisson-Haar decomposition, that better models the image edge structure, thus yielding improved performance. Experimental results on standard images with artificially simulated Poisson noise and on real photon-limited images demonstrate the effectiveness of the proposed techniques. |
C. Tzafestas, N. Mitsou, N. Georgakarakos, O. Diamanti, P. Maragos, S. E. Fotinea, E. Efthimiou Gestural teleoperation of a mobile robot based on visual recognition of sign language static handshapes Conference Proceedings - IEEE International Workshop on Robot and Human Interactive Communication, 2009, ISSN: 19449445. Abstract | BibTeX | Links: [PDF] @conference{Tzafestas2009, title = {Gestural teleoperation of a mobile robot based on visual recognition of sign language static handshapes}, author = { C. Tzafestas and N. Mitsou and N. Georgakarakos and O. Diamanti and P. Maragos and S. E. Fotinea and E. Efthimiou}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/tzafestas2009.pdf}, doi = {10.1109/ROMAN.2009.5326235}, issn = {19449445}, year = {2009}, date = {2009-09-01}, booktitle = {Proceedings - IEEE International Workshop on Robot and Human Interactive Communication}, pages = {1073--1079}, abstract = {This paper presents results achieved in the frames of a national research project (titled ``DIANOEMA''), where visual analysis and sign recognition techniques have been explored on Greek Sign Language (GSL) data. Besides GSL modelling, the aim was to develop a pilot application for teleoperating a mobile robot using natural hand signs. A small vocabulary of hand signs has been designed to enable desktop-based teleoperation at a high-level of supervisory telerobotic control. Real-time visual recognition of the hand images is performed by training a multi-layer perceptron (MLP) neural network. Various shape descriptors of the segmented hand posture images have been explored as inputs to the MLP network. These include Fourier shape descriptors on the contour of the segmented hand sign images, moments, compactness, eccentricity, and histogram of the curvature. 
We have examined which of these shape descriptors are best suited for real-time recognition of hand signs, in relation to the number and choice of hand postures, in order to achieve maximum recognition performance. The hand-sign recognizer has been integrated in a graphical user interface, and has been implemented with success on a pilot application for real-time desktop-based gestural teleoperation of a mobile robot vehicle.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper presents results achieved in the frames of a national research project (titled “DIANOEMA”), where visual analysis and sign recognition techniques have been explored on Greek Sign Language (GSL) data. Besides GSL modelling, the aim was to develop a pilot application for teleoperating a mobile robot using natural hand signs. A small vocabulary of hand signs has been designed to enable desktop-based teleoperation at a high-level of supervisory telerobotic control. Real-time visual recognition of the hand images is performed by training a multi-layer perceptron (MLP) neural network. Various shape descriptors of the segmented hand posture images have been explored as inputs to the MLP network. These include Fourier shape descriptors on the contour of the segmented hand sign images, moments, compactness, eccentricity, and histogram of the curvature. We have examined which of these shape descriptors are best suited for real-time recognition of hand signs, in relation to the number and choice of hand postures, in order to achieve maximum recognition performance. The hand-sign recognizer has been integrated in a graphical user interface, and has been implemented with success on a pilot application for real-time desktop-based gestural teleoperation of a mobile robot vehicle. |
G Evangelopoulos, A Zlatintsi, G Skoumas, K Rapantzikos, A Potamianos, P Maragos, Y Avrithis Video Event Detection and Summarization Using Audio, Visual and Text Saliency Conference Taipei, Taiwan, 2009. Abstract | BibTeX | Links: [PDF] @conference{EZS+09, title = {Video Event Detection and Summarization Using Audio, Visual and Text Saliency}, author = {G Evangelopoulos and A Zlatintsi and G Skoumas and K Rapantzikos and A Potamianos and P Maragos and Y Avrithis}, url = {http://robotics.ntua.gr/wp-content/publications/EvangelopoulosZlatintsiEtAl_VideoEventDetectionSummarizationUsingAVTSaliency_ICASSP09.pdf}, year = {2009}, date = {2009-04-01}, address = {Taipei, Taiwan}, abstract = {Detection of perceptually important video events is formulated here on the basis of saliency models for the audio, visual and textual information conveyed in a video stream. Audio saliency is assessed by cues that quantify multifrequency waveform modulations, extracted through nonlinear operators and energy tracking. Visual saliency is measured through a spatiotemporal attention model driven by intensity, color and motion. Text saliency is extracted from part-of-speech tagging on the subtitles information available with most movie distributions. The various modality curves are integrated in a single attention curve, where the presence of an event may be signified in one or multiple domains. This multimodal saliency curve is the basis of a bottom-up video summarization algorithm, that refines results from unimodal or audiovisual-based skimming. The algorithm performs favorably for video summarization in terms of informativeness and enjoyability.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Detection of perceptually important video events is formulated here on the basis of saliency models for the audio, visual and textual information conveyed in a video stream.
Audio saliency is assessed by cues that quantify multifrequency waveform modulations, extracted through nonlinear operators and energy tracking. Visual saliency is measured through a spatiotemporal attention model driven by intensity, color and motion. Text saliency is extracted from part-of-speech tagging on the subtitles information available with most movie distributions. The various modality curves are integrated in a single attention curve, where the presence of an event may be signified in one or multiple domains. This multimodal saliency curve is the basis of a bottom-up video summarization algorithm, that refines results from unimodal or audiovisual-based skimming. The algorithm performs favorably for video summarization in terms of informativeness and enjoyability. |
M. Alifragis, C.S. Tzafestas Stereo pair matching of archaeological scenes using phase domain methods Conference IMAGAPP 2009 - Proceedings of the 1st International Conference on Computer Imaging Theory and Applications, 2009, ISBN: 9789898111685. Abstract | BibTeX | Links: [PDF] @conference{Alifragis2009, title = {Stereo pair matching of archaeological scenes using phase domain methods}, author = { M. Alifragis and C.S. Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Alifragis2009.pdf}, isbn = {9789898111685}, year = {2009}, date = {2009-01-01}, booktitle = {IMAGAPP 2009 - Proceedings of the 1st International Conference on Computer Imaging Theory and Applications}, abstract = {This paper conducts an experimental study on the application of some recent theories of image preprocessing and analysis in the frequency domain, particularly the phase congruency and monogenic filtering methods. Our goal was to examine the performance of such methods in a stereo matching problem setting, with photos of complicated scenes. Two objects were used: a scene of an ancient Greek temple of Acropolis and the outside scene of the gate of an ancient theatre. Due to the complex structure of the photographed object, classic techniques used for feature detection and matching give poor results. The phase-domain approach followed in this paper is based on the phase-congruency method for feature extraction, together with monogenic filtering and a new correlation measure in the frequency domain for image correspondence and stereo matching.
Comparative results show that the three-dimensional models of the scene computed when applying these phase domain methods are much more detailed and consistent as compared to the models obtained when using classic approaches or the SIFT based techniques, which give poor depth representation and less accurate metric information.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper conducts an experimental study on the application of some recent theories of image preprocessing and analysis in the frequency domain, particularly the phase congruency and monogenic filtering methods. Our goal was to examine the performance of such methods in a stereo matching problem setting, with photos of complicated scenes. Two objects were used: a scene of an ancient Greek temple of Acropolis and the outside scene of the gate of an ancient theatre. Due to the complex structure of the photographed object, classic techniques used for feature detection and matching give poor results. The phase-domain approach followed in this paper is based on the phase-congruency method for feature extraction, together with monogenic filtering and a new correlation measure in the frequency domain for image correspondence and stereo matching. Comparative results show that the three-dimensional models of the scene computed when applying these phase domain methods are much more detailed and consistent as compared to the models obtained when using classic approaches or the SIFT based techniques, which give poor depth representation and less accurate metric information. |
D. Dimitriadis, A. Metallinou, I. Konstantinou, G. Goumas, P. Maragos, N. Koziris GridNews: A distributed automatic Greek broadcast transcription system Conference ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings, 2009, ISSN: 15206149. Abstract | BibTeX | Links: [PDF] @conference{196, title = {GridNews: A distributed automatic Greek broadcast transcription system}, author = { D. Dimitriadis and A. Metallinou and I. Konstantinou and G. Goumas and P. Maragos and N. Koziris}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/DimitriadisMetallinouEtAl_Gridnews-AutomaticGreekTranscriptionSystem_ICASSP09.pdf}, doi = {10.1109/ICASSP.2009.4959984}, issn = {15206149}, year = {2009}, date = {2009-01-01}, booktitle = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings}, pages = {1917--1920}, abstract = {In this paper, a distributed system storing and retrieving Broadcast News data recorded from the Greek television is presented. These multimodal data are processed in a grid computational environment interconnecting distributed data storage and processing subsystems. The innovative element of this system is the implementation of the signal processing algorithms in this grid environment, offering additional flexibility and computational power. Among the developed signal processing modules are: the Segmentor, cutting up the original videos into shorter ones, the Classifier, recognizing whether these short videos contain speech or not, the Greek large-vocabulary speech Recognizer, transcribing speech into written text, and finally the text Search engine and the video Retriever. All the processed data are stored and retrieved in geographically distributed storage elements.
A user-friendly, web-based interface is developed, facilitating the transparent import and storage of new multimodal data, their off-line processing and finally, their search and retrieval.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, a distributed system storing and retrieving Broadcast News data recorded from the Greek television is presented. These multimodal data are processed in a grid computational environment interconnecting distributed data storage and processing subsystems. The innovative element of this system is the implementation of the signal processing algorithms in this grid environment, offering additional flexibility and computational power. Among the developed signal processing modules are: the Segmentor, cutting up the original videos into shorter ones, the Classifier, recognizing whether these short videos contain speech or not, the Greek large-vocabulary speech Recognizer, transcribing speech into written text, and finally the text Search engine and the video Retriever. All the processed data are stored and retrieved in geographically distributed storage elements. A user-friendly, web-based interface is developed, facilitating the transparent import and storage of new multimodal data, their off-line processing and finally, their search and retrieval. |
G Evangelopoulos, A Zlatintsi, G Skoumas, K Rapantzikos, A Potamianos, P Maragos, Y Avrithis Video Event Detection and Summarization using Audio, Visual and Text Saliency Conference Icassp, (2), 2009, ISBN: 9781424423545. @conference{195, title = {Video Event Detection and Summarization using Audio, Visual and Text Saliency}, author = { G Evangelopoulos and A Zlatintsi and G Skoumas and K Rapantzikos and A Potamianos and P Maragos and Y Avrithis}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/EvangelopoulosZlatintsiEtAl_VideoEventDetectionSummarizationUsingAVTSaliency_ICASSP09.pdf}, isbn = {9781424423545}, year = {2009}, date = {2009-01-01}, booktitle = {Icassp}, number = {2}, pages = {3553--3556}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
S.E. Fotinea, Eleni Efthimiou, George Caridakis, Olga Diamanti, N. Mitsou, K. Karpouzis, C. Tzafestas, P. Maragos DIANOEMA: Visual analysis and sign recognition for GSL modelling and robot teleoperation Conference Language and Speech, 2009. BibTeX | Links: [Webpage] [PDF] @conference{Fotinea2009, title = {DIANOEMA: Visual analysis and sign recognition for GSL modelling and robot teleoperation}, author = { S.E. Fotinea and Eleni Efthimiou and George Caridakis and Olga Diamanti and N. Mitsou and K. Karpouzis and C. Tzafestas and P. Maragos}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.149.1666&rep=rep1&type=pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Dianoema_2009.pdf}, year = {2009}, date = {2009-01-01}, booktitle = {Language and Speech}, pages = {2--4}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Stamatios Lefkimmiatis, George Papandreou, Petros Maragos Poisson-haar transform: A nonlinear multiscale representation for photon-limited image denoising Conference Proceedings - International Conference on Image Processing, ICIP, 2009, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{192, title = {Poisson-haar transform: A nonlinear multiscale representation for photon-limited image denoising}, author = { Stamatios Lefkimmiatis and George Papandreou and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/LefkimmiatisPapandreouMaragos_PoissonHaarTransform_icip09.pdf}, doi = {10.1109/ICIP.2009.5414053}, issn = {15224880}, year = {2009}, date = {2009-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {3853--3856}, abstract = {We present a novel multiscale image representation belonging to the class of multiscale multiplicative decompositions, which we term Poisson-Haar transform. The proposed representation is well-suited for analyzing images degraded by signal-dependent Poisson noise, allowing efficient estimation of their underlying intensity by means of multiscale Bayesian schemes. The Poisson-Haar decomposition has a direct link to the standard 2-D Haar wavelet transform, thus retaining many of the properties that have made wavelets successful in signal processing and analysis. The practical relevance and effectiveness of the proposed approach is verified through denoising experiments on simulated and real-world photon-limited images.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We present a novel multiscale image representation belonging to the class of multiscale multiplicative decompositions, which we term Poisson-Haar transform. The proposed representation is well-suited for analyzing images degraded by signal-dependent Poisson noise, allowing efficient estimation of their underlying intensity by means of multiscale Bayesian schemes. 
The Poisson-Haar decomposition has a direct link to the standard 2-D Haar wavelet transform, thus retaining many of the properties that have made wavelets successful in signal processing and analysis. The practical relevance and effectiveness of the proposed approach is verified through denoising experiments on simulated and real-world photon-limited images. |
Petros Maragos, Corinne Vachier Overview of adaptive morphology: Trends and perspectives Conference Proceedings - International Conference on Image Processing, ICIP, 2009, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{194, title = {Overview of adaptive morphology: Trends and perspectives}, author = { Petros Maragos and Corinne Vachier}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosVachier_OverviewOfAdaptiveMorphology_ICIP09.pdf}, doi = {10.1109/ICIP.2009.5413961}, issn = {15224880}, year = {2009}, date = {2009-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {2241--2244}, abstract = {In this paper we briefly overview emerging trends in `Adaptive Morphology', i.e. work related to the theory and/or applications of image analysis filters, systems, or algorithms based on mathematical morphology, that are adaptive w.r.t. space or intensity or use any other adaptive scheme. We present a new classification of work in this area structured along several major theoretical perspectives. We then sample specific approaches that develop spatially-variant structuring elements or intensity level-adaptive operators, modeled and implemented either via conventional nonlinear digital filtering or via geometric PDEs. Finally, we discuss some applications.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper we briefly overview emerging trends in `Adaptive Morphology', i.e. work related to the theory and/or applications of image analysis filters, systems, or algorithms based on mathematical morphology, that are adaptive w.r.t. space or intensity or use any other adaptive scheme. We present a new classification of work in this area structured along several major theoretical perspectives.
We then sample specific approaches that develop spatially-variant structuring elements or intensity level-adaptive operators, modeled and implemented either via conventional nonlinear digital filtering or via geometric PDEs. Finally, we discuss some applications. |
Anastasios Roussos, Athanassios Katsamanis, Petros Maragos Tongue tracking in ultrasound images with active appearance models Conference Proceedings - International Conference on Image Processing, ICIP, 2009, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{193, title = {Tongue tracking in ultrasound images with active appearance models}, author = { Anastasios Roussos and Athanassios Katsamanis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/RoussosKatsamanisMaragos_TongueTrackingInUltrrasoundImages_ICIP09.pdf}, doi = {10.1109/ICIP.2009.5414520}, issn = {15224880}, year = {2009}, date = {2009-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {1733--1736}, abstract = {Tongue Ultrasound imaging is widely used for human speech production analysis and modeling. In this paper, we propose a novel method to automatically detect and track the tongue contour in Ultrasound (US) videos. Our method is built on a variant of Active Appearance Modeling. It incorporates shape prior information and can estimate the entire tongue contour robustly and accurately in a sequence of US frames. Experimental evaluation demonstrates the effectiveness of our approach and its improved performance compared to previously proposed tongue tracking techniques.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Tongue Ultrasound imaging is widely used for human speech production analysis and modeling. In this paper, we propose a novel method to automatically detect and track the tongue contour in Ultrasound (US) videos. Our method is built on a variant of Active Appearance Modeling. It incorporates shape prior information and can estimate the entire tongue contour robustly and accurately in a sequence of US frames. Experimental evaluation demonstrates the effectiveness of our approach and its improved performance compared to previously proposed tongue tracking techniques. |
Stavros Theodorakis, Athanassios Katsamanis, Petros Maragos Product-HMMS for automatic sign language recognition Conference ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings, 2009, ISSN: 15206149. Abstract | BibTeX | Links: [PDF] @conference{197, title = {Product-HMMS for automatic sign language recognition}, author = { Stavros Theodorakis and Athanassios Katsamanis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/TheodorakisKatsamanisMaragos_ProductHMM-AutomaticSignLanguageRecogn_ICASSP2009.pdf}, doi = {10.1109/ICASSP.2009.4959905}, issn = {15206149}, year = {2009}, date = {2009-01-01}, booktitle = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings}, pages = {1601--1604}, abstract = {We address multistream sign language recognition and focus on efficient multistream integration schemes. Alternative approaches are investigated and the application of Product-HMMs (PHMM) is proposed. The PHMM is a variant of the general multistream HMM that also allows for partial asynchrony between the streams. Experiments in classification and isolated sign recognition for the Greek Sign Language using different fusion methods, show that the PHMMs perform the best. Fusing movement and shape information with the PHMMs has increased sign classification performance by 1,2% in comparison to the Parallel HMM fusion model. Isolated sign recognition rate increased by 8,3% over movement only models and by 1,5% over movement-shape models using multistream HMMs.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We address multistream sign language recognition and focus on efficient multistream integration schemes. Alternative approaches are investigated and the application of Product-HMMs (PHMM) is proposed. The PHMM is a variant of the general multistream HMM that also allows for partial asynchrony between the streams.
Experiments in classification and isolated sign recognition for the Greek Sign Language using different fusion methods, show that the PHMMs perform the best. Fusing movement and shape information with the PHMMs has increased sign classification performance by 1,2% in comparison to the Parallel HMM fusion model. Isolated sign recognition rate increased by 8,3% over movement only models and by 1,5% over movement-shape models using multistream HMMs. |
C Tzafestas Telehaptics: issues of control stability and human perception in remote kineasthetic exploration Conference 18th IEEE International Symposium on Robot and Human Interactive Communication (RO-MAN 2009), Workshop on Robot-Human Synergies, Sept. 27 - Oct. 2 2009, 2009. @conference{38b, title = {Telehaptics: issues of control stability and human perception in remote kineasthetic exploration}, author = { C Tzafestas}, year = {2009}, date = {2009-01-01}, booktitle = {18th IEEE International Symposium on Robot and Human Interactive Communication (RO-MAN 2009), Workshop on Robot-Human Synergies, Sept. 27 - Oct. 2 2009}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
2008 |
Costas S Tzafestas, Kostas Birbas, Yiannis Koumpouros, Dimitri Christopoulos Pilot Evaluation Study of a Virtual Paracentesis Simulator for Skill Training and Assessment: The Beneficial Effect of Haptic Display Journal Article Presence: Teleoperators & Virtual Environments, 17 (2), pp. 212–229, 2008, ISBN: 10547460. Abstract | BibTeX | Links: [Webpage] [PDF] @article{Tzafestas2008, title = {Pilot Evaluation Study of a Virtual Paracentesis Simulator for Skill Training and Assessment: The Beneficial Effect of Haptic Display}, author = {Costas S Tzafestas and Kostas Birbas and Yiannis Koumpouros and Dimitri Christopoulos}, url = {http://ezproxy.lib.swin.edu.au/login?url=http://search.ebscohost.com/login.aspx?direct=true&db=a9h&AN=31446380&site=ehost-live&scope=site http://robotics.ntua.gr/wp-content/uploads/sites/2/tzafestas2008.pdf}, isbn = {10547460}, year = {2008}, date = {2008-04-01}, journal = {Presence: Teleoperators & Virtual Environments}, volume = {17}, number = {2}, pages = {212--229}, abstract = {Effective, real-time training of health care professionals in invasive procedures is a challenging task. Furthermore, assessing in practice the acquisition of the dexterity and skills required to safely perform such operations is particularly difficult to perform objectively and reliably. The development of virtual reality (VR) simulators offers great potential toward these objectives, and can help bypass some of the difficulties associated with classical surgical training and assessment procedures. In this context, we have developed a prototype VR simulator platform for training in a class of invasive procedures, such as accessing central vessels. This paper focuses more particularly on a pilot study treating the specific application case of subclavian vein paracentesis. 
The simulation incorporates 3D models of all the human anatomy structures involved in this procedure, where collision detection and response algorithms are implemented to simulate most of the potential complications in accordance with the situations encountered in real clinical practice. Furthermore, haptic display is integrated using a typical force feedback device providing the user with a sense of touch during the simulated operations. Our main objective in this study was to obtain quantitative evaluation results regarding the effect of haptic display on performance. Two user groups participated in the study: (I) novice users and (II) experienced surgeons. The system automatically provides quantitative assessment scores of users' performance, applying a set of objective measures that also involve the optimality of the needle insertion path and indicators of maneuvering errors. Training and skill assessment performance of the system is evaluated in a twofold manner, regarding respectively: (a) the learning curve of novice users, and (b) the correlation of the system-generated scores with the actual surgical experience of the user. These performance indicators are assessed with respect to the activation of the haptic display and to whether this has any beneficial effect (or not). The experimental findings of this first pilot study provide quantitative evidence about the significance of haptic display, not only as a means to enhance the realism of the surgical simulation, but especially as an irreplaceable component for achieving objective and reliable skill assessment. Further larger-scale and long-term clinical studies are needed to validate the effectiveness of such platforms for actual training and dexterity enhancement, particularly when more complex sensorimotor skills are involved. 
[ABSTRACT FROM AUTHOR] Copyright of Presence: Teleoperators & Virtual Environments is the property of MIT Press and its content may not be copied or emailed to multiple sites or posted to a listserv without the copyright holder's express written permission. However, users may print, download, or email articles for individual use. This abstract may be abridged. No warranty is given about the accuracy of the copy. Users should refer to the original published version of the material for the full abstract. (Copyright applies to all Abstracts.)}, keywords = {}, pubstate = {published}, tppubtype = {article} } Effective, real-time training of health care professionals in invasive procedures is a challenging task. Furthermore, assessing in practice the acquisition of the dexterity and skills required to safely perform such operations is particularly difficult to perform objectively and reliably. The development of virtual reality (VR) simulators offers great potential toward these objectives, and can help bypass some of the difficulties associated with classical surgical training and assessment procedures. In this context, we have developed a prototype VR simulator platform for training in a class of invasive procedures, such as accessing central vessels. This paper focuses more particularly on a pilot study treating the specific application case of subclavian vein paracentesis. The simulation incorporates 3D models of all the human anatomy structures involved in this procedure, where collision detection and response algorithms are implemented to simulate most of the potential complications in accordance with the situations encountered in real clinical practice. Furthermore, haptic display is integrated using a typical force feedback device providing the user with a sense of touch during the simulated operations. Our main objective in this study was to obtain quantitative evaluation results regarding the effect of haptic display on performance. 
Two user groups participated in the study: (I) novice users and (II) experienced surgeons. The system automatically provides quantitative assessment scores of users' performance, applying a set of objective measures that also involve the optimality of the needle insertion path and indicators of maneuvering errors. Training and skill assessment performance of the system is evaluated in a twofold manner, regarding respectively: (a) the learning curve of novice users, and (b) the correlation of the system-generated scores with the actual surgical experience of the user. These performance indicators are assessed with respect to the activation of the haptic display and to whether this has any beneficial effect (or not). The experimental findings of this first pilot study provide quantitative evidence about the significance of haptic display, not only as a means to enhance the realism of the surgical simulation, but especially as an irreplaceable component for achieving objective and reliable skill assessment. Further larger-scale and long-term clinical studies are needed to validate the effectiveness of such platforms for actual training and dexterity enhancement, particularly when more complex sensorimotor skills are involved. [ABSTRACT FROM AUTHOR] Copyright of Presence: Teleoperators & Virtual Environments is the property of MIT Press and its content may not be copied or emailed to multiple sites or posted to a listserv without the copyright holder's express written permission. However, users may print, download, or email articles for individual use. This abstract may be abridged. No warranty is given about the accuracy of the copy. Users should refer to the original published version of the material for the full abstract. (Copyright applies to all Abstracts.) |
Anastasia Sofou, Petros Maragos Generalized flooding and multicue PDE-based image segmentation Journal Article IEEE Transactions on Image Processing, 17 (3), pp. 364–376, 2008, ISSN: 10577149. Abstract | BibTeX | Links: [PDF] @article{126, title = {Generalized flooding and multicue PDE-based image segmentation}, author = {Anastasia Sofou and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/SofouMaragos_GeneralizedFloodingMulticuePDEBasedImageSegm_ImageProc08.pdf}, doi = {10.1109/TIP.2007.916156}, issn = {10577149}, year = {2008}, date = {2008-01-01}, journal = {IEEE Transactions on Image Processing}, volume = {17}, number = {3}, pages = {364--376}, abstract = {Image segmentation remains an important, but hard-to-solve, problem since it appears to be application dependent with usually no a priori information available regarding the image structure. Moreover, the increasing demands of image analysis tasks in terms of segmentation results' quality introduce the necessity of employing multiple cues for improving image segmentation results. In this paper, we attempt to incorporate cues such as intensity contrast, region size, and texture in the segmentation procedure and derive improved results compared to using individual cues separately. We emphasize on the overall segmentation procedure, and we propose efficient simplification operators and feature extraction schemes, capable of quantifying important characteristics, like geometrical complexity, rate of change in local contrast variations, and orientation, that eventually favor the final segmentation result. Based on the well-known morphological paradigm of watershed transform segmentation, which exploits intensity contrast and region size criteria, we investigate its partial differential equation (PDE) formulation, and we extend it in order to satisfy various flooding criteria, thus making it applicable to a wider range of images. 
Going a step further, we introduce a segmentation scheme that couples contrast criteria in flooding with texture information. The modeling of the proposed scheme is done via PDEs and the efficient incorporation of the available contrast and texture information, is done by selecting an appropriate cartoon-texture image decomposition scheme. The proposed coupled segmentation scheme is driven by two separate image components: cartoon U (for contrast information) and texture component V. The performance of the proposed segmentation scheme is demonstrated through a complete set of experimental results and substantiated using quantitative and qualitative criteria.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Image segmentation remains an important, but hard-to-solve, problem since it appears to be application dependent with usually no a priori information available regarding the image structure. Moreover, the increasing demands of image analysis tasks in terms of segmentation results' quality introduce the necessity of employing multiple cues for improving image segmentation results. In this paper, we attempt to incorporate cues such as intensity contrast, region size, and texture in the segmentation procedure and derive improved results compared to using individual cues separately. We emphasize on the overall segmentation procedure, and we propose efficient simplification operators and feature extraction schemes, capable of quantifying important characteristics, like geometrical complexity, rate of change in local contrast variations, and orientation, that eventually favor the final segmentation result. Based on the well-known morphological paradigm of watershed transform segmentation, which exploits intensity contrast and region size criteria, we investigate its partial differential equation (PDE) formulation, and we extend it in order to satisfy various flooding criteria, thus making it applicable to a wider range of images. 
Going a step further, we introduce a segmentation scheme that couples contrast criteria in flooding with texture information. The modeling of the proposed scheme is done via PDEs and the efficient incorporation of the available contrast and texture information, is done by selecting an appropriate cartoon-texture image decomposition scheme. The proposed coupled segmentation scheme is driven by two separate image components: cartoon U (for contrast information) and texture component V. The performance of the proposed segmentation scheme is demonstrated through a complete set of experimental results and substantiated using quantitative and qualitative criteria. |
I Kokkinos, R Deriche, O Faugeras, P Maragos Computational Analysis and Learning for a Biologically Motivated Model of Boundary Detection and Image Segmentation Journal Article Neurocomputing, 71 (10-12), pp. 1798–1812, 2008. @article{127, title = {Computational Analysis and Learning for a Biologically Motivated Model of Boundary Detection and Image Segmentation}, author = {I Kokkinos and R Deriche and O Faugeras and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KokkinosDericheFaugersMaragos_Neurocomputing_2008_preprint.pdf}, year = {2008}, date = {2008-01-01}, journal = {Neurocomputing}, volume = {71}, number = {10-12}, pages = {1798--1812}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Margarita Kotti, Dimitrios Ververidis, Georgios Evangelopoulos, Ioannis Panagakis, Constantine Kotropoulos, Petros Maragos, Ioannis Pitas Audio-assisted movie dialogue detection Journal Article IEEE Transactions on Circuits and Systems for Video Technology, 18 (11), pp. 1618–1627, 2008, ISSN: 10518215. Abstract | BibTeX | Links: [PDF] @article{128, title = {Audio-assisted movie dialogue detection}, author = {Margarita Kotti and Dimitrios Ververidis and Georgios Evangelopoulos and Ioannis Panagakis and Constantine Kotropoulos and Petros Maragos and Ioannis Pitas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Kotti_Audio-Assisted-Movie-Dialogue-Detection_ieeeTCSVT2008.pdf}, doi = {10.1109/TCSVT.2008.2005613}, issn = {10518215}, year = {2008}, date = {2008-01-01}, journal = {IEEE Transactions on Circuits and Systems for Video Technology}, volume = {18}, number = {11}, pages = {1618--1627}, abstract = {An audio-assisted system is investigated that detects if a movie scene is a dialogue or not. The system is based on actor indicator functions. That is, functions which define if an actor speaks at a certain time instant. In particular, the cross-correlation and the magnitude of the corresponding the cross-power spectral density of a pair of indicator functions are input to various classifiers, such as voted perceptrons, radial basis function networks, random trees, and support vector machines for dialogue/non-dialogue detection. To boost classifier efficiency AdaBoost is also exploited. The aforementioned classifiers are trained using ground truth indicator functions determined by human annotators for 41 dialogue and another 20 non-dialogue audio instances. For testing, actual indicator functions are derived by applying audio activity detection and actor clustering to audio recordings. 23 instances are randomly chosen among the aforementioned 41 dialogue instances, 17 of which correspond to dialogue scenes and 6 to non-dialogue ones. 
Accuracy ranging between 0.739 and 0.826 is reported.}, keywords = {}, pubstate = {published}, tppubtype = {article} } An audio-assisted system is investigated that detects if a movie scene is a dialogue or not. The system is based on actor indicator functions. That is, functions which define if an actor speaks at a certain time instant. In particular, the cross-correlation and the magnitude of the corresponding the cross-power spectral density of a pair of indicator functions are input to various classifiers, such as voted perceptrons, radial basis function networks, random trees, and support vector machines for dialogue/non-dialogue detection. To boost classifier efficiency AdaBoost is also exploited. The aforementioned classifiers are trained using ground truth indicator functions determined by human annotators for 41 dialogue and another 20 non-dialogue audio instances. For testing, actual indicator functions are derived by applying audio activity detection and actor clustering to audio recordings. 23 instances are randomly chosen among the aforementioned 41 dialogue instances, 17 of which correspond to dialogue scenes and 6 to non-dialogue ones. Accuracy ranging between 0.739 and 0.826 is reported. |
A Katsamanis, A Roussos, P Maragos, M Aron, M.-O. Berger Inversion from audiovisual speech to articulatory information by exploiting multimodal data Conference Proceedings of ISSP 2008 - 8th International Seminar on Speech Production, 2008. Abstract | BibTeX | Links: [Webpage] [PDF] @conference{Katsamanis2008, title = {Inversion from audiovisual speech to articulatory information by exploiting multimodal data}, author = { A Katsamanis and A Roussos and P Maragos and M Aron and M.-O. Berger}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84902379110&partnerID=40&md5=e9c293b717940a725c6d5239569fab7d http://robotics.ntua.gr/wp-content/uploads/sites/2/KatsamanisRoussosMaragosAronBerger_AVInversionMultimodalArtData_ISSP2008.pdf}, year = {2008}, date = {2008-12-01}, booktitle = {Proceedings of ISSP 2008 - 8th International Seminar on Speech Production}, pages = {301--304}, abstract = {We present an inversion framework to identify speech production properties from audiovisual information. Our system is built on a multimodal articulatory dataset comprising ultrasound, X-ray, magnetic resonance images, electromagnetic articulography data as well as audio and stereovisual recordings of the speaker. Visual information is captured via stereovision while the vocal tract state is represented by a properly trained articulatory model. The audiovisual-to-articulation relationship is approximated by an adaptive piecewise linear mapping. The presented system can recover the hidden vocal tract shapes and may serve as a basis for a more widely applicable inversion setup.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We present an inversion framework to identify speech production properties from audiovisual information. Our system is built on a multimodal articulatory dataset comprising ultrasound, X-ray, magnetic resonance images, electromagnetic articulography data as well as audio and stereovisual recordings of the speaker. 
Visual information is captured via stereovision while the vocal tract state is represented by a properly trained articulatory model. The audiovisual-to-articulation relationship is approximated by an adaptive piecewise linear mapping. The presented system can recover the hidden vocal tract shapes and may serve as a basis for a more widely applicable inversion setup. |
Manthos Alifragis, Andreas Mantelos, Costas S. Tzafestas Web-based remote and virtual programming console of the V+ robotic system Conference IECON Proceedings (Industrial Electronics Conference), 2008, ISSN: 1553-572X. Abstract | BibTeX | Links: [PDF] @conference{Alifragis2008, title = {Web-based remote and virtual programming console of the V+ robotic system}, author = { Manthos Alifragis and Andreas Mantelos and Costas S. Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/alifragis2008.pdf}, doi = {10.1109/IECON.2008.4758534}, issn = {1553-572X}, year = {2008}, date = {2008-11-01}, booktitle = {IECON Proceedings (Industrial Electronics Conference)}, pages = {3551--3555}, abstract = {The objective of practical training is a major issue in students education, in many engineering disciplines. The access to specialized technological equipment for education is often limited by specific time restriction, or not provided at all. Therefore, the benefits by providing a Web-based platform for remote experimentation via LAN or Internet are evident. This paper describes the development of an e-laboratory platform intending to be used as a distance training system in the field of robotic task planning (e.g. programming of a robotic pick and place task). In prior work, this platform was evaluated by training students remotely to implement robotic tasks, using the robot's Teach Pendant. This paper is focusing on the design of a training platform, aiming to make students familiar with the V+ robotic operating system. The proposed platform intends to remotely provide the students with the ability of programming robotic manipulation tasks using directly V+ scripts. 
An evaluation protocol, presented in [11], [12], is considered to be employed in the near future, in order to assess the performance of the proposed e-laboratory platform, with respect to the level of students learning and assimilating of the robot's programming language (V+).}, keywords = {}, pubstate = {published}, tppubtype = {conference} } The objective of practical training is a major issue in students education, in many engineering disciplines. The access to specialized technological equipment for education is often limited by specific time restriction, or not provided at all. Therefore, the benefits by providing a Web-based platform for remote experimentation via LAN or Internet are evident. This paper describes the development of an e-laboratory platform intending to be used as a distance training system in the field of robotic task planning (e.g. programming of a robotic pick and place task). In prior work, this platform was evaluated by training students remotely to implement robotic tasks, using the robot's Teach Pendant. This paper is focusing on the design of a training platform, aiming to make students familiar with the V+ robotic operating system. The proposed platform intends to remotely provide the students with the ability of programming robotic manipulation tasks using directly V+ scripts. An evaluation protocol, presented in [11], [12], is considered to be employed in the near future, in order to assess the performance of the proposed e-laboratory platform, with respect to the level of students learning and assimilating of the robot's programming language (V+). |
Olga Diamanti, Petros Maragos Geodesic active regions for segmentation and tracking of human gestures in sign language videos Conference 2008 15th IEEE International Conference on Image Processing, 2008, ISSN: 1522-4880. BibTeX | Links: [Webpage] [PDF] @conference{Diamanti2008, title = {Geodesic active regions for segmentation and tracking of human gestures in sign language videos}, author = { Olga Diamanti and Petros Maragos}, url = {http://ieeexplore.ieee.org/document/4711950/ http://robotics.ntua.gr/wp-content/uploads/sites/2/DiamantiMaragos_GeodesicActiveRegionsForSegmentTrackSignLanguage_ICIP08.pdf}, doi = {10.1109/ICIP.2008.4711950}, issn = {1522-4880}, year = {2008}, date = {2008-10-01}, booktitle = {2008 15th IEEE International Conference on Image Processing}, pages = {1096--1099}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
John N. Karigiannis, Costas S. Tzafestas Multi-agent hierarchical architecture modeling kinematic chains employing continuous RL learning with fuzzified state space Conference Proceedings of the 2nd Biennial IEEE/RAS-EMBS International Conference on Biomedical Robotics and Biomechatronics, BioRob 2008, 2008, ISBN: 9781424428830. Abstract | BibTeX | Links: [PDF] @conference{Karigiannis2008, title = {Multi-agent hierarchical architecture modeling kinematic chains employing continuous RL learning with fuzzified state space}, author = { John N. Karigiannis and Costas S. Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/karigiannis2008.pdf}, doi = {10.1109/BIOROB.2008.4762862}, isbn = {9781424428830}, year = {2008}, date = {2008-10-01}, booktitle = {Proceedings of the 2nd Biennial IEEE/RAS-EMBS International Conference on Biomedical Robotics and Biomechatronics, BioRob 2008}, pages = {716--723}, abstract = {In the context of multi-agent systems, we are proposing a hierarchical robot control architecture that comprises artificial intelligence (AI) techniques and traditional control methodologies, based on the realization of a learning team of agents in a continuous problem setting. In a multiagent system, action selection is important for cooperation and coordination among the agents. By employing reinforcement learning (RL) methods in a fuzzified state-space, we accomplish to design a control architecture and a corresponding methodology, engaged in a continuous space, which enables the agents to learn, over a period of time, to perform sequences of continuous actions in a cooperative manner, in order to reach their goal without any prior generated task model. By organizing the agents in a nested architecture, as proposed in this work, a type of problem-specific recursive knowledge acquisition is attempted. 
Furthermore, the agents try to exploit the knowledge gathered in order to be in position to execute tasks that indicate certain degree of similarity. The agents correspond in fact to independent degrees of freedom of the system, and achieve to gain experience over the task that they collaboratively perform, by exploring and exploiting their state-to-action mapping space. A numerical experiment is presented in this paper, performed on a simulated planar 4 degrees of freedom (DOF) manipulator, in order to evaluate both the proposed hierarchical multiagent architecture as well as the proposed methodological framework. It is anticipated that such an approach can be highly scalable for the control of robotic systems that are kinematically more complex, comprising multiple DOFs and potentially redundancies in open or closed kinematic chains, particularly dexterous manipulators. \textcopyright{} 2008 IEEE.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In the context of multi-agent systems, we are proposing a hierarchical robot control architecture that comprises artificial intelligence (AI) techniques and traditional control methodologies, based on the realization of a learning team of agents in a continuous problem setting. In a multiagent system, action selection is important for cooperation and coordination among the agents. By employing reinforcement learning (RL) methods in a fuzzified state-space, we accomplish to design a control architecture and a corresponding methodology, engaged in a continuous space, which enables the agents to learn, over a period of time, to perform sequences of continuous actions in a cooperative manner, in order to reach their goal without any prior generated task model. By organizing the agents in a nested architecture, as proposed in this work, a type of problem-specific recursive knowledge acquisition is attempted. 
Furthermore, the agents try to exploit the knowledge gathered in order to be in position to execute tasks that indicate certain degree of similarity. The agents correspond in fact to independent degrees of freedom of the system, and achieve to gain experience over the task that they collaboratively perform, by exploring and exploiting their state-to-action mapping space. A numerical experiment is presented in this paper, performed on a simulated planar 4 degrees of freedom (DOF) manipulator, in order to evaluate both the proposed hierarchical multiagent architecture as well as the proposed methodological framework. It is anticipated that such an approach can be highly scalable for the control of robotic systems that are kinematically more complex, comprising multiple DOFs and potentially redundancies in open or closed kinematic chains, particularly dexterous manipulators. © 2008 IEEE. |
G Evangelopoulos, K Rapantzikos, A Potamianos, P Maragos, A Zlatintsi, Y Avrithis Movie Summarization based on Audiovisual Saliency Detection Conference Proc. IEEE Int'l Conf. on Image Processing (ICIP 2008), San Diego, CA, U.S.A., 2008. Abstract | BibTeX | Links: [PDF] @conference{ERP+08, title = {Movie Summarization based on Audiovisual Saliency Detection}, author = {G Evangelopoulos and K Rapantzikos and A Potamianos and P Maragos and A Zlatintsi and Y Avrithis}, url = {http://robotics.ntua.gr/wp-content/publications/EvangelopoulosRapantzikosEtAl_MovieSum_ICIP2008_fancyhead.pdf}, year = {2008}, date = {2008-10-01}, booktitle = {Proc. {IEEE} Int'l Conf. on Image Processing (ICIP 2008)}, address = {San Diego, CA, U.S.A.}, abstract = {Based on perceptual and computational attention modeling studies, we formulate measures of saliency for an audiovisual stream. Audio saliency is captured by signal modulations and related multi-frequency band features, extracted through nonlinear operators and energy tracking. Visual saliency is measured by means of a spatiotemporal attention model driven by various feature cues (intensity, color, motion). Audio and video curves are integrated in a single attention curve, where events may be enhanced, suppressed or vanished. The presence of salient events is signified on this audiovisual curve by geometrical features such as local extrema, sharp transition points and level sets. An audiovisual saliency-based movie summarization algorithm is proposed and evaluated. The algorithm is shown to perform very well in terms of summary informativeness and enjoyability for movie clips of various genres.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Based on perceptual and computational attention modeling studies, we formulate measures of saliency for an audiovisual stream. 
Audio saliency is captured by signal modulations and related multi-frequency band features, extracted through nonlinear operators and energy tracking. Visual saliency is measured by means of a spatiotemporal attention model driven by various feature cues (intensity, color, motion). Audio and video curves are integrated in a single attention curve, where events may be enhanced, suppressed or vanished. The presence of salient events is signified on this audiovisual curve by geometrical features such as local extrema, sharp transition points and level sets. An audiovisual saliency-based movie summarization algorithm is proposed and evaluated. The algorithm is shown to perform very well in terms of summary informativeness and enjoyability for movie clips of various genres. |
George Caridakis, Olga Diamanti, Kostas Karpouzis, Petros Maragos Automatic sign language recognition Conference Proceedings of the 1st ACM international conference on PErvasive Technologies Related to Assistive Environments - PETRA '08, 2008, ISBN: 9781605580678. Abstract | BibTeX | Links: [Webpage] [PDF] @conference{Caridakis2008, title = {Automatic sign language recognition}, author = { George Caridakis and Olga Diamanti and Kostas Karpouzis and Petros Maragos}, url = {http://portal.acm.org/citation.cfm?doid=1389586.1389687 http://robotics.ntua.gr/wp-content/uploads/sites/2/Caridakis2008.pdf}, doi = {10.1145/1389586.1389687}, isbn = {9781605580678}, year = {2008}, date = {2008-07-01}, booktitle = {Proceedings of the 1st ACM international conference on PErvasive Technologies Related to Assistive Environments - PETRA '08}, pages = {1}, abstract = {This work focuses on two of the research problems comprising automatic sign language recognition, namely robust computer vision techniques for consistent hand detection and tracking, while preserving the hand shape contour which is useful for extraction of features related to the handshape and a novel classification scheme incorporating Self-organizing maps, Markov chains and Hidden Markov Models. Geodesic Active Contours enhanced with skin color and motion information are employed for the hand detection and the extraction of the hand silhouette, while features extracted describe hand trajectory, region and shape. 
Extracted features are used as input to separate classifiers, forming a robust and adaptive architecture whose main contribution is the optimal utilization of the neighboring characteristic of the SOM during the decoding stage of the Markov chain, representing the sign class.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This work focuses on two of the research problems comprising automatic sign language recognition, namely robust computer vision techniques for consistent hand detection and tracking, while preserving the hand shape contour which is useful for extraction of features related to the handshape and a novel classification scheme incorporating Self-organizing maps, Markov chains and Hidden Markov Models. Geodesic Active Contours enhanced with skin color and motion information are employed for the hand detection and the extraction of the hand silhouette, while features extracted describe hand trajectory, region and shape. Extracted features are used as input to separate classifiers, forming a robust and adaptive architecture whose main contribution is the optimal utilization of the neighboring characteristic of the SOM during the decoding stage of the Markov chain, representing the sign class. |
George Papandreou, Petros Maragos Adaptive and constrained algorithms for inverse compositional active appearance model fitting Conference 26th IEEE Conference on Computer Vision and Pattern Recognition, CVPR, 2008, ISSN: 1063-6919. Abstract | BibTeX | Links: [PDF] @conference{Papandreou2008b, title = {Adaptive and constrained algorithms for inverse compositional active appearance model fitting}, author = { George Papandreou and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/papandreou2008.pdf}, doi = {10.1109/CVPR.2008.4587540}, issn = {1063-6919}, year = {2008}, date = {2008-06-01}, booktitle = {26th IEEE Conference on Computer Vision and Pattern Recognition, CVPR}, abstract = {Parametric models of shape and texture such as active appearance models (AAMs) are diverse tools for deformable object appearance modeling and have found important applications in both image synthesis and analysis problems. Among the numerous algorithms that have been proposed for AAM fitting, those based on the inverse-compositional image alignment technique have recently received considerable attention due to their potential for high efficiency. However, existing fitting algorithms perform poorly when used in conjunction with models exhibiting significant appearance variation, such as AAMs trained on multiple-subject human face images. We introduce two enhancements to inverse-compositional AAM matching algorithms in order to overcome this limitation. First, we propose fitting algorithm adaptation, by means of (a) fitting matrix adjustment and (b) AAM mean template update. Second, we show how prior information can be incorporated and constrain the AAM fitting process. The inverse-compositional nature of the algorithm allows efficient implementation of these enhancements. 
Both techniques substantially improve AAM fitting performance, as demonstrated with experiments on publicly available multi-face datasets.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Parametric models of shape and texture such as active appearance models (AAMs) are diverse tools for deformable object appearance modeling and have found important applications in both image synthesis and analysis problems. Among the numerous algorithms that have been proposed for AAM fitting, those based on the inverse-compositional image alignment technique have recently received considerable attention due to their potential for high efficiency. However, existing fitting algorithms perform poorly when used in conjunction with models exhibiting significant appearance variation, such as AAMs trained on multiple-subject human face images. We introduce two enhancements to inverse-compositional AAM matching algorithms in order to overcome this limitation. First, we propose fitting algorithm adaptation, by means of (a) fitting matrix adjustment and (b) AAM mean template update. Second, we show how prior information can be incorporated and constrain the AAM fitting process. The inverse-compositional nature of the algorithm allows efficient implementation of these enhancements. Both techniques substantially improve AAM fitting performance, as demonstrated with experiments on publicly available multi-face datasets. |
Georgios Evangelopoulos, Petros Maragos Image decomposition into structure and texture subcomponents with multifrequency modulation constraints Conference 2008 IEEE Conference on Computer Vision and Pattern Recognition, IEEE 2008. @conference{Evangelopoulos2008b, title = {Image decomposition into structure and texture subcomponents with multifrequency modulation constraints}, author = {Georgios Evangelopoulos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/evangelopoulos2008.pdf}, year = {2008}, date = {2008-06-01}, booktitle = {2008 IEEE Conference on Computer Vision and Pattern Recognition}, pages = {1--8}, organization = {IEEE}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Costas Tzafestas, Spyros Velanas, George Fakiridis Adaptive impedance control in haptic teleoperation to improve transparency under time-delay Conference Proceedings - IEEE International Conference on Robotics and Automation, 2008, ISSN: 10504729. Abstract | BibTeX | Links: [PDF] @conference{Tzafestas2008b, title = {Adaptive impedance control in haptic teleoperation to improve transparency under time-delay}, author = { Costas Tzafestas and Spyros Velanas and George Fakiridis}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/tzafestasVelanas2008.pdf}, doi = {10.1109/ROBOT.2008.4543211}, issn = {10504729}, year = {2008}, date = {2008-05-01}, booktitle = {Proceedings - IEEE International Conference on Robotics and Automation}, pages = {212--219}, abstract = {This paper proposes the application of an adaptive impedance control scheme to alleviate some of the problems associated with the presence of time delays in a haptic teleoperation system. Continuous on-line estimation of the remote environment's impedance is performed, and is then used as a local model for haptic display control. Lyapunov stability of the proposed impedance adaptation law is demonstrated. A series of experiments is performed to evaluate the performance of this teleoperation control scheme. Two performance measures are defined to assess transparency and stability of the teleoperator. Simulation results show the superior performance of the proposed adaptive scheme, with respect to direct teleoperation, particularly in terms of increasing the stability margin and of significantly ameliorating transparency in the presence of large time delays. Experimental results, using a phantom omni as the haptic master device, support this conclusion.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper proposes the application of an adaptive impedance control scheme to alleviate some of the problems associated with the presence of time delays in a haptic teleoperation system. 
Continuous on-line estimation of the remote environment's impedance is performed, and is then used as a local model for haptic display control. Lyapunov stability of the proposed impedance adaptation law is demonstrated. A series of experiments is performed to evaluate the performance of this teleoperation control scheme. Two performance measures are defined to assess transparency and stability of the teleoperator. Simulation results show the superior performance of the proposed adaptive scheme, with respect to direct teleoperation, particularly in terms of increasing the stability margin and of significantly ameliorating transparency in the presence of large time delays. Experimental results, using a phantom omni as the haptic master device, support this conclusion. |
D Spachos, A Zlatintsi, V Moschou, P Antonopoulos, E Benetos, M Kotti, K Tzimouli, C Kotropoulos, N Nikolaidis, P Maragos, I Pitas MUSCLE Movie Database: A Multimodal Corpus With Rich Annotation For Dialogue And Saliency Detection Conference Marrakech, Morocco, 2008. Abstract | BibTeX | Links: [PDF] @conference{SZM+-8, title = {MUSCLE Movie Database: A Multimodal Corpus With Rich Annotation For Dialogue And Saliency Detection}, author = {D Spachos and A Zlatintsi and V Moschou and P Antonopoulos and E Benetos and M Kotti and K Tzimouli and C Kotropoulos and N Nikolaidis and P Maragos and I Pitas}, url = {http://robotics.ntua.gr/wp-content/publications/SpachosZlatintsi+_MuscleMovieDatabase_LREC08.pdf}, year = {2008}, date = {2008-05-01}, address = {Marrakech, Morocco}, abstract = {Semantic annotation of multimedia content is important for training, testing, and assessing content-based algorithms for indexing, organization, browsing, and retrieval. To this end, an annotated multimodal movie corpus has been collected to be used as a test bed for development and assessment of content-based multimedia processing, such as speaker clustering, speaker turn detection, visual speech activity detection, face detection, face clustering, scene segmentation, saliency detection, and visual dialogue detection. All metadata are saved in XML format following the MPEG-7 ISO prototype to ensure data compatibility and reusability. The entire MUSCLE movie database is available for download through the web. Visual speech activity and dialogue detection algorithms that have been developed within the software package DIVA3D and tested on this database are also briefly described. 
Furthermore, we review existing annotation tools with emphasis on the novel annotation tool Anthropos7 Editor.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Semantic annotation of multimedia content is important for training, testing, and assessing content-based algorithms for indexing, organization, browsing, and retrieval. To this end, an annotated multimodal movie corpus has been collected to be used as a test bed for development and assessment of content-based multimedia processing, such as speaker clustering, speaker turn detection, visual speech activity detection, face detection, face clustering, scene segmentation, saliency detection, and visual dialogue detection. All metadata are saved in XML format following the MPEG-7 ISO prototype to ensure data compatibility and reusability. The entire MUSCLE movie database is available for download through the web. Visual speech activity and dialogue detection algorithms that have been developed within the software package DIVA3D and tested on this database are also briefly described. Furthermore, we review existing annotation tools with emphasis on the novel annotation tool Anthropos7 Editor. |
Michael Aron, Anastasios Roussos, Marie Odile Berger, Erwan Kerrien, Petros Maragos Multimodality acquisition of articulatory data and processing Conference European Signal Processing Conference, 2008, ISSN: 22195491. Abstract | BibTeX | Links: [PDF] @conference{205, title = {Multimodality acquisition of articulatory data and processing}, author = { Michael Aron and Anastasios Roussos and Marie Odile Berger and Erwan Kerrien and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/AronRoussosBergerKerrienMaragos_MultimodAcquisArticulDataProcessing_EUSIPCO2008.pdf}, issn = {22195491}, year = {2008}, date = {2008-01-01}, booktitle = {European Signal Processing Conference}, abstract = {In this paper, a framework to acquire and process dynamic data of the tongue during speech processing is presented. First, a setup to acquire data of the tongue shape combining ultrasound images, electromagnetic localization sensors and sound is presented. Techniques to automatically calibrate and synchronize the data are described. A method to extract the tongue shape is then proposed, by combining a preprocessing of the ultrasound images with an image-based tracking method that integrates adapted constraints. copyright by EURASIP.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, a framework to acquire and process dynamic data of the tongue during speech processing is presented. First, a setup to acquire data of the tongue shape combining ultrasound images, electromagnetic localization sensors and sound is presented. Techniques to automatically calibrate and synchronize the data are described. A method to extract the tongue shape is then proposed, by combining a preprocessing of the ultrasound images with an image-based tracking method that integrates adapted constraints. copyright by EURASIP. |
G Evangelopoulos, P Maragos Texture Modulation-Constrained Image Decomposition Conference Proc. Int'l Conference on Image Processing (ICIP-2008), San Diego, California, Oct. 2008, 2008. @conference{200, title = {Texture Modulation-Constrained Image Decomposition}, author = { G Evangelopoulos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/EvangelopoulosMaragos_ukv_Image Decomposition_ICIP08.pdf}, year = {2008}, date = {2008-01-01}, booktitle = {Proc. Int'l Conference on Image Processing (ICIP-2008), San Diego, California, Oct. 2008}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
G. Evangelopoulos, K. Rapantzikos, A. Potamianos, P. Maragos, A. Zlatintsi, Y. Avrithis Movie summarization based on audiovisual saliency detection Conference Proceedings - International Conference on Image Processing, ICIP, 2008, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{203, title = {Movie summarization based on audiovisual saliency detection}, author = { G. Evangelopoulos and K. Rapantzikos and A. Potamianos and P. Maragos and A. Zlatintsi and Y. Avrithis}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/ERPMZA_MovieSummarizAVSaliency_ICIP2008.pdf}, doi = {10.1109/ICIP.2008.4712308}, issn = {15224880}, year = {2008}, date = {2008-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {2528--2531}, abstract = {Based on perceptual and computational attention modeling studies, we formulate measures of saliency for an audiovisual stream. Audio saliency is captured by signal modulations and related multi-frequency band features, extracted through nonlinear operators and energy tracking. Visual saliency is measured by means of a spatiotemporal attention model driven by various feature cues (intensity, color, motion). Audio and video curves are integrated in a single attention curve, where events may be enhanced, suppressed or vanished. The presence of salient events is signified on this audiovisual curve by geometrical features such as local extrema, sharp transition points and level sets. An audiovisual saliency-based movie summarization algorithm is proposed and evaluated. The algorithm is shown to perform very well in terms of summary informativeness and enjoyability for movie clips of various genres.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Based on perceptual and computational attention modeling studies, we formulate measures of saliency for an audiovisual stream. 
Audio saliency is captured by signal modulations and related multi-frequency band features, extracted through nonlinear operators and energy tracking. Visual saliency is measured by means of a spatiotemporal attention model driven by various feature cues (intensity, color, motion). Audio and video curves are integrated in a single attention curve, where events may be enhanced, suppressed or vanished. The presence of salient events is signified on this audiovisual curve by geometrical features such as local extrema, sharp transition points and level sets. An audiovisual saliency-based movie summarization algorithm is proposed and evaluated. The algorithm is shown to perform very well in terms of summary informativeness and enjoyability for movie clips of various genres. |
Georgios Evangelopoulos Image Decomposition into Structure and Texture Subcomponents with Conference Image (Rochester, N.Y.), 2008, ISBN: 9781424422432. @conference{210, title = {Image Decomposition into Structure and Texture Subcomponents with}, author = { Georgios Evangelopoulos}, isbn = {9781424422432}, year = {2008}, date = {2008-01-01}, booktitle = {Image (Rochester, N.Y.)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
A Katsamanis, G Papandreou, P Maragos Audiovisual-to-Articulatory Speech Inversion Using Active Appearance Models for the Face and Hidden Markov Models for the Dynamics Conference IEEE Int. Conference on Acoustics, Speech, and Signal Processing, 2008. @conference{214, title = {Audiovisual-to-Articulatory Speech Inversion Using Active Appearance Models for the Face and Hidden Markov Models for the Dynamics}, author = { A Katsamanis and G Papandreou and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KatsamanisPapandreouMaragos_AamAvHmmInversion_icassp08.pdf}, year = {2008}, date = {2008-01-01}, booktitle = {IEEE Int. Conference on Acoustics, Speech, and Signal Processing}, pages = {2237--2240}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
A. Katsamanis, G. Ananthakrishnan, G. Papandreou, P. Maragos, O. Engwall Audiovisual speech inversion by switching dynamical modeling governed by a Hidden Markov process Conference European Signal Processing Conference, 2008, ISSN: 22195491. Abstract | BibTeX | Links: [PDF] @conference{206, title = {Audiovisual speech inversion by switching dynamical modeling governed by a Hidden Markov process}, author = { A. Katsamanis and G. Ananthakrishnan and G. Papandreou and P. Maragos and O. Engwall}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KatsamanisAnanthPapandreouMaragosEngwall_AV-Speechinvers-SwitchDynModel-HidMarkov_EUSIPCO2008.pdf}, issn = {22195491}, year = {2008}, date = {2008-01-01}, booktitle = {European Signal Processing Conference}, abstract = {We propose a unified framework to recover articulation from audiovisual speech. The nonlinear audiovisual-to-articulatory mapping is modeled by means of a switching linear dynamical system. Switching is governed by a state sequence determined via a Hidden Markov Model alignment process. Mel Frequency Cepstral Coefficients are extracted from audio while visual analysis is performed using Active Appearance Models. The articulatory state is represented by the coordinates of points on important articulators, e.g., tongue and lips. To evaluate our inversion approach, instead of just using the conventional correlation coefficients and root mean squared errors, we introduce a novel evaluation scheme that is more specific to the inversion problem. Prediction errors in the positions of the articulators are weighted differently depending on their relevant importance in the production of the corresponding sound. 
The applied weights are determined by an articulatory classification analysis using Support Vector Machines with a radial basis function kernel. Experiments are conducted in the audiovisual-articulatory MOCHA database.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We propose a unified framework to recover articulation from audiovisual speech. The nonlinear audiovisual-to-articulatory mapping is modeled by means of a switching linear dynamical system. Switching is governed by a state sequence determined via a Hidden Markov Model alignment process. Mel Frequency Cepstral Coefficients are extracted from audio while visual analysis is performed using Active Appearance Models. The articulatory state is represented by the coordinates of points on important articulators, e.g., tongue and lips. To evaluate our inversion approach, instead of just using the conventional correlation coefficients and root mean squared errors, we introduce a novel evaluation scheme that is more specific to the inversion problem. Prediction errors in the positions of the articulators are weighted differently depending on their relevant importance in the production of the corresponding sound. The applied weights are determined by an articulatory classification analysis using Support Vector Machines with a radial basis function kernel. Experiments are conducted in the audiovisual-articulatory MOCHA database. |
Yves Laprie, Petros Maragos, Jean Schoentgen How can acoustic-to-articulatory maps be constrained? Conference European Signal Processing Conference, 2008, ISSN: 22195491. Abstract | BibTeX | Links: [PDF] @conference{207, title = {How can acoustic-to-articulatory maps be constrained?}, author = { Yves Laprie and Petros Maragos and Jean Schoentgen}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/LaprieMaragosSchoentgen_HowCanAcousticToArticulatoryMapsBeConstrained_EUSIPCO2008.pdf}, issn = {22195491}, year = {2008}, date = {2008-01-01}, booktitle = {European Signal Processing Conference}, abstract = {The objective of the presentation is to examine issues in constraining acoustic-to-articulatory maps by means of facial data and other a priori knowledge regarding speech production. Constraints that are considered are the insertion of data on lip opening, spread and protrusion, as well as other facial data together with constraints on the vocal tract length. A priori knowledge that has been taken into account concerns the deformation and speed of deformation of the vocal tract as well as phonetic rules regarding vowel-typical tract shapes. Inverse maps that have been tested are formant-to-area and formant-to-parametric sagittal profile maps as well as audio/visual-to-electromagnetic coil trajectory maps. The results obtained while mapping audio-only data compared to audio combined with other data are discussed.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } The objective of the presentation is to examine issues in constraining acoustic-to-articulatory maps by means of facial data and other a priori knowledge regarding speech production. 
Constraints that are considered are the insertion of data on lip opening, spread and protrusion, as well as other facial data together with constraints on the vocal tract length. A priori knowledge that has been taken into account concerns the deformation and speed of deformation of the vocal tract as well as phonetic rules regarding vowel-typical tract shapes. Inverse maps that have been tested are formant-to-area and formant-to-parametric sagittal profile maps as well as audio/visual-to-electromagnetic coil trajectory maps. The results obtained while mapping audio-only data compared to audio combined with other data are discussed. |
G. Papandreou, P. Maragos, A. Kokaram Image inpainting with a wavelet domain hidden Markov tree model Conference Proceedings of International Conference on Acoustics, Speech, and Signal Processing (ICASSP-08), Las Vegas, USA, April 2008, 2008. @conference{213, title = {Image inpainting with a wavelet domain hidden Markov tree model}, author = { G. Papandreou and P. Maragos and A. Kokaram}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PapandreouMaragosKokaram_HmtInpainting_icassp08.pdf}, year = {2008}, date = {2008-01-01}, booktitle = {Proceedings of International Conference on Acoustics, Speech, and Signal Processing (ICASSP-08), Las Vegas, USA, April 2008}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Eftychios A. Pnevmatikakis, Petros Maragos An inpainting system for automatic image structure-texture restoration with text removal Conference Proceedings - International Conference on Image Processing, ICIP, 2008, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{201, title = {An inpainting system for automatic image structure-texture restoration with text removal}, author = { Eftychios A. Pnevmatikakis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PnevmatikakisMaragos_TextureRestorationWithTextRemoval_ICIP08.pdf}, doi = {10.1109/ICIP.2008.4712330}, issn = {15224880}, year = {2008}, date = {2008-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, pages = {2616--2619}, abstract = {In this paper we deal with the inpainting problem and with the problem of finding text in images. We first review many of the methods used for structure and texture inpaintings. The novel contribution of the paper is the combination of the inpainting techniques with the techniques of finding text in images and a simple morphological algorithm that links them. This combination results in an automatic system for text removal and image restoration that requires no user interface at all. Examples on real images show very good performance of the proposed system and the importance of the new linking algorithm.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper we deal with the inpainting problem and with the problem of finding text in images. We first review many of the methods used for structure and texture inpaintings. The novel contribution of the paper is the combination of the inpainting techniques with the techniques of finding text in images and a simple morphological algorithm that links them. This combination results in an automatic system for text removal and image restoration that requires no user interface at all. 
Examples on real images show very good performance of the proposed system and the importance of the new linking algorithm. |
D Spachos, A Zlatintsi MUSCLE movie database: A multimodal corpus with rich annotation for dialogue and saliency detection Conference Programme of the Workshop on Multimodal Corpora, 2008. @conference{211, title = {MUSCLE movie database: A multimodal corpus with rich annotation for dialogue and saliency detection}, author = { D Spachos and A Zlatintsi}, url = {http://users.uoi.gr/cs01702/MargaritaKotti/MypublicationsPDFs/Muscle movie.pdf}, year = {2008}, date = {2008-01-01}, booktitle = {Programme of the Workshop on Multimodal Corpora}, pages = {16}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Stamatios Lefkimmiatis, George Papandreou, Petros Maragos PHOTON-LIMITED IMAGE DENOISING BY INFERENCE ON MULTISCALE MODELS Conference Proc. Int’l Conference on Image Processing, 2008, ISBN: 9781424417643. @conference{204, title = {PHOTON-LIMITED IMAGE DENOISING BY INFERENCE ON MULTISCALE MODELS}, author = {Stamatios Lefkimmiatis and George Papandreou and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/LefkimmiatisPapandreouMaragos_PhotonLimitedImageDenoisingByInferenceMultiscaleModels_ICIP08.pdf}, isbn = {9781424417643}, year = {2008}, date = {2008-01-01}, booktitle = {Proc. Int’l Conference on Image Processing}, pages = {2332--2335}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Stamatios Lefkimmiatis, Petros Maragos, Athanassios Katsamanis MULTISENSOR MULTIBAND CROSS-ENERGY TRACKING FOR FEATURE EXTRACTION AND RECOGNITION Conference Proceedings of International Conference on Acoustics, Speech, and Signal Processing (ICASSP-08), Las Vegas, USA, April 2008, 2008. @conference{212, title = {MULTISENSOR MULTIBAND CROSS-ENERGY TRACKING FOR FEATURE EXTRACTION AND RECOGNITION }, author = {Stamatios Lefkimmiatis and Petros Maragos and Athanassios Katsamanis}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/LefkimmiatisMaragosKatsamanis_MultisensorMBandASR_icassp08.pdf}, year = {2008}, date = {2008-01-01}, booktitle = {Proceedings of International Conference on Acoustics, Speech, and Signal Processing (ICASSP-08), Las Vegas, USA, April 2008}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Petros Maragos, Corinne Vachier A PDE formulation for viscous morphological operators with extensions to intensity-adaptive operators Conference Proc. Int’l Conference on Image Processing (ICIP-2008), San Diego, California, 2008, ISSN: 15224880. @conference{199, title = {A PDE formulation for viscous morphological operators with extensions to intensity-adaptive operators}, author = {Petros Maragos and Corinne Vachier}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosVachier_PDEFormulationViscousMorphologicalOperatorsToIntensityAdaptiveOps_ICIP08.pdf}, doi = {10.1109/ICIP.2008.4712226}, issn = {15224880}, year = {2008}, date = {2008-01-01}, booktitle = {Proc. Int’l Conference on Image Processing (ICIP-2008)}, journal = {Proceedings - International Conference on Image Processing, ICIP}, pages = {2200--2203}, address = {San Diego, California}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Georgios Evangelopoulos, Konstantinos Rapantzikos, Petros Maragos, Yannis Avrithis, Alexandros Potamianos Audiovisual Attention Modeling and Salient Event Detection Book Chapter Maragos, Petros; Potamianos, Alexandros; Gros, Patrick (Ed.): Multimodal Processing and Interaction: Audio, Video, Text, pp. 1–21, Springer US, Boston, MA, 2008, ISBN: 978-0-387-76316-3. BibTeX | Links: [Webpage] [PDF] @inbook{Evangelopoulos2008b, title = {Audiovisual Attention Modeling and Salient Event Detection}, author = {Georgios Evangelopoulos and Konstantinos Rapantzikos and Petros Maragos and Yannis Avrithis and Alexandros Potamianos}, editor = {Petros Maragos and Alexandros Potamianos and Patrick Gros}, url = {https://doi.org/10.1007/978-0-387-76316-3_8 http://robotics.ntua.gr/wp-content/uploads/sites/2/Evangelopoulos-et-al_Chapter-of-Book_MPIAVT_Maragos-et-aled_Springer2008_peprint.pdf}, doi = {10.1007/978-0-387-76316-3_8}, isbn = {978-0-387-76316-3}, year = {2008}, date = {2008-01-01}, booktitle = {Multimodal Processing and Interaction: Audio, Video, Text}, pages = {1--21}, publisher = {Springer US}, address = {Boston, MA}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } |
Petros Maragos, Patrick Gros, Athanassios Katsamanis, George Papandreou Cross-Modal Integration for Performance Improving in Multimedia: A Review Book Chapter Maragos, Petros; Potamianos, Alexandros; Gros, Patrick (Ed.): Multimodal Processing and Interaction: Audio, Video, Text, pp. 1–46, Springer US, Boston, MA, 2008, ISBN: 978-0-387-76316-3. BibTeX | Links: [Webpage] [PDF] @inbook{Maragos2008b, title = {Cross-Modal Integration for Performance Improving in Multimedia: A Review}, author = {Petros Maragos and Patrick Gros and Athanassios Katsamanis and George Papandreou}, editor = {Petros Maragos and Alexandros Potamianos and Patrick Gros}, url = {https://doi.org/10.1007/978-0-387-76316-3_1 http://robotics.ntua.gr/wp-content/uploads/sites/2/2008_MaragosEtAl_CrossModalIntegration-in-Multimedia-Review_Chap1-Book-MPIAVT_Springer_preprint.pdf}, doi = {10.1007/978-0-387-76316-3_1}, isbn = {978-0-387-76316-3}, year = {2008}, date = {2008-01-01}, booktitle = {Multimodal Processing and Interaction: Audio, Video, Text}, pages = {1--46}, publisher = {Springer US}, address = {Boston, MA}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } |
George Papandreou, Athanassios Katsamanis, Vassilis Pitsikalis, Petros Maragos Adaptive Multimodal Fusion by Uncertainty Compensation with Application to Audio-Visual Speech Recognition Book Chapter Maragos, Petros; Potamianos, Alexandros; Gros, Patrick (Ed.): Multimodal Processing and Interaction: Audio, Video, Text, pp. 1–15, Springer US, Boston, MA, 2008, ISBN: 978-0-387-76316-3. BibTeX | Links: [Webpage] [PDF] @inbook{Papandreou2008b, title = {Adaptive Multimodal Fusion by Uncertainty Compensation with Application to Audio-Visual Speech Recognition}, author = {George Papandreou and Athanassios Katsamanis and Vassilis Pitsikalis and Petros Maragos}, editor = {Petros Maragos and Alexandros Potamianos and Patrick Gros}, url = {https://doi.org/10.1007/978-0-387-76316-3_4 http://robotics.ntua.gr/wp-content/uploads/sites/2/Papandreou_chapter_2008.pdf}, doi = {10.1007/978-0-387-76316-3_4}, isbn = {978-0-387-76316-3}, year = {2008}, date = {2008-01-01}, booktitle = {Multimodal Processing and Interaction: Audio, Video, Text}, pages = {1--15}, publisher = {Springer US}, address = {Boston, MA}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } |
2007 |
Stamatios Lefkimmiatis, Petros Maragos A generalized estimation approach for linear and nonlinear microphone array post-filters Journal Article Speech Communication, 49 (7-8), pp. 657–666, 2007, ISSN: 01676393. Abstract | BibTeX | Links: [PDF] @article{124, title = {A generalized estimation approach for linear and nonlinear microphone array post-filters}, author = {Stamatios Lefkimmiatis and Petros Maragos}, url = {https://www.scopus.com/inward/record.url?eid=2-s2.0-34447096369&partnerID=40&md5=2a28c43abbc35eb2d516a43e23ea6602 http://robotics.ntua.gr/wp-content/uploads/sites/2/LefkimmiatisMaragos_GeneralizedEstimationMicrophoneArrays_specom2007.pdf}, doi = {10.1016/j.specom.2007.02.004}, issn = {01676393}, year = {2007}, date = {2007-01-01}, journal = {Speech Communication}, volume = {49}, number = {7-8}, pages = {657--666}, abstract = {This paper presents a robust and general method for estimating the transfer functions of microphone array post-filters, derived under various speech enhancement criteria. For the case of the mean square error (MSE) criterion, the proposed method is an improvement of the existing McCowan post-filter, which under the assumption of a known noise field coherence function uses the auto- and cross-spectral densities of the microphone array noisy inputs to estimate the Wiener post-filter transfer function. In contrast to McCowan post-filter, the proposed method takes into account the noise reduction performed by the minimum variance distortionless response (MVDR) beamformer and obtains a more accurate estimation of the noise spectral density. Furthermore, the proposed estimation approach is general and can be used for the derivation of both linear and nonlinear microphone array post-filters, according to the utilized enhancement criterion. 
In experiments with real noise multichannel recordings the proposed technique has shown to obtain a significant gain over the other studied methods in terms of five different objective speech quality measures. \textcopyright{} 2007 Elsevier B.V. All rights reserved.}, keywords = {}, pubstate = {published}, tppubtype = {article} } This paper presents a robust and general method for estimating the transfer functions of microphone array post-filters, derived under various speech enhancement criteria. For the case of the mean square error (MSE) criterion, the proposed method is an improvement of the existing McCowan post-filter, which under the assumption of a known noise field coherence function uses the auto- and cross-spectral densities of the microphone array noisy inputs to estimate the Wiener post-filter transfer function. In contrast to McCowan post-filter, the proposed method takes into account the noise reduction performed by the minimum variance distortionless response (MVDR) beamformer and obtains a more accurate estimation of the noise spectral density. Furthermore, the proposed estimation approach is general and can be used for the derivation of both linear and nonlinear microphone array post-filters, according to the utilized enhancement criterion. In experiments with real noise multichannel recordings the proposed technique has shown to obtain a significant gain over the other studied methods in terms of five different objective speech quality measures. © 2007 Elsevier B.V. All rights reserved. |
George Papandreou, Petros Maragos Multigrid Geometric Active Contour Models Journal Article IEEE Transactions on Image Processing, 16 (1), pp. 229–240, 2007. @article{125, title = {Multigrid Geometric Active Contour Models}, author = {George Papandreou and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PapandreouMaragos_MultigridGeometricActiveContourModels_tip07.pdf}, doi = {10.1109/TIP.2006.884952}, year = {2007}, date = {2007-01-01}, journal = {IEEE Transactions on Image Processing}, volume = {16}, number = {1}, pages = {229--240}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Nikos C. Mitsou, Costas S. Tzafestas Temporal Occupancy Grid for mobile robot dynamic environment mapping Conference 2007 Mediterranean Conference on Control and Automation, MED, 2007, ISBN: 142441282X. Abstract | BibTeX | Links: [PDF] @conference{Mitsou2007, title = {Temporal Occupancy Grid for mobile robot dynamic environment mapping}, author = { Nikos C. Mitsou and Costas S. Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Temporal_Occupancy_Grid_for_mobile_robot_dynamic_environment_mapping.pdf}, doi = {10.1109/MED.2007.4433892}, isbn = {142441282X}, year = {2007}, date = {2007-07-01}, booktitle = {2007 Mediterranean Conference on Control and Automation, MED}, abstract = {Mapping dynamic environments is an open issue in the field of robotics. In this paper, we extend the well known Occupancy Grid structure to address the problem of generating valid maps for dynamic indoor environments. We propose a spatiotemporal access method to store all sensor values (instead of preserving only one value for each cell as in the common occupancy grid case). By searching for similar time series, we can detect moving objects that appear only in a limited number of possible configurations (e.g. doors or chairs). Simulated experiments demonstrate the potentialities of the proposed system.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Mapping dynamic environments is an open issue in the field of robotics. In this paper, we extend the well known Occupancy Grid structure to address the problem of generating valid maps for dynamic indoor environments. We propose a spatiotemporal access method to store all sensor values (instead of preserving only one value for each cell as in the common occupancy grid case). By searching for similar time series, we can detect moving objects that appear only in a limited number of possible configurations (e.g. doors or chairs). Simulated experiments demonstrate the potentialities of the proposed system. |
Maciej Cytowski Partial Differential Equations in Image Processing Conference Invited Paper, Proc. European Signal Processing Conf. (EUSIPCO--98), Rhodes, Greece, pp. 527-536, Sep. 1998, (May), 2007. @conference{265, title = {Partial Differential Equations in Image Processing}, author = { Maciej Cytowski}, year = {2007}, date = {2007-01-01}, booktitle = {Invited Paper, Proc. European Signal Processing Conf. (EUSIPCO--98), Rhodes, Greece, pp. 527-536, Sep. 1998}, number = {May}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
D. Dimitriadis, P Maragos, S Lefkimmiatis Multiband, Multisensor Features for Robust Speech Recognition Conference Proc. Int'l Conf. on Speech Technology and Communication (InterSpeech 2007 EuroSpeech), Antwerp, Belgium, Aug. 2007, 2007. @conference{220, title = {Multiband, Multisensor Features for Robust Speech Recognition}, author = { D. Dimitriadis and P Maragos and S Lefkimmiatis}, url = {DimitriadisMaragosLefkimmiatis_MinTECC_ASR_InterSpeech2007.pdf}, year = {2007}, date = {2007-01-01}, booktitle = {Proc. Int'l Conf. on Speech Technology and Communication (InterSpeech 2007 EuroSpeech), Antwerp, Belgium, Aug. 2007}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Dimitrios Dimitriadis, JC Segura, Luz Garcia Advanced front-end for robust speech recognition in extremely adverse environments Conference Proc. …, 2007, ISBN: 9781605603162. Abstract | BibTeX | Links: [PDF] @conference{221, title = {Advanced front-end for robust speech recognition in extremely adverse environments}, author = { Dimitrios Dimitriadis and JC Segura and Luz Garcia}, url = {http://cvsp.cs.ntua.gr/projects/pub/HIWIRE/HiwirePublications/DMSP_HAFE_ASR_Interspeech07.pdf}, isbn = {9781605603162}, year = {2007}, date = {2007-01-01}, booktitle = {Proc. \ldots}, pages = {1--4}, abstract = {In this paper, a unified approach to speech enhancement, feature extraction and feature normalization for speech recognition in adverse recording conditions is presented. The proposed front-end system consists of several different, independent, processing modules. Each of the algorithms contained in these modules has been independently applied to the problem of speech recognition in noise, significantly improving the recognition rates. In this work, these algorithms are merged in a single front-end and their combined performance is demonstrated. Specifically, the proposed advanced front-end extracts noise-invariant features via the following modules: Wiener filtering, voice-activity detection, robust feature extraction (nonlinear modulation or fractal features), parameter equalization and frame-dropping. The advanced front-end is applied to extremely adverse environments where most feature extraction schemes fail. We show that by combining speech enhancement, robust feature extraction and feature normalization up to a fivefold error rate reduction can be achieved for certain tasks.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, a unified approach to speech enhancement, feature extraction and feature normalization for speech recognition in adverse recording conditions is presented. 
The proposed front-end system consists of several different, independent, processing modules. Each of the algorithms contained in these modules has been independently applied to the problem of speech recognition in noise, significantly improving the recognition rates. In this work, these algorithms are merged in a single front-end and their combined performance is demonstrated. Specifically, the proposed advanced front-end extracts noise-invariant features via the following modules: Wiener filtering, voice-activity detection, robust feature extraction (nonlinear modulation or fractal features), parameter equalization and frame-dropping. The advanced front-end is applied to extremely adverse environments where most feature extraction schemes fail. We show that by combining speech enhancement, robust feature extraction and feature normalization up to a fivefold error rate reduction can be achieved for certain tasks. |
Athanassios Katsamanis, George Papandreou, Petros Maragos Audiovisual-to-articulatory speech inversion using HMMs Conference 2007 IEEE 9Th International Workshop on Multimedia Signal Processing, MMSP 2007 - Proceedings, 2007, ISBN: 1424412749. Abstract | BibTeX | Links: [PDF] @conference{217, title = {Audiovisual-to-articulatory speech inversion using HMMs}, author = { Athanassios Katsamanis and George Papandreou and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KatsamanisPapandreouMaragos_AVSpeechInversionHMM_mmsp07.pdf}, doi = {10.1109/MMSP.2007.4412915}, isbn = {1424412749}, year = {2007}, date = {2007-01-01}, booktitle = {2007 IEEE 9Th International Workshop on Multimedia Signal Processing, MMSP 2007 - Proceedings}, pages = {457--460}, abstract = {We address the problem of audiovisual speech inversion, namely recovering the vocal tract's geometry from auditory and visual speech cues. We approach the problem in a statistical framework, combining ideas from multistream Hidden Markov Models and canonical correlation analysis, and demonstrate effective estimation of the trajectories followed by certain points of interest in the speech production system. Our experiments show that exploiting both audio and visual modalities clearly improves performance relative to either audio-only or visual-only estimation. We report experiments on the QSMT database which contains audio, video, and electromagnetic articulography data recorded in parallel.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We address the problem of audiovisual speech inversion, namely recovering the vocal tract's geometry from auditory and visual speech cues. We approach the problem in a statistical framework, combining ideas from multistream Hidden Markov Models and canonical correlation analysis, and demonstrate effective estimation of the trajectories followed by certain points of interest in the speech production system. 
Our experiments show that exploiting both audio and visual modalities clearly improves performance relative to either audio-only or visual-only estimation. We report experiments on the QSMT database which contains audio, video, and electromagnetic articulography data recorded in parallel. |
P. Maragos, Georgios Evangelopoulos Leveling cartoons, texture energy markers, and image decomposition Conference 8th Int. Symp. on Mathematical Morphology, 1 , 2007. @conference{215, title = {Leveling cartoons, texture energy markers, and image decomposition}, author = { P. Maragos and Georgios Evangelopoulos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosEvangelopoulos_LevelCartoons-TextureEnergyMarkers-ImageDecompos_ISMM2007.pdf}, year = {2007}, date = {2007-01-01}, booktitle = {8th Int. Symp. on Mathematical Morphology}, volume = {1}, pages = {125--138}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Nikos Mitsou, Costas Tzafestas Maximum likelihood SLAM in dynamic environments Conference Proceedings - International Conference on Tools with Artificial Intelligence, ICTAI, 1 , 2007, ISSN: 10823409. Abstract | BibTeX | Links: [PDF] @conference{Mitsou2007b, title = {Maximum likelihood SLAM in dynamic environments}, author = { Nikos Mitsou and Costas Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Maximum_Likelihood_SLAM_in_Dynamic_Environments.pdf}, doi = {10.1109/ICTAI.2007.168}, issn = {10823409}, year = {2007}, date = {2007-01-01}, booktitle = {Proceedings - International Conference on Tools with Artificial Intelligence, ICTAI}, volume = {1}, pages = {152--156}, abstract = {Simultaneous Localization and Mapping in dynamic environments is an open issue in the field of robotics. Traditionally, the related approaches assume that the environment remains static during the robot's exploration phase. In this work, we overcome this assumption and propose an algorithm that exploits the dynamic nature of the environment during robot exploration so as to improve the localization process. We use a Histogram Grid to store all the past occupancy values of every cell and thus to select the most probable pose of the robot based on the occupancy evolution. Experiments on a simulated robot indicate the effectiveness of the proposed approach.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Simultaneous Localization and Mapping in dynamic environments is an open issue in the field of robotics. Traditionally, the related approaches assume that the environment remains static during the robot's exploration phase. In this work, we overcome this assumption and propose an algorithm that exploits the dynamic nature of the environment during robot exploration so as to improve the localization process. 
We use a Histogram Grid to store all the past occupancy values of every cell and thus to select the most probable pose of the robot based on the occupancy evolution. Experiments on a simulated robot indicate the effectiveness of the proposed approach. |
George Papandreou, Athanassios Katsamanis, Vassilis Pitsikalis, Petros Maragos Multimodal fusion and learning with uncertain features applied to audiovisual speech recognition Conference 2007 IEEE 9Th International Workshop on Multimedia Signal Processing, MMSP 2007 - Proceedings, (November 2015), 2007, ISBN: 1424412749. Abstract | BibTeX | Links: [PDF] @conference{218, title = {Multimodal fusion and learning with uncertain features applied to audiovisual speech recognition}, author = { George Papandreou and Athanassios Katsamanis and Vassilis Pitsikalis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PapandreouKatsamanisPitsikalisMaragos_AudiovisualUncertaintyLearning_mmsp07.pdf}, doi = {10.1109/MMSP.2007.4412868}, isbn = {1424412749}, year = {2007}, date = {2007-01-01}, booktitle = {2007 IEEE 9Th International Workshop on Multimedia Signal Processing, MMSP 2007 - Proceedings}, number = {November 2015}, pages = {264--267}, abstract = {We study the effect of uncertain feature measurements and show how classification and learning rules should be adjusted to compensate for it. Our approach is particularly fruitful in multimodal fusion scenarios, such as audio-visual speech recognition, where multiple streams of complementary features whose reliability is time-varying are integrated. For such applications, by taking the measurement noise uncertainty of each feature stream into account, the proposed framework leads to highly adaptive multimodal fusion rules for classification and learning which are widely applicable and easy to implement. We further show that previous multimodal fusion methods relying on stream weights fall under our scheme under certain assumptions; this provides novel insights into their applicability for various tasks and suggests new practical ways for estimating the stream weights adaptively. 
The potential of our approach is demonstrated in audio-visual speech recognition experiments.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We study the effect of uncertain feature measurements and show how classification and learning rules should be adjusted to compensate for it. Our approach is particularly fruitful in multimodal fusion scenarios, such as audio-visual speech recognition, where multiple streams of complementary features whose reliability is time-varying are integrated. For such applications, by taking the measurement noise uncertainty of each feature stream into account, the proposed framework leads to highly adaptive multimodal fusion rules for classification and learning which are widely applicable and easy to implement. We further show that previous multimodal fusion methods relying on stream weights fall under our scheme under certain assumptions; this provides novel insights into their applicability for various tasks and suggests new practical ways for estimating the stream weights adaptively. The potential of our approach is demonstrated in audio-visual speech recognition experiments. |
Konstantinos Rapantzikos, Georgios Evangelopoulos, Petros Maragos, Yannis Avrithis An audio-visual saliency model for movie summarization Conference 2007 IEEE 9Th International Workshop on Multimedia Signal Processing, MMSP 2007 - Proceedings, 2007, ISBN: 1424412749. Abstract | BibTeX | Links: [PDF] @conference{219, title = {An audio-visual saliency model for movie summarization}, author = { Konstantinos Rapantzikos and Georgios Evangelopoulos and Petros Maragos and Yannis Avrithis}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/RapantzikosEvangelopoulosMaragosEtAl_AVSaliencyDetectionSummarization_mmsp07.pdf}, doi = {10.1109/MMSP.2007.4412882}, isbn = {1424412749}, year = {2007}, date = {2007-01-01}, booktitle = {2007 IEEE 9Th International Workshop on Multimedia Signal Processing, MMSP 2007 - Proceedings}, pages = {320--323}, abstract = {A saliency-based method for generating video summaries is presented, which exploits coupled audiovisual information from both media streams. Efficient and advanced speech and image processing algorithms to detect key frames that are acoustically and visually salient are used. Promising results are shown from experiments on a movie database.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } A saliency-based method for generating video summaries is presented, which exploits coupled audiovisual information from both media streams. Efficient and advanced speech and image processing algorithms to detect key frames that are acoustically and visually salient are used. Promising results are shown from experiments on a movie database. |
Anastasios Roussos, Petros Maragos Vector-Valued Image Interpolation by an Anisotropic Diffusion-Projection PDE Conference Ssvm, LNCS 4885 , 2007, ISSN: 03029743. Abstract | BibTeX | Links: [PDF] @conference{222, title = {Vector-Valued Image Interpolation by an Anisotropic Diffusion-Projection PDE}, author = { Anastasios Roussos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/RoussosMaragos_VectorImgInterpolProjDiffPDE_SSVM2007.pdf}, issn = {03029743}, year = {2007}, date = {2007-01-01}, booktitle = {Ssvm}, volume = {LNCS 4885}, pages = {104--115}, abstract = {We propose a nonlinear image interpolation method, based on an anisotropic diffusion PDE and designed for the general case of vector-valued images. The interpolation solution is restricted to the subspace of functions that can recover the discrete input image, after an appropriate smoothing and sampling. The proposed nonlinear diffusion flow lies on this subspace and its strength and anisotropy effectively adapt to the local variations and geometry of image structures. The derived model efficiently reconstructs the real image structures, leading to a natural interpolation, with reduced blurring, staircase and ringing artifacts of classic methods. This method also outperforms other existing PDE-based interpolation methods. We present experimental results that prove the potential and efficacy of the method as applied to graylevel and color images.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } We propose a nonlinear image interpolation method, based on an anisotropic diffusion PDE and designed for the general case of vector-valued images. The interpolation solution is restricted to the subspace of functions that can recover the discrete input image, after an appropriate smoothing and sampling. 
The proposed nonlinear diffusion flow lies on this subspace and its strength and anisotropy effectively adapt to the local variations and geometry of image structures. The derived model efficiently reconstructs the real image structures, leading to a natural interpolation, with reduced blurring, staircase and ringing artifacts of classic methods. This method also outperforms other existing PDE-based interpolation methods. We present experimental results that prove the potential and efficacy of the method as applied to graylevel and color images. |
Anastasia Sofou, Petros Maragos Generalized Watershed and PDEs for Geometric-Textural Segmentation Conference Proceedings of 8th International Symposium on Mathematical Morphology (ISMM 2007), Rio de Janeiro, Brazil, Oct. 2007. Published in Mathematical Morphology and its Applications to Signal and Image Processing, edited by G.J.F. Banon, J. Barrera and U. Braga-Neto, MCT/INPE, Brazil, 2007, pp.411-422, 2007. Abstract | BibTeX | Links: [PDF] @conference{216, title = {Generalized Watershed and PDEs for Geometric-Textural Segmentation}, author = { Anastasia Sofou and Petros Maragos}, url = {SofouMaragos_GeneralWshedPDE-GeomTexturSegm_ISMM2007.pdf}, year = {2007}, date = {2007-01-01}, booktitle = {Proceedings of 8th International Symposium on Mathematical Morphology (ISMM 2007), Rio de Janeiro, Brazil, Oct. 2007. Published in Mathematical Morphology and its Applications to Signal and Image Processing, edited by G.J.F. Banon, J. Barrera and U. Braga-Neto, MCT/INPE, Brazil, 2007, pp.411-422}, abstract = {In this paper we approach the segmentation problem by attempting to incorporate cues such as intensity contrast, region size and texture in the segmentation procedure and derive improved results compared to using individual cues separately. We propose efficient simplification operators and feature extraction schemes, capable of quantifying important characteristics like geometrical complexity, rate of change in local contrast variations and orientation, that eventually favor the final segmentation result. 
Based on the morphological paradigm of watershed transform we investigate and extend its PDE formulation in order to satisfy various flooding criteria, and couple them with texture information thus making it applicable to a wider range of images.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper we approach the segmentation problem by attempting to incorporate cues such as intensity contrast, region size and texture in the segmentation procedure and derive improved results compared to using individual cues separately. We propose efficient simplification operators and feature extraction schemes, capable of quantifying important characteristics like geometrical complexity, rate of change in local contrast variations and orientation, that eventually favor the final segmentation result. Based on the morphological paradigm of watershed transform we investigate and extend its PDE formulation in order to satisfy various flooding criteria, and couple them with texture information thus making it applicable to a wider range of images. |
2006 |
Costas S Tzafestas, Nektaria Palaiologou, Manthos Alifragis Virtual and remote robotic laboratory: Comparative experimental evaluation Journal Article IEEE Transactions on Education, 49 (3), pp. 360–369, 2006, ISSN: 00189359. Abstract | BibTeX | Links: [PDF] @article{3_64, title = {Virtual and remote robotic laboratory: Comparative experimental evaluation}, author = {Costas S Tzafestas and Nektaria Palaiologou and Manthos Alifragis}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Virtual_and_remote_robotic_laboratory_comparative_experimental_evaluation.pdf}, doi = {10.1109/TE.2006.879255}, issn = {00189359}, year = {2006}, date = {2006-01-01}, journal = {IEEE Transactions on Education}, volume = {49}, number = {3}, pages = {360--369}, abstract = {This paper describes the development and experimental evaluation of an e-laboratory platform in the field of robotics. The system in its current configuration is designed to enable distance training of students in real scenarios of robot manipulator programming. From a technological perspective, the research work presented in this paper is directed towards the adaptation of concepts and techniques developed in the field of telerobotics and virtual reality, and their integration in such e-laboratory settings. This paper focuses particularly on the educational impact of such systems. The goal is to assess the performance of e-laboratory scenarios in terms of the efficacy of training provided to students. The results of a pilot experimental study are presented, providing a comparative evaluation for three training modalities: real, remote, and virtual training on robot manipulator programming. The experiments were conducted according to an evaluation protocol specially designed for the considered target training task, using scoring charts to obtain quantitative performance measures and assess the performance of the student groups participating in the course. 
Training, as a dynamic process, is approached according to a classical three dimensional model, and performance scores are accordingly assessed in these dimensions (namely: low-level versus mid and high-level skills and understanding). The obtained results reveal certain differences between the three groups, particularly as related to the low-level skill training score, giving some insight about the training `dimensions' that are expected to be mostly affected by the absence of physical (or realistic virtual) presence in a real hands-on experimentation. Statistical analysis indicates, however, that, despite these apparent differences, such e-laboratory modules can be integrated quite effectively in practical scenarios, creating virtual training environments that can provide adequate learning elements, as related particularly to mid and high-level skill acquisition. Further work and large-scale studies are still needed, though, in order to explore the extent to which such a general conclusion is valid in different training settings, and to form the basis of a more theoretical evaluation for a comprehensive understanding of the pedagogical differences between real, virtual, and remote learning/training methodologies and experiences}, keywords = {}, pubstate = {published}, tppubtype = {article} } This paper describes the development and experimental evaluation of an e-laboratory platform in the field of robotics. The system in its current configuration is designed to enable distance training of students in real scenarios of robot manipulator programming. From a technological perspective, the research work presented in this paper is directed towards the adaptation of concepts and techniques developed in the field of telerobotics and virtual reality, and their integration in such e-laboratory settings. This paper focuses particularly on the educational impact of such systems. 
The goal is to assess the performance of e-laboratory scenarios in terms of the efficacy of training provided to students. The results of a pilot experimental study are presented, providing a comparative evaluation for three training modalities: real, remote, and virtual training on robot manipulator programming. The experiments were conducted according to an evaluation protocol specially designed for the considered target training task, using scoring charts to obtain quantitative performance measures and assess the performance of the student groups participating in the course. Training, as a dynamic process, is approached according to a classical three dimensional model, and performance scores are accordingly assessed in these dimensions (namely: low-level versus mid and high-level skills and understanding). The obtained results reveal certain differences between the three groups, particularly as related to the low-level skill training score, giving some insight about the training `dimensions' that are expected to be mostly affected by the absence of physical (or realistic virtual) presence in a real hands-on experimentation. Statistical analysis indicates, however, that, despite these apparent differences, such e-laboratory modules can be integrated quite effectively in practical scenarios, creating virtual training environments that can provide adequate learning elements, as related particularly to mid and high-level skill acquisition. Further work and large-scale studies are still needed, though, in order to explore the extent to which such a general conclusion is valid in different training settings, and to form the basis of a more theoretical evaluation for a comprehensive understanding of the pedagogical differences between real, virtual, and remote learning/training methodologies and experiences |
Dimitrios Dimitriadis, Petros Maragos Continuous energy demodulation methods and application to speech analysis Journal Article Speech Communication, 48 (7), pp. 819–837, 2006, ISSN: 01676393. Abstract | BibTeX | Links: [PDF] @article{121, title = {Continuous energy demodulation methods and application to speech analysis}, author = {Dimitrios Dimitriadis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/DimitriadisMaragos_ContEnergDemodMethodApplicSpeechAnalysis_SpeCom2006.pdf}, doi = {10.1016/j.specom.2005.08.007}, issn = {01676393}, year = {2006}, date = {2006-01-01}, journal = {Speech Communication}, volume = {48}, number = {7}, pages = {819--837}, abstract = {Speech resonance signals appear to contain significant amplitude and frequency modulations. An efficient demodulation approach is based on energy operators. In this paper, we develop two new robust methods for energy-based speech demodulation and compare their performance on both test and actual speech signals. The first method uses smoothing splines for discrete-to-continuous signal approximation. The second (and best) method uses time-derivatives of Gabor filters. Further, we apply the best demodulation method to explore the statistical distribution of speech modulation features and study their properties regarding applications of speech classification and recognition. Finally, we present some preliminary recognition results and underline their improvements when compared to the corresponding MFCC results. {\copyright} 2005 Elsevier B.V. All rights reserved.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Speech resonance signals appear to contain significant amplitude and frequency modulations. An efficient demodulation approach is based on energy operators. In this paper, we develop two new robust methods for energy-based speech demodulation and compare their performance on both test and actual speech signals. 
The first method uses smoothing splines for discrete-to-continuous signal approximation. The second (and best) method uses time-derivatives of Gabor filters. Further, we apply the best demodulation method to explore the statistical distribution of speech modulation features and study their properties regarding applications of speech classification and recognition. Finally, we present some preliminary recognition results and underline their improvements when compared to the corresponding MFCC results. © 2005 Elsevier B.V. All rights reserved. |
Georgios Evangelopoulos, Petros Maragos Multiband modulation energy tracking for noisy speech detection Journal Article IEEE Transactions on Audio, Speech and Language Processing, 14 (6), pp. 2024–2038, 2006, ISSN: 15587916. Abstract | BibTeX | Links: [PDF] @article{122, title = {Multiband modulation energy tracking for noisy speech detection}, author = {Georgios Evangelopoulos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/EvangelopoulosMaragos_MultibandModEnergyTrackNoisSpeechDetect_ieeetASLP2006.pdf}, doi = {10.1109/TASL.2006.872625}, issn = {15587916}, year = {2006}, date = {2006-01-01}, journal = {IEEE Transactions on Audio, Speech and Language Processing}, volume = {14}, number = {6}, pages = {2024--2038}, abstract = {The ability to accurately locate the boundaries of speech activity is an important attribute of any modern speech recognition, processing, or transmission system. The effort in this paper is the development of efficient, sophisticated features for speech detection in noisy environments, using ideas and techniques from recent advances in speech modeling and analysis, like presence of modulations in speech formants, energy separation and multiband filtering. First we present a method, conceptually based on a classic speech-silence discrimination procedure, that uses some newly developed, short-time signal analysis tools and provide for it a detection theoretic motivation. The new energy and spectral content representations are derived through filtering the signal in various frequency bands, estimating the Teager-Kaiser energy for each and demodulating the most active one in order to derive the signal's dominant AM-FM components. This modulation approach demonstrated an improved robustness in noise over the classic algorithm, reaching an average error reduction of 33.5% under 5-30-dB noise. 
Second, by incorporating alternative modulation energy features in voice activity detection, improvement in overall misclassification error of a high hit rate detector reached 7.5% and 9.5% on different benchmarks}, keywords = {}, pubstate = {published}, tppubtype = {article} } The ability to accurately locate the boundaries of speech activity is an important attribute of any modern speech recognition, processing, or transmission system. The effort in this paper is the development of efficient, sophisticated features for speech detection in noisy environments, using ideas and techniques from recent advances in speech modeling and analysis, like presence of modulations in speech formants, energy separation and multiband filtering. First we present a method, conceptually based on a classic speech-silence discrimination procedure, that uses some newly developed, short-time signal analysis tools and provide for it a detection theoretic motivation. The new energy and spectral content representations are derived through filtering the signal in various frequency bands, estimating the Teager-Kaiser energy for each and demodulating the most active one in order to derive the signal's dominant AM-FM components. This modulation approach demonstrated an improved robustness in noise over the classic algorithm, reaching an average error reduction of 33.5% under 5-30-dB noise. Second, by incorporating alternative modulation energy features in voice activity detection, improvement in overall misclassification error of a high hit rate detector reached 7.5% and 9.5% on different benchmarks |
Vassilis Pitsikalis, Petros Maragos Filtered dynamics and fractal dimensions for noisy speech recognition Journal Article IEEE Signal Processing Letters, 13 (11), pp. 711–714, 2006, ISSN: 10709908. Abstract | BibTeX | Links: [PDF] @article{123, title = {Filtered dynamics and fractal dimensions for noisy speech recognition}, author = {Vassilis Pitsikalis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PitsikalisMaragos_FilteredDynamicsFractalDimensionsNoisySpeechRecognition_ieeeSPL2006.pdf}, doi = {10.1109/LSP.2006.879424}, issn = {10709908}, year = {2006}, date = {2006-01-01}, journal = {IEEE Signal Processing Letters}, volume = {13}, number = {11}, pages = {711--714}, abstract = {We explore methods from fractals and dynamical systems theory for robust processing and recognition of noisy speech. A speech signal is embedded in a multidimensional phase-space and is subsequently filtered exploiting aspects of its unfolded dynamics. Invariant measures (fractal dimensions) of the filtered signal are used as features in automatic speech recognition (ASR). We evaluate the new proposed features as well as the previously proposed multiscale fractal dimension via ASR experiments on the Aurora 2 database. The conducted experiments demonstrate relative improved word accuracy for the fractal features, especially at lower signal-to-noise ratio, when they are combined with the mel-frequency cepstral coefficients}, keywords = {}, pubstate = {published}, tppubtype = {article} } We explore methods from fractals and dynamical systems theory for robust processing and recognition of noisy speech. A speech signal is embedded in a multidimensional phase-space and is subsequently filtered exploiting aspects of its unfolded dynamics. Invariant measures (fractal dimensions) of the filtered signal are used as features in automatic speech recognition (ASR). 
We evaluate the new proposed features as well as the previously proposed multiscale fractal dimension via ASR experiments on the Aurora 2 database. The conducted experiments demonstrate relative improved word accuracy for the fractal features, especially at lower signal-to-noise ratio, when they are combined with the mel-frequency cepstral coefficients |
A Katsamanis, G Papandreou, V Pitsikalis, P Maragos Multimodal fusion by adaptive compensation for feature uncertainty with application to audiovisual speech recognition Conference Proc. 14th European Signal Processing Conference (EUSIPCO-2006), Florence, Italy, Sep. 2006, 2006, ISBN: 22195491 (ISSN). Abstract | BibTeX | Links: [Webpage] [PDF] @conference{Katsamanis2006, title = {Multimodal fusion by adaptive compensation for feature uncertainty with application to audiovisual speech recognition}, author = { A Katsamanis and G Papandreou and V Pitsikalis and P Maragos}, url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84862631884&partnerID=40&md5=ccaeee023c42f0923a6dcdec81ac7fdc http://robotics.ntua.gr/wp-content/uploads/sites/2/KatsamanisPapandreouPitsikalisMaragos_MultimodalFusion-AdaptCompens-FeaturUncertain-AV-ASR_EUSIPCO2006.pdf}, isbn = {22195491 (ISSN)}, year = {2006}, date = {2006-09-01}, booktitle = {Proc. 14th European Signal Processing Conference (EUSIPCO-2006), Florence, Italy, Sep. 2006}, abstract = {In pattern recognition one usually relies on measuring a set of informative features to perform tasks such as classification. While the accuracy of feature measurements heavily depends on changing environmental conditions, studying the consequences of this fact has received relatively little attention to date. In this work we explicitly take into account uncertainty in feature measurements and we show in a rigorous probabilistic framework how the models used for classification should be adjusted to compensate for this effect. Our approach proves to be particularly fruitful in multimodal fusion scenarios, such as audio-visual speech recognition, where multiple streams of complementary features are integrated. 
For such applications, provided that an estimate of the measurement noise uncertainty for each feature stream is available, we show that the proposed framework leads to highly adaptive multimodal fusion rules which are widely applicable and easy to implement. We further show that previous multimodal fusion methods relying on stream weights fall under our scheme if certain assumptions hold; this provides novel insights into their applicability for various tasks and suggests new practical ways for estimating the stream weights adaptively. Preliminary experimental results in audio-visual speech recognition demonstrate the potential of our approach.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In pattern recognition one usually relies on measuring a set of informative features to perform tasks such as classification. While the accuracy of feature measurements heavily depends on changing environmental conditions, studying the consequences of this fact has received relatively little attention to date. In this work we explicitly take into account uncertainty in feature measurements and we show in a rigorous probabilistic framework how the models used for classification should be adjusted to compensate for this effect. Our approach proves to be particularly fruitful in multimodal fusion scenarios, such as audio-visual speech recognition, where multiple streams of complementary features are integrated. For such applications, provided that an estimate of the measurement noise uncertainty for each feature stream is available, we show that the proposed framework leads to highly adaptive multimodal fusion rules which are widely applicable and easy to implement. We further show that previous multimodal fusion methods relying on stream weights fall under our scheme if certain assumptions hold; this provides novel insights into their applicability for various tasks and suggests new practical ways for estimating the stream weights adaptively. 
Preliminary experimental results in audio-visual speech recognition demonstrate the potential of our approach. |
Nikos C. Mitsou, Spyros V. Velanas, Costas S. Tzafestas Visuo-haptic interface for teleoperation of mobile robot exploration tasks Conference Proceedings - IEEE International Workshop on Robot and Human Interactive Communication, 2006, ISSN: 1-4244-0565-3. Abstract | BibTeX | Links: [PDF] @conference{Mitsou2006, title = {Visuo-haptic interface for teleoperation of mobile robot exploration tasks}, author = { Nikos C. Mitsou and Spyros V. Velanas and Costas S. Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Visuo-Haptic_Interface_for_Teleoperation_of_Mobile_Robot_Exploration_Tasks.pdf}, doi = {10.1109/ROMAN.2006.314411}, issn = {1-4244-0565-3}, year = {2006}, date = {2006-09-01}, booktitle = {Proceedings - IEEE International Workshop on Robot and Human Interactive Communication}, pages = {157--163}, abstract = {With the spread of low-cost haptic devices, haptic interfaces appear in many areas in the field of robotics. Recently, haptic devices have been used in the field of mobile robot teleoperation, where mobile robots operate in unknown and dangerous environments performing particular tasks. Haptic feedback is shown to improve operator perception of the environment without, however, improving exploration time. In this paper, we present a haptic interface that is used to teleoperate a mobile robot in exploring polygonal environments. The proposed visuo-haptic interface is found to improve navigation time and operator perception of the remote environment. The human-operator can simultaneously select two different commands, the first one being set as "active" motion command, while the second one is set as a "guarded" motion type of navigation command. The user can feel a haptic equivalent for both types of teleguidance motion commands, and can also observe in real-time the sequential creation of the remote environment map. 
Comparative evaluation experiments show that the proposed system makes the task of remote navigation of unknown environments easier}, keywords = {}, pubstate = {published}, tppubtype = {conference} } With the spread of low-cost haptic devices, haptic interfaces appear in many areas in the field of robotics. Recently, haptic devices have been used in the field of mobile robot teleoperation, where mobile robots operate in unknown and dangerous environments performing particular tasks. Haptic feedback is shown to improve operator perception of the environment without, however, improving exploration time. In this paper, we present a haptic interface that is used to teleoperate a mobile robot in exploring polygonal environments. The proposed visuo-haptic interface is found to improve navigation time and operator perception of the remote environment. The human-operator can simultaneously select two different commands, the first one being set as "active" motion command, while the second one is set as a "guarded" motion type of navigation command. The user can feel a haptic equivalent for both types of teleguidance motion commands, and can also observe in real-time the sequential creation of the remote environment map. Comparative evaluation experiments show that the proposed system makes the task of remote navigation of unknown environments easier |
Iasonas Kokkinos, Petros Maragos, Alan Yuille Bottom-up & top-down object detection using primal sketch features and graphical models Conference Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, 2 (2), 2006, ISSN: 10636919. Abstract | BibTeX | Links: [PDF] @conference{227, title = {Bottom-up & top-down object detection using primal sketch features and graphical models}, author = { Iasonas Kokkinos and Petros Maragos and Alan Yuille}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KokkinosMaragosYuille_BU-TD-ObjectDetection-PrimalSketch-GraphModels_CVPR2006.pdf}, doi = {10.1109/CVPR.2006.74}, issn = {10636919}, year = {2006}, date = {2006-01-01}, booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition}, volume = {2}, number = {2}, pages = {1893--1900}, abstract = {A combination of techniques that is becoming increasingly popular is the construction of part-based object representations using the outputs of interest-point detectors. Our contributions in this paper are twofold: first, we propose a primal-sketch-based set of image tokens that are used for object representation and detection. Second, top-down information is introduced based on an efficient method for the evaluation of the likelihood of hypothesized part locations. This allows us to use graphical model techniques to complement bottom-up detection, by proposing and finding the parts of the object that were missed by the front-end feature detection stage. Detection results for four object categories validate the merits of this joint top-down and bottom-up approach.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } A combination of techniques that is becoming increasingly popular is the construction of part-based object representations using the outputs of interest-point detectors. 
Our contributions in this paper are twofold: first, we propose a primal-sketch-based set of image tokens that are used for object representation and detection. Second, top-down information is introduced based on an efficient method for the evaluation of the likelihood of hypothesized part locations. This allows us to use graphical model techniques to complement bottom-up detection, by proposing and finding the parts of the object that were missed by the front-end feature detection stage. Detection results for four object categories validate the merits of this joint top-down and bottom-up approach. |
S Lefkimmiatis, Dimitrios Dimitriadis, Petros Maragos An optimum microphone array post-filter for speech applications Conference Interspeech, (2), 2006, ISBN: 9781604234497. Abstract | BibTeX | Links: [PDF] @conference{224, title = {An optimum microphone array post-filter for speech applications}, author = {S Lefkimmiatis and Dimitrios Dimitriadis and Petros Maragos}, url = {http://www.math.ucla.edu/~stamatis/publications/icslp2006.pdf}, isbn = {9781604234497}, year = {2006}, date = {2006-01-01}, booktitle = {Interspeech}, number = {2}, pages = {1--4}, abstract = {This paper proposes a post-filtering estimation scheme for multichannel noise reduction. The proposed method extends and improves the existing Zelinski's and, the most general and prominent, McCowan's post-filtering methods that use the auto-and ...}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper proposes a post-filtering estimation scheme for multichannel noise reduction. The proposed method extends and improves the existing Zelinski's and, the most general and prominent, McCowan's post-filtering methods that use the auto-and ... |
Stamatis Leukimmiatis, Petros Maragos Optimum post-filter estimation for noise reduction in multichannel speech processing Conference European Signal Processing Conference, 2006, ISSN: 22195491. @conference{226, title = {Optimum post-filter estimation for noise reduction in multichannel speech processing}, author = { Stamatis Leukimmiatis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/LeukimmiatisMaragos_OptimPostfilter-NoisReduction-MultichanSpeech_EUSIPCO2006.pdf}, issn = {22195491}, year = {2006}, date = {2006-01-01}, booktitle = {European Signal Processing Conference}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
D Christopoulos, I Kaklamanos, G Bonatsos, K. Birbas, C. Tzafestas Virtual paracentesis simulator (DEX): impact of haptic interaction on the learning curve Conference 10th World Congress of Endoscopic Surgery and 14th International Congress of the European Association for Endoscopic Surgery (EAES), Βερολίνο, 13-16 Sept. 2006, 2006. @conference{47b, title = {Virtual paracentesis simulator (DEX): impact of haptic interaction on the learning curve}, author = {D Christopoulos and I Kaklamanos and G Bonatsos and K. Birbas and C. Tzafestas}, year = {2006}, date = {2006-01-01}, booktitle = {10th World Congress of Endoscopic Surgery and 14th International Congress of the European Association for Endoscopic Surgery (EAES), Βερολίνο, 13-16 Sept. 2006}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
I Kaklamanos, A Vezakis, G Polymeneas, G Bonatsos, K. Birbas, C. Tzafestas Spatial ability can predict laparoscopic skill performance of novice surgeons Conference 10th World Congress of Endoscopic Surgery and 14th International Congress of the European Association for Endoscopic Surgery (EAES), Βερολίνο, 13-16 Sept. 2006, 2006. @conference{46b, title = {Spatial ability can predict laparoscopic skill performance of novice surgeons}, author = {I Kaklamanos and A Vezakis and G Polymeneas and G Bonatsos and K. Birbas and C. Tzafestas}, year = {2006}, date = {2006-01-01}, booktitle = {10th World Congress of Endoscopic Surgery and 14th International Congress of the European Association for Endoscopic Surgery (EAES), Βερολίνο, 13-16 Sept. 2006}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
S Mclaughlin, Petros Maragos Nonlinear methods for speech analysis and synthesis Book Chapter Advances in nonlinear signal and image processing, 6 , 2006. @inbook{Mclaughlin2006, title = {Nonlinear methods for speech analysis and synthesis}, author = {S Mclaughlin and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Nonlinear_methods_for_speech_analysis_and_synthesi.pdf}, year = {2006}, date = {2006-01-01}, booktitle = {Advances in nonlinear signal and image processing}, volume = {6}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } |
2005 |
A Sofou, G Evangelopoulos, P Maragos Soil image segmentation and texture analysis: a computer vision approach Journal Article IEEE Geoscience and Remote Sensing Letters, 2 (4), pp. 394–398, 2005, ISSN: 1545-598X. Abstract | BibTeX | Links: [PDF] @article{1522208, title = {Soil image segmentation and texture analysis: a computer vision approach}, author = {A Sofou and G Evangelopoulos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/SofouEvangelopoulosMaragos_SoilImSegmentTexturAnalCompVision_ieeeGRSL2005.pdf}, doi = {10.1109/LGRS.2005.851752}, issn = {1545-598X}, year = {2005}, date = {2005-10-01}, journal = {IEEE Geoscience and Remote Sensing Letters}, volume = {2}, number = {4}, pages = {394--398}, abstract = {Automated processing of digitized soil section images reveals elements of soil structure and draws primary estimates of bioecological importance, like ground fertility and changes in terrestrial ecosystems. We examine a sophisticated integration of some modern methods from computer vision for image feature extraction, texture analysis, and segmentation into homogeneous regions, relevant to soil micromorphology. First, we propose the use of a morphological partial differential equation-based segmentation scheme based on seeded region-growing and level curve evolution with speed depending on image contrast. Second, we analyze surface texture information by modeling image variations as local modulation components and using multifrequency filtering and instantaneous nonlinear energy-tracking operators to estimate spatial modulation energy. By separately exploiting contrast and texture information, through multiscale image smoothing, we propose a joint image segmentation method for further interpretation of soil images and feature measurements. Our experimental results in images digitized under different specifications and scales demonstrate the efficacy of our proposed computational methods for soil structure analysis. 
We also briefly demonstrate their applicability to remote sensing images.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Automated processing of digitized soil section images reveals elements of soil structure and draws primary estimates of bioecological importance, like ground fertility and changes in terrestrial ecosystems. We examine a sophisticated integration of some modern methods from computer vision for image feature extraction, texture analysis, and segmentation into homogeneous regions, relevant to soil micromorphology. First, we propose the use of a morphological partial differential equation-based segmentation scheme based on seeded region-growing and level curve evolution with speed depending on image contrast. Second, we analyze surface texture information by modeling image variations as local modulation components and using multifrequency filtering and instantaneous nonlinear energy-tracking operators to estimate spatial modulation energy. By separately exploiting contrast and texture information, through multiscale image smoothing, we propose a joint image segmentation method for further interpretation of soil images and feature measurements. Our experimental results in images digitized under different specifications and scales demonstrate the efficacy of our proposed computational methods for soil structure analysis. We also briefly demonstrate their applicability to remote sensing images. |
D Dimitriadis, P Maragos, A Potamianos Robust AM-FM features for speech recognition Journal Article IEEE Signal Processing Letters, 12 (9), pp. 621–624, 2005, ISSN: 1070-9908. Abstract | BibTeX | Links: [PDF] @article{1495427, title = {Robust AM-FM features for speech recognition}, author = {D Dimitriadis and P Maragos and A Potamianos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/DimitriadisMaragosPotamianos_RobustAMFM_Features_SpeechRecognition_ieeeSPL2005.pdf}, doi = {10.1109/LSP.2005.853050}, issn = {1070-9908}, year = {2005}, date = {2005-09-01}, journal = {IEEE Signal Processing Letters}, volume = {12}, number = {9}, pages = {621--624}, abstract = {In this letter, a nonlinear AM-FM speech model is used to extract robust features for speech recognition. The proposed features measure the amount of amplitude and frequency modulation that exists in speech resonances and attempt to model aspects of the speech acoustic information that the commonly used linear source-filter model fails to capture. The robustness and discriminability of the AM-FM features is investigated in combination with mel cepstrum coefficients (MFCCs). It is shown that these hybrid features perform well in the presence of noise, both in terms of phoneme-discrimination (J-measure) and in terms of speech recognition performance in several different tasks. Average relative error rate reduction up to 11% for clean and 46% for mismatched noisy conditions is achieved when AM-FM features are combined with MFCCs.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this letter, a nonlinear AM-FM speech model is used to extract robust features for speech recognition. The proposed features measure the amount of amplitude and frequency modulation that exists in speech resonances and attempt to model aspects of the speech acoustic information that the commonly used linear source-filter model fails to capture. 
The robustness and discriminability of the AM-FM features is investigated in combination with mel cepstrum coefficients (MFCCs). It is shown that these hybrid features perform well in the presence of noise, both in terms of phoneme-discrimination (J-measure) and in terms of speech recognition performance in several different tasks. Average relative error rate reduction up to 11% for clean and 46% for mismatched noisy conditions is achieved when AM-FM features are combined with MFCCs. |
N Palaiologou, C Tzafestas Training scenarios for students on virtual and remote robotic laboratory platforms Journal Article 2005. @article{4_50, title = {Training scenarios for students on virtual and remote robotic laboratory platforms}, author = {N Palaiologou and C Tzafestas}, url = {https://imanagerpublications.com/assets/htmlfiles/JFET()957.html }, year = {2005}, date = {2005-01-01}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Iasonas Kokkinos, Petros Maragos Nonlinear Speech Analysis Using Models for Chaotic Systems Journal Article IEEE Transactions on Speech and Audio Processing, 13 (6), pp. 1098–1109, 2005. @article{78c, title = {Nonlinear Speech Analysis Using Models for Chaotic Systems}, author = {Iasonas Kokkinos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KokkinosMaragos_NonlinSpeechAnalysChaoticSystems_ieeetSAP2005.pdf}, year = {2005}, date = {2005-01-01}, journal = {IEEE Transactions on Speech and Audio Processing}, volume = {13}, number = {6}, pages = {1098--1109}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
P Maragos Lattice Image Processing: A Unification of Morphological and Fuzzy Algebraic Systems Journal Article Journal of Mathematical Imaging and Vision, 22 (0), pp. 333–353, 2005. @article{119, title = {Lattice Image Processing: A Unification of Morphological and Fuzzy Algebraic Systems}, author = {P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/Maragos_LatImProcUnifMorfFuzAlgSyst_JMIV2005.pdf}, year = {2005}, date = {2005-01-01}, journal = {Journal of Mathematical Imaging and Vision}, volume = {22}, number = {0}, pages = {333--353}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Alexandros G Dimakis, Petros Maragos Phase-modulated resonances modeled as self-similar processes with application to turbulent sounds Journal Article IEEE Transactions on Signal Processing, 53 (11), pp. 4261–4272, 2005, ISSN: 1053587X. Abstract | BibTeX | Links: [PDF] @article{120, title = {Phase-modulated resonances modeled as self-similar processes with application to turbulent sounds}, author = {Alexandros G Dimakis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/DimakisMaragos_PhaseModulResonSelfSimTurbulSound_ieeetSP2005.pdf}, doi = {10.1109/TSP.2005.857010}, issn = {1053587X}, year = {2005}, date = {2005-01-01}, journal = {IEEE Transactions on Signal Processing}, volume = {53}, number = {11}, pages = {4261--4272}, abstract = {In this paper, we propose a nonlinear stochastic model for time-varying resonances where the instantaneous phase (and frequency) of a sinusoidal oscillation is allowed to vary proportionally to an α-stable self-similar stochastic processes. The main motivation of our work stems from previous experimental and theoretical evidence that speech resonances in fricative sounds can be modeled phenomenologically as AM-FM signals with randomly varying instantaneous frequencies and that several signal classes related to turbulent phenomena are self-similar 1/f processes. Our general approach is to model the instantaneous phase of an AM-FM resonance as a self-similar α-stable process. As a special case, this random phase model includes the class of random fractal signals known as fractional Brownian motion. We theoretically explore this random modulation model and analytically derive its autocorrelation and power spectrum. We also propose an algorithm to fit this model to arbitrary resonances with random phase modulation. 
Further, we apply the above ideas to real speech data and demonstrate that this model is suitable for resonances of fricative sounds.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this paper, we propose a nonlinear stochastic model for time-varying resonances where the instantaneous phase (and frequency) of a sinusoidal oscillation is allowed to vary proportionally to an α-stable self-similar stochastic processes. The main motivation of our work stems from previous experimental and theoretical evidence that speech resonances in fricative sounds can be modeled phenomenologically as AM-FM signals with randomly varying instantaneous frequencies and that several signal classes related to turbulent phenomena are self-similar 1/f processes. Our general approach is to model the instantaneous phase of an AM-FM resonance as a self-similar α-stable process. As a special case, this random phase model includes the class of random fractal signals known as fractional Brownian motion. We theoretically explore this random modulation model and analytically derive its autocorrelation and power spectrum. We also propose an algorithm to fit this model to arbitrary resonances with random phase modulation. Further, we apply the above ideas to real speech data and demonstrate that this model is suitable for resonances of fricative sounds. |
D Dimitriadis, A Katsamanis, P Maragos Towards Automatic Speech Recognition In Adverse Environments Conference Proc. HERCMA 2005 -- 7th Hellenic European Conference on Research on Computer Mathematics and its Applications, Athens, Greece, September 2005, 2005. @conference{Dimitriadis2005, title = {Towards Automatic Speech Recognition In Adverse Environments}, author = { D Dimitriadis and A Katsamanis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/DKMPP_AutomaticSpeechRecognitionAdverseEnvironments_HERCMA2005_footer.pdf}, year = {2005}, date = {2005-09-01}, booktitle = {Proc. HERCMA 2005 -- 7th Hellenic European Conference on Research on Computer Mathematics and its Applications, Athens, Greece, September 2005}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Maragos A Variational Formulation of PDE's for Dilations and Levelings Conference Proc. Intl Symposium on Mathematical Morphology (ISMM 2005), Paris, France, Apr. 2005. Lecture Notes on Computer Science, Springer-Verlag, 2005. @conference{Maragos2005b, title = {A Variational Formulation of PDE's for Dilations and Levelings}, author = { P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/maragos2005.pdf}, year = {2005}, date = {2005-04-01}, booktitle = {Proc. Intl Symposium on Mathematical Morphology (ISMM 2005), Paris, France, Apr. 2005. Lecture Notes on Computer Science, Springer-Verlag}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
K Birbas, C. Tzafestas Haptic Interaction in Simulating the Paracentesis of Great Vessels for Clinical Skill Enhancement Conference WorldHaptics'2005, First Joint Eurohaptics Conference and Symposium on Haptic Interfaces for Virtual Environment and Teleoperator Systems, Pisa, Italy March 18-20, 2005, 2005. @conference{52b, title = {Haptic Interaction in Simulating the Paracentesis of Great Vessels for Clinical Skill Enhancement}, author = {K Birbas and C. Tzafestas}, year = {2005}, date = {2005-01-01}, booktitle = {WorldHaptics'2005, First Joint Eurohaptics Conference and Symposium on Haptic Interfaces for Virtual Environment and Teleoperator Systems, Pisa, Italy March 18-20, 2005}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Dimitrios Dimitriadis, Petros Maragos, Alexandros Potamianos Auditory Teager Energy Cepstrum Coefficients for Robust Speech Recognition Conference Proc. of European Speech Processing Conference, (2), 2005. Abstract | BibTeX | Links: [PDF] @conference{233, title = {Auditory Teager Energy Cepstrum Coefficients for Robust Speech Recognition}, author = { Dimitrios Dimitriadis and Petros Maragos and Alexandros Potamianos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/DimitriadisMaragosPotamianos_AuditTeagEnergCepstrumRobustSpeechRecogn_Interspeech2005.pdf}, year = {2005}, date = {2005-01-01}, booktitle = {Proc. of European Speech Processing Conference}, number = {2}, pages = {3013--3016}, abstract = {In this paper, a feature extraction algorithm for robust speech recognition is introduced. The feature extraction algorithm is motivated by the human auditory processing and the nonlinear Teager-Kaiser energy operator that estimates the true energy of the source of a resonance. The proposed features are labeled as Teager Energy Cepstrum Coefficients (TECCs). TECCs are computed by first filtering the speech signal through a dense non constant-Q Gammatone filterbank and then by estimating the "true" energy of the signal's source, i.e., the short-time average of the output of the Teager-Kaiser energy operator. Error analysis and speech recognition experiments show that the TECCs and the mel frequency cepstrum coefficients (MFCCs) perform similarly for clean recording conditions; while the TECCs perform significantly better than the MFCCs for noisy recognition tasks. Specifically, relative word error rate improvement of 60% over the MFCC baseline is shown for the Aurora-3 database for the high-mismatch condition. 
Absolute error rate improvement ranging from 5% to 20% is shown for a phone recognition task in (various types of additive) noise.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, a feature extraction algorithm for robust speech recognition is introduced. The feature extraction algorithm is motivated by the human auditory processing and the nonlinear Teager-Kaiser energy operator that estimates the true energy of the source of a resonance. The proposed features are labeled as Teager Energy Cepstrum Coefficients (TECCs). TECCs are computed by first filtering the speech signal through a dense non constant-Q Gammatone filterbank and then by estimating the "true" energy of the signal's source, i.e., the short-time average of the output of the Teager-Kaiser energy operator. Error analysis and speech recognition experiments show that the TECCs and the mel frequency cepstrum coefficients (MFCCs) perform similarly for clean recording conditions; while the TECCs perform significantly better than the MFCCs for noisy recognition tasks. Specifically, relative word error rate improvement of 60% over the MFCC baseline is shown for the Aurora-3 database for the high-mismatch condition. Absolute error rate improvement ranging from 5% to 20% is shown for a phone recognition task in (various types of additive) noise. |
G Evangelopoulos, P Maragos Multiband Modulation Energy Features for Voice Event Detection Conference Proc. Interspeech 2005 - Eurospeech -- 9th European Conference on Speech Communication and Technology, Lisbon, Portugal, September 2005, 2005. @conference{235, title = {Multiband Modulation Energy Features for Voice Event Detection}, author = { G Evangelopoulos and P Maragos}, year = {2005}, date = {2005-01-01}, booktitle = {Proc. Interspeech 2005 - Eurospeech -- 9th European Conference on Speech Communication and Technology, Lisbon, Portugal, September 2005}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Georgios Evangelopoulos, Iasonas Kokkinos, Petros Maragos Advances in variational image segmentation using AM-FM models: Regularized demodulation and probabilistic cue integration Conference Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), 3752 LNCS , 2005, ISSN: 03029743. Abstract | BibTeX | Links: [PDF] @conference{229, title = {Advances in variational image segmentation using AM-FM models: Regularized demodulation and probabilistic cue integration}, author = { Georgios Evangelopoulos and Iasonas Kokkinos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/EvangelopoulosKokkinosMaragos_Advances-VariationalImagSegm-AMFM_VLSM2005.pdf}, doi = {10.1007/11567646_11}, issn = {03029743}, year = {2005}, date = {2005-01-01}, booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {3752 LNCS}, pages = {121--136}, abstract = {Current state-of-the-art methods in variational image segmentation using level set methods are able to robustly segment complex textured images in an unsupervised manner. In recent work, [18,19] we have explored the potential of AM-FM features for driving the unsupervised segmentation of a wide variety of textured images. Our first contribution in this work is at the feature extraction level, where we introduce a regularized approach to the demodulation of the AM-FM -modelled signals. By replacing the cascade of multiband filtering and subsequent differentiation with analytically derived equivalent filtering operations, increased noise-robustness can be achieved, while discretization problems in the implementation of the demodulation algorithm are alleviated. 
Our second contribution is based on a generative model we have recently proposed [18,20] that offers a measure related to the local prominence of a specific class of features, like edges and textures. The introduction of these measures as weighting terms in the evolution equations facilitates the fusion of different cues in a simple and efficient manner. Our systematic evaluation on the Berkeley segmentation benchmark demonstrates that this fusion method offers improved results when compared to our previous work as well as current state-of-the-art methods.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Current state-of-the-art methods in variational image segmentation using level set methods are able to robustly segment complex textured images in an unsupervised manner. In recent work, [18,19] we have explored the potential of AM-FM features for driving the unsupervised segmentation of a wide variety of textured images. Our first contribution in this work is at the feature extraction level, where we introduce a regularized approach to the demodulation of the AM-FM -modelled signals. By replacing the cascade of multiband filtering and subsequent differentiation with analytically derived equivalent filtering operations, increased noise-robustness can be achieved, while discretization problems in the implementation of the demodulation algorithm are alleviated. Our second contribution is based on a generative model we have recently proposed [18,20] that offers a measure related to the local prominence of a specific class of features, like edges and textures. The introduction of these measures as weighting terms in the evolution equations facilitates the fusion of different cues in a simple and efficient manner. Our systematic evaluation on the Berkeley segmentation benchmark demonstrates that this fusion method offers improved results when compared to our previous work as well as current state-of-the-art methods. |
John N. Karigiannis, Costas S. Tzafestas Multi-agent architecture with continuous reinforcement learning in fuzzy state-space for robot manipulation control Conference International Conference on Integrated Modeling and Analysis in Applied Control and Automation, IMAACA 2005, Held at the International Mediterranean Modeling Multiconference, I3M 2005, 2005. Abstract | BibTeX | Links: [Webpage] @conference{50b, title = {Multi-agent architecture with continuous reinforcement learning in fuzzy state-space for robot manipulation control}, author = { John N. Karigiannis and Costas S. Tzafestas}, url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84899464150&partnerID=tZOtx3y1}, year = {2005}, date = {2005-01-01}, booktitle = {International Conference on Integrated Modeling and Analysis in Applied Control and Automation, IMAACA 2005, Held at the International Mediterranean Modeling Multiconference, I3M 2005}, pages = {31--39}, abstract = {In the context of multi-agent systems, we are proposing a distributed robot control architecture that comprises artificial intelligence (AI) techniques and traditional control methodologies, based on the realization of a learning team of agents in a continuous problem setting. In a multi-agent system, action selection is important for cooperation and coordination among the agents. By employing reinforcement learning (RL) methods in a fuzzified state-space, we accomplish to design a control methodology, engaged in a continuous space, which enables the agents to learn, over a period of time, to perform sequences of continuous actions in a cooperative manner, in order to reach their goal without any prior knowledge of the task. The agents, corresponding to independent degrees of freedom of the system, achieve to gain experience over the task that they collaboratively perform, by exploring and exploiting their state-to-action mapping space. 
Although the first set of experiments for evaluating our methodology has been performed on a simulated planar 2 degrees of freedom (dof) manipulator, we anticipate that such an approach is highly scalable to the control of robotic systems that are kinematically more complex, comprising multiple dofs and potentially redundancies in open or closed kinematic chains, particularly dexterous manipulators.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In the context of multi-agent systems, we are proposing a distributed robot control architecture that comprises artificial intelligence (AI) techniques and traditional control methodologies, based on the realization of a learning team of agents in a continuous problem setting. In a multi-agent system, action selection is important for cooperation and coordination among the agents. By employing reinforcement learning (RL) methods in a fuzzified state-space, we accomplish to design a control methodology, engaged in a continuous space, which enables the agents to learn, over a period of time, to perform sequences of continuous actions in a cooperative manner, in order to reach their goal without any prior knowledge of the task. The agents, corresponding to independent degrees of freedom of the system, achieve to gain experience over the task that they collaboratively perform, by exploring and exploiting their state-to-action mapping space. Although the first set of experiments for evaluating our methodology has been performed on a simulated planar 2 degrees of freedom (dof) manipulator, we anticipate that such an approach is highly scalable to the control of robotic systems that are kinematically more complex, comprising multiple dofs and potentially redundancies in open or closed kinematic chains, particularly dexterous manipulators. |
Athanassios Katsamanis, Petros Maragos Advances in Statistical Estimation and Tracking of AM-FM Speech Components Conference Proc. Interspeech 2005, 2005. Abstract | BibTeX | Links: [PDF] @conference{Katsamanis2005, title = {Advances in Statistical Estimation and Tracking of AM-FM Speech Components}, author = { Athanassios Katsamanis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/KatsamanisMaragos_SpeechAM-FMStatisticalTracking_IS2005.pdf}, year = {2005}, date = {2005-01-01}, booktitle = {Proc. Interspeech 2005}, pages = {1125--1128}, abstract = {In this paper we present two extensions of a statistical framework to demodulate speech resonances, which are modeled as AM-FM signals. The first approach utilizes bandpass filtering and a standard demodulation algorithm which regularizes instantaneous amplitude and frequency estimates. The second employs particle filtering techniques to allow temporal variations of the parameters that are connected with spectral characteristics of the analyzed signal. Results are presented on both synthetic and real speech signals and improved performance is demonstrated. Both approaches appear to cope quite satisfactorily with the nonstationarity of speech signals.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper we present two extensions of a statistical framework to demodulate speech resonances, which are modeled as AM-FM signals. The first approach utilizes bandpass filtering and a standard demodulation algorithm which regularizes instantaneous amplitude and frequency estimates. The second employs particle filtering techniques to allow temporal variations of the parameters that are connected with spectral characteristics of the analyzed signal. Results are presented on both synthetic and real speech signals and improved performance is demonstrated. Both approaches appear to cope quite satisfactorily with the nonstationarity of speech signals. |
Iasonas Kokkinos, Petros Maragos An Expectation Maximization Approach to the Synergy between Object Categorization and Image Segmentation Conference Proc. IEEE Int'l Conf. on Computer Vision (ICCV 2005), 2005. @conference{228, title = {An Expectation Maximization Approach to the Synergy between Object Categorization and Image Segmentation}, author = { Iasonas Kokkinos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KokkinosMaragos_EM-Synergy-ImSegm-ObjectDetect_ICCV05.pdf}, year = {2005}, date = {2005-01-01}, booktitle = {Proc. IEEE Int'l Conf. on Computer Vision (ICCV 2005)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
George Papandreou, Petros Maragos Image Denoising in Nonlinear Scale-Spaces: Automatic Scale Selection Via Cross-Validation Conference Proceedings of International Conference on Image Processing (ICIP-2005), Genoa, Italy, Sep. 2005, 1 (September), 2005. Abstract | BibTeX | Links: [PDF] @conference{232, title = {Image Denoising in Nonlinear Scale-Spaces: Automatic Scale Selection Via Cross-Validation}, author = { George Papandreou and Petros Maragos}, url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=1529792 http://cvsp.cs.ntua.gr/publications/confr/PapandreouMaragos_ScaleSpaceScaleSelection_icip05.pdf}, year = {2005}, date = {2005-01-01}, booktitle = {Proceedings of International Conference on Image Processing (ICIP-2005), Genoa, Italy, Sep. 2005}, volume = {1}, number = {September}, pages = {6}, abstract = {Multiscale, i.e. scale-space image analysis is a powerful framework for many image processing tasks. A fundamental issue with such scale-space techniques is the automatic selection of the most salient scale for a particular application. This paper considers optimal scale selection when nonlinear diffusion and morphological scale-spaces are utilized for image denoising. The problem is studied from a statistical model selection viewpoint and cross-validation techniques are utilized to address it in a principled way. The proposed novel algorithms do not require knowledge of the noise variance, have acceptable computational cost and are readily integrated with a wide class of scale-space inducing processes which require setting of a scale parameter. Our experiments show that this methodology leads to robust algorithms, which outperform existing scale-selection techniques for a wide range of noise types and noise levels.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Multiscale, i.e. scale-space image analysis is a powerful framework for many image processing tasks. 
A fundamental issue with such scale-space techniques is the automatic selection of the most salient scale for a particular application. This paper considers optimal scale selection when nonlinear diffusion and morphological scale-spaces are utilized for image denoising. The problem is studied from a statistical model selection viewpoint and cross-validation techniques are utilized to address it in a principled way. The proposed novel algorithms do not require knowledge of the noise variance, have acceptable computational cost and are readily integrated with a wide class of scale-space inducing processes which require setting of a scale parameter. Our experiments show that this methodology leads to robust algorithms, which outperform existing scale-selection techniques for a wide range of noise types and noise levels. |
George Papandreou, Petros Maragos A cross-validatory statistical approach to scale selection for image denoising by nonlinear diffusion Conference Proceedings - 2005 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, CVPR 2005, I , 2005, ISSN: 1063-6919. Abstract | BibTeX | Links: [PDF] @conference{236, title = {A cross-validatory statistical approach to scale selection for image denoising by nonlinear diffusion}, author = { George Papandreou and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PapandreouMaragos_DiffusionStoppingTime_cvpr05.pdf}, doi = {10.1109/CVPR.2005.21}, issn = {1063-6919}, year = {2005}, date = {2005-01-01}, booktitle = {Proceedings - 2005 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, CVPR 2005}, volume = {I}, pages = {625--630}, abstract = {Scale-spaces induced by diffusion processes play an important role in many computer vision tasks. Automatically selecting the most appropriate scale for a particular problem is a central issue for the practical applicability of such scale-space techniques. This paper concentrates on automatic scale selection when nonlinear diffusion scale-spaces are utilized for image denoising. The problem is studied in a statistical model selection framework and cross-validation techniques are utilized to address it in a principled way. The proposed novel algorithms do not require knowledge of the noise variance and have acceptable computational cost. Extensive experiments on natural images show that the proposed methodology leads to robust algorithms, which outperform existing techniques for a wide range of noise types and noise levels.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Scale-spaces induced by diffusion processes play an important role in many computer vision tasks. 
Automatically selecting the most appropriate scale for a particular problem is a central issue for the practical applicability of such scale-space techniques. This paper concentrates on automatic scale selection when nonlinear diffusion scale-spaces are utilized for image denoising. The problem is studied in a statistical model selection framework and cross-validation techniques are utilized to address it in a principled way. The proposed novel algorithms do not require knowledge of the noise variance and have acceptable computational cost. Extensive experiments on natural images show that the proposed methodology leads to robust algorithms, which outperform existing techniques for a wide range of noise types and noise levels. |
Anastasia Sofou, Georgios Evangelopoulos, Petros Maragos Coupled geometric and texture PDE-based segmentation Conference Proceedings - International Conference on Image Processing, ICIP, 2 , 2005, ISSN: 15224880. @conference{231, title = {Coupled geometric and texture PDE-based segmentation}, author = { Anastasia Sofou and Georgios Evangelopoulos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/SofouEvangelopoulosMaragos_CoupledGeomTextSegmPDE_ICIP2005.pdf}, doi = {10.1109/ICIP.2005.1530139}, issn = {15224880}, year = {2005}, date = {2005-01-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, volume = {2}, pages = {650--653}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
CS Tzafestas, N Palaiologou, M Alifragis Experimental evaluation and pilot assessment study of a virtual and remote laboratory on robotic manipulation Conference IEEE International Symposium on Industrial Electronics (ISIE'05), p.1677-1683, Dubrovnik, Croatia, Jun 20-23, 2005, 2005, ISBN: 0-7803-8738-4. Abstract | BibTeX | Links: [PDF] @conference{Tzafestas2005, title = {Experimental evaluation and pilot assessment study of a virtual and remote laboratory on robotic manipulation}, author = { CS Tzafestas and N Palaiologou and M Alifragis}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/ISIE2005_Tzafestas_Palaiologou.pdf}, isbn = {0-7803-8738-4}, year = {2005}, date = {2005-01-01}, booktitle = {IEEE International Symposium on Industrial Electronics (ISIE'05), p.1677-1683, Dubrovnik, Croatia, Jun 20-23, 2005}, pages = {1677--1683}, abstract = {Practical training in experimental laboratory scenarios is indeed of great importance since mere lecturing is not sufficient enough to complete students' education in many engineering disciplines. Synchronous and asynchronous distance learning platforms have many advantages such as attending courses from a distance (e.g. in virtual classroom environments). However remote “elaboratory” systems are just now beginning to develop. In this paper, the development of a “virtual and remote laboratory platform” in the field of robotics and the methodology of its experimental evaluation are discussed. In the past, in our prior work [10], a first pilot experimental study was conducted according to a special evaluation protocol, in order to evaluate system performance regarding remotely training students to program robot manipulation tasks using the robot's Teach Pendant. The results of the first pilot study are encouraging enough. 
In this paper, we are focusing on the methodology of the evaluation protocol and discuss ways to extend this study amongst three groups: group-I trained the “classical way” on the real robot, group-II (remote) trained remotely on the graphical user interface of the remote laboratory platform, and group-III (virtual) also trained on the user interface, but using only the “virtual robot” functionalities of the platform with no remote real robot connection on the loop. Initial results are showing the need for developing real training scenarios in the frame of remote laboratory education aiming to achieve effective learning schemes for students in the engineering field.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Practical training in experimental laboratory scenarios is indeed of great importance since mere lecturing is not sufficient enough to complete students' education in many engineering disciplines. Synchronous and asynchronous distance learning platforms have many advantages such as attending courses from a distance (e.g. in virtual classroom environments). However remote “elaboratory” systems are just now beginning to develop. In this paper, the development of a “virtual and remote laboratory platform” in the field of robotics and the methodology of its experimental evaluation are discussed. In the past, in our prior work [10], a first pilot experimental study was conducted according to a special evaluation protocol, in order to evaluate system performance regarding remotely training students to program robot manipulation tasks using the robot's Teach Pendant. The results of the first pilot study are encouraging enough. 
In this paper, we are focusing on the methodology of the evaluation protocol and discuss ways to extend this study amongst three groups: group-I trained the “classical way” on the real robot, group-II (remote) trained remotely on the graphical user interface of the remote laboratory platform, and group-III (virtual) also trained on the user interface, but using only the “virtual robot” functionalities of the platform with no remote real robot connection on the loop. Initial results are showing the need for developing real training scenarios in the frame of remote laboratory education aiming to achieve effective learning schemes for students in the engineering field. |
Frederic Guichard, Petros Maragos, Jean-Michel Morel Partial Differential Equations for Morphological Operators Book Chapter Bilodeau, Michel; Meyer, Fernand; Schmitt, Michel (Ed.): Space, Structure and Randomness: Contributions in Honor of Georges Matheron in the Field of Geostatistics, Random Sets and Mathematical Morphology, pp. 369–390, Springer New York, New York, NY, 2005, ISBN: 978-0-387-29115-4. Abstract | BibTeX | Links: [Webpage] [PDF] @inbook{Guichard2005b, title = {Partial Differential Equations for Morphological Operators}, author = {Frederic Guichard and Petros Maragos and Jean-Michel Morel}, editor = {Michel Bilodeau and Fernand Meyer and Michel Schmitt}, url = {https://doi.org/10.1007/0-387-29115-6_15 http://robotics.ntua.gr/wp-content/uploads/sites/2/GuichardMaragos_2005.pdf}, doi = {10.1007/0-387-29115-6_15}, isbn = {978-0-387-29115-4}, year = {2005}, date = {2005-01-01}, booktitle = {Space, Structure and Randomness: Contributions in Honor of Georges Matheron in the Field of Geostatistics, Random Sets and Mathematical Morphology}, pages = {369--390}, publisher = {Springer New York}, address = {New York, NY}, abstract = {Two of G. Matheron's seminal contributions have been his development of size distributions (else called `granulometries') and his kernel representation theory. The first deals with semigroups of multiscale openings and closings of binary images (shapes) by compact convex sets, a basic ingredient of which are the multiscale Minkowski dilations and erosions. The second deals with representing increasing and translation-invariant set operators as union of erosions by its kernel sets or as an intersection of dilations.}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } Two of G. Matheron's seminal contributions have been his development of size distributions (else called `granulometries') and his kernel representation theory. 
The first deals with semigroups of multiscale openings and closings of binary images (shapes) by compact convex sets, a basic ingredient of which are the multiscale Minkowski dilations and erosions. The second deals with representing increasing and translation-invariant set operators as union of erosions by its kernel sets or as an intersection of dilations. |
P Maragos Morphological Filtering for Image Enhancement and Feature Detection Book Chapter Bovik, A C (Ed.): Image and Video Processing Handbook (Second Edition), pp. 135–156, Academic Press, 2nd, 2005. @inbook{Maragos2005, title = {Morphological Filtering for Image Enhancement and Feature Detection}, author = {P Maragos}, editor = {A C Bovik}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2005_Maragos_MFIEFD_ImVidProc-Handbook2_chap3-3_AcadPress.pdf}, doi = {10.1016/B978-012119792-6/50072-3}, year = {2005}, date = {2005-01-01}, booktitle = {Image and Video Processing Handbook (Second Edition)}, pages = {135--156}, publisher = {Academic Press}, edition = {2nd}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } |
P Maragos Partial Differential Equations for Morphological Scale-Spaces and Eikonal Applications Book Chapter Bovik, A C (Ed.): Image and Video Processing Handbook (Second Edition), pp. 587–612, Academic Press, 2nd, 2005. @inbook{Maragos2005b, title = {Partial Differential Equations for Morphological Scale-Spaces and Eikonal Applications}, author = {P Maragos}, editor = {A C Bovik}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2005_Maragos_PDEMSEA_ImVidProc-Handbook2_chap4-16_AcadPress.pdf}, doi = {10.1016/B978-012119792-6/50098-X}, year = {2005}, date = {2005-01-01}, booktitle = {Image and Video Processing Handbook (Second Edition)}, pages = {587--612}, publisher = {Academic Press}, edition = {2nd}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } |
2004 |
Petros Maragos, Anastasia Sofou, Giorgos B Stamou, Vassilis Tzouvaras, Efimia Papatheodorou, George P Stamou Image analysis of soil micromorphology: Feature extraction, segmentation, and quality inference Journal Article Eurasip Journal on Applied Signal Processing, 2004 (6), pp. 902–912, 2004, ISSN: 11108657. Abstract | BibTeX | Links: [PDF] @article{118, title = {Image analysis of soil micromorphology: Feature extraction, segmentation, and quality inference}, author = {Petros Maragos and Anastasia Sofou and Giorgos B Stamou and Vassilis Tzouvaras and Efimia Papatheodorou and George P Stamou}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/2004_Maragos-et-al_ImageAnalSoilMicromorphologyFeatureExtractSegmentQualityInfer_jasp.pdf}, doi = {10.1155/S1110865704402054}, issn = {11108657}, year = {2004}, date = {2004-01-01}, journal = {Eurasip Journal on Applied Signal Processing}, volume = {2004}, number = {6}, pages = {902--912}, abstract = {We present an automated system that we have developed for estimation of the bioecological quality of soils using various image analysis methodologies. Its goal is to analyze soilsection images, extract features related to their micromorphology, and relate the visual features to various degrees of soil fertility inferred from biochemical characteristics of the soil. The image methodologies used range from low-level image processing tasks, such as nonlinear enhancement, multiscale analysis, geometric feature detection, and size distributions, to object-oriented analysis, such as segmentation, region texture, and shape analysis.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We present an automated system that we have developed for estimation of the bioecological quality of soils using various image analysis methodologies. 
Its goal is to analyze soilsection images, extract features related to their micromorphology, and relate the visual features to various degrees of soil fertility inferred from biochemical characteristics of the soil. The image methodologies used range from low-level image processing tasks, such as nonlinear enhancement, multiscale analysis, geometric feature detection, and size distributions, to object-oriented analysis, such as segmentation, region texture, and shape analysis. |
C S Tzafestas, Y Koumpouros, K Birbas Paracentesis modeling and VR-based interactive simulation with haptic display for clinical skill training and assessment Conference in Proceedings of The International Conference on Integrated Modeling & Analysis in Applied Control & Automation (IMAACA'2004), Special Session on: Virtual Reality and Haptics for Modeling, Design and Interactive Simulation. I3M: International Mediterranean Modeling Multiconference, Genoa, Italy, October 28-31 2004, 2004. @conference{Tzafestas2003, title = {Paracentesis modeling and VR-based interactive simulation with haptic display for clinical skill training and assessment}, author = { C S Tzafestas and Y Koumpouros and K Birbas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/IMAACA_Haptics02_final.pdf}, year = {2004}, date = {2004-10-01}, booktitle = {in Proceedings of The International Conference on Integrated Modeling & Analysis in Applied Control & Automation (IMAACA'2004), Special Session on: Virtual Reality and Haptics for Modeling, Design and Interactive Simulation. I3M: International Mediterranean Modeling Multiconference, Genoa, Italy, October 28-31 2004}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
I. Kokkinos, G. Evangelopoulos, P. Maragos Modulation-feature based textured image segmentation using curve evolution Conference 2004 International Conference on Image Processing, 2004. ICIP '04., 2 , 2004, ISSN: 15224880. BibTeX | Links: [Webpage] [PDF] @conference{Kokkinos2004, title = {Modulation-feature based textured image segmentation using curve evolution}, author = { I. Kokkinos and G. Evangelopoulos and P. Maragos}, url = {http://ieeexplore.ieee.org/document/1419520/ http://robotics.ntua.gr/wp-content/uploads/sites/2/modulationfeature-based-textured-image-segmentation-using-curve-.pdf}, doi = {10.1109/ICIP.2004.1419520}, issn = {15224880}, year = {2004}, date = {2004-10-01}, booktitle = {2004 International Conference on Image Processing, 2004. ICIP '04.}, volume = {2}, pages = {1201--1204}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
X.C. Halkias, P. Maragos Analysis of kirlian images: feature extraction and segmentation Conference Proceedings 7th International Conference on Signal Processing, 2004. Proceedings. ICSP '04. 2004., 1 , 2004, ISBN: 0-7803-8406-7. BibTeX | Links: [Webpage] [PDF] @conference{Halkias2004, title = {Analysis of kirlian images: feature extraction and segmentation}, author = { X.C. Halkias and P. Maragos}, url = {http://ieeexplore.ieee.org/document/1452775/ http://robotics.ntua.gr/wp-content/uploads/sites/2/analysis-of-kirlian-images-feature-extraction-and-segmentation.pdf}, doi = {10.1109/ICOSP.2004.1452775}, isbn = {0-7803-8406-7}, year = {2004}, date = {2004-08-31}, booktitle = {Proceedings 7th International Conference on Signal Processing, 2004. Proceedings. ICSP '04. 2004.}, volume = {1}, pages = {765--768}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
G. Papandreou, P. Maragos A fast multigrid implicit algorithm for the evolution of geodesic active contours Conference Proceedings of the 2004 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, 2004. CVPR 2004., 2 , 2004, ISSN: 1063-6919. Abstract | BibTeX | Links: [Webpage] [PDF] @conference{Papandreou2004, title = {A fast multigrid implicit algorithm for the evolution of geodesic active contours}, author = { G. Papandreou and P. Maragos}, url = {http://ieeexplore.ieee.org/document/1315231/ http://robotics.ntua.gr/wp-content/uploads/sites/2/PapandreouMaragos_MultigridGAC_CVPR04.pdf}, doi = {10.1109/CVPR.2004.1315231}, issn = {1063-6919}, year = {2004}, date = {2004-06-01}, booktitle = {Proceedings of the 2004 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, 2004. CVPR 2004.}, volume = {2}, pages = {689--694}, abstract = {Poster Session 5: Statistical Methods}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Poster Session 5: Statistical Methods |
C. S. Tzafestas, Y. Koumpouros, K. Birbas Haptic interaction in VR-based paracentesis simulation for dexterity enhancement and assessment Conference International Congress Series, 1268 (C), 2004, ISSN: 05315131. Abstract | BibTeX | Links: [PDF] @conference{Tzafestas2004b, title = {Haptic interaction in VR-based paracentesis simulation for dexterity enhancement and assessment}, author = { C. S. Tzafestas and Y. Koumpouros and K. Birbas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Haptic_Tzafestas_2004.pdf}, doi = {10.1016/j.ics.2004.03.254}, issn = {05315131}, year = {2004}, date = {2004-06-01}, booktitle = {International Congress Series}, volume = {1268}, number = {C}, pages = {413--418}, abstract = {In this paper, we describe the development of an interactive virtual reality (VR) system that aims to realistically simulate specific paracentesis clinical procedures (particularly the procedure involved in the catheterization of the subclavian vein). A simplified elasto-static finite-element model is used for the physically based simulation of the deformable tissues, particularly skin deflection during needle insertion. The VR-based simulation is being coupled with a haptic feedback device to provide to the user realistic feeling of the interaction forces applied during the simulated paracentesis procedure. The system described is developed in the frames of a research project aiming to develop a larger-scale virtual environment simulator of emergency room (ER) scenarios and protocols for clinical skill training and assessment. {\textcopyright} 2004, CARS and Elsevier B.V. All rights reserved.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, we describe the development of an interactive virtual reality (VR) system that aims to realistically simulate specific paracentesis clinical procedures (particularly the procedure involved in the catheterization of the subclavian vein). 
A simplified elasto-static finite-element model is used for the physically based simulation of the deformable tissues, particularly skin deflection during needle insertion. The VR-based simulation is being coupled with a haptic feedback device to provide to the user realistic feeling of the interaction forces applied during the simulated paracentesis procedure. The system described is developed in the frames of a research project aiming to develop a larger-scale virtual environment simulator of emergency room (ER) scenarios and protocols for clinical skill training and assessment. © 2004, CARS and Elsevier B.V. All rights reserved. |
Iasonas Kokkinos, Rachid Deriche, Petros Maragos, Olivier Faugeras A Biologically Motivated and Computationally Tractable Model of Low and Mid-Level Vision Tasks Conference Proc. European Conf. on Computer Vision (ECCV-2004), Prague, May 2004, 2004, ISSN: 03029743. Abstract | BibTeX | Links: [Webpage] [PDF] @conference{Kokkinos2004b, title = {A Biologically Motivated and Computationally Tractable Model of Low and Mid-Level Vision Tasks}, author = { Iasonas Kokkinos and Rachid Deriche and Petros Maragos and Olivier Faugeras}, url = {http://link.springer.com/10.1007/978-3-540-24671-8_40 http://robotics.ntua.gr/wp-content/uploads/sites/2/KokkinosDericheMaragosFaugeras_BMCT_ECCV04.pdf}, doi = {10.1007/978-3-540-24671-8_40}, issn = {03029743}, year = {2004}, date = {2004-05-01}, booktitle = {Proc. European Conf. on Computer Vision (ECCV-2004), Prague, May 2004}, pages = {506--517}, abstract = {This paper presents a biologically motivated model for low and mid-level vision tasks and its interpretation in computer vision terms. Initially we briefly present the biologically plausible model of image segmentation developed by Stephen Grossberg and his collaborators during the last two decades, that has served as the backbone of many researchers' work. Subsequently we describe a novel version of this model with a simpler architecture but superior performance to the original system using nonlinear recurrent neural dynamics. This model integrates multi-scale contour, surface and saliency information in an efficient way, and results in smooth surfaces and thin edge maps, without posterior edge thinning or some sophisticated thresholding process. When applied to both synthetic and true images it gives satisfactory results, favorably comparable to those of classical computer vision algorithms. Analogies between the functions performed by this system and commonly used techniques for low- and mid-level computer vision tasks are presented. 
Further, by interpreting the network as minimizing a cost functional, links with the variational approach to computer vision are established.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper presents a biologically motivated model for low and mid-level vision tasks and its interpretation in computer vision terms. Initially we briefly present the biologically plausible model of image segmentation developed by Stephen Grossberg and his collaborators during the last two decades, that has served as the backbone of many researchers' work. Subsequently we describe a novel version of this model with a simpler architecture but superior performance to the original system using nonlinear recurrent neural dynamics. This model integrates multi-scale contour, surface and saliency information in an efficient way, and results in smooth surfaces and thin edge maps, without posterior edge thinning or some sophisticated thresholding process. When applied to both synthetic and true images it gives satisfactory results, favorably comparable to those of classical computer vision algorithms. Analogies between the functions performed by this system and commonly used techniques for low- and mid-level computer vision tasks are presented. Further, by interpreting the network as minimizing a cost functional, links with the variational approach to computer vision are established. |
C S Tzafestas, Y Koumpouros, G Bonatsos, K. Birbas, I.G. Kaklamanos Dexterity enhancement and assessment of training in paracentesis of great vessels by using a virtual reality based interactive system (DEX system) Conference (poster), 90th Annual Clinical Congress, American College of Surgeons, October 10-14, 2004, New Orleans, LA, USA, 2004. @conference{53b, title = {Dexterity enhancement and assessment of training in paracentesis of great vessels by using a virtual reality based interactive system (DEX system)}, author = { C S Tzafestas and Y Koumpouros and G Bonatsos and K. Birbas and I.G. Kaklamanos}, year = {2004}, date = {2004-01-01}, booktitle = {(poster), 90th Annual Clinical Congress, American College of Surgeons, October 10-14, 2004, New Orleans, LA, USA}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
I Kokkinos, G Evangelopoulos, P Maragos Advances in Texture Analysis: Energy Dominant Components and Multiple Hypothesis Testing Conference Proc. Int.l Conf. on Image Processing, 2004. @conference{238, title = {Advances in Texture Analysis: Energy Dominant Components and Multiple Hypothesis Testing}, author = { I Kokkinos and G Evangelopoulos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/KokkinosEvangelopoulosMaragos_DCA_detection_theory_ICIP04.pdf}, year = {2004}, date = {2004-01-01}, booktitle = {Proc. Int.l Conf. on Image Processing}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Alexandros G. Dimakis, Petros Maragos Modeling resonances with phase modulated self-similar processes Conference Proc. IEEE Int'l Conf. on Acoustics, Speech, and Signal Processing (ICASSP 2004), (1), 2004, ISSN: 15206149. Abstract | BibTeX | Links: [PDF] @conference{242, title = {Modeling resonances with phase modulated self-similar processes}, author = { Alexandros G. Dimakis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/DimakisMaragos_ModelResonPhaseModulSelfSimil_ICASSP2004.pdf}, issn = {15206149}, year = {2004}, date = {2004-01-01}, booktitle = {Proc. IEEE Int'l Conf. on Acoustics, Speech, and Signal Processing (ICASSP 2004)}, number = {1}, pages = {877--880}, abstract = {In this paper we propose a nonlinear model for time-varying random resonances where the instantaneous phase (and frequency) of a sinusoidal oscillation is allowed to vary proportionally to a random process that belongs to the class of $\alpha$-stable self-similar stochastic processes. This is a general model that includes phase modulations by fractional Brownian motion or fractional stable Levy motion as special cases. We explore theoretically this random modulation model and derive analytically its autocorrelation and power spectrum. We also propose an algorithm to fit this model to arbitrary resonances with random phase modulation. Further, we apply the above ideas to some speech data and demonstrate that the model is suitable for fricative sounds.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper we propose a nonlinear model for time-varying random resonances where the instantaneous phase (and frequency) of a sinusoidal oscillation is allowed to vary proportionally to a random process that belongs to the class of $\alpha$-stable self-similar stochastic processes. This is a general model that includes phase modulations by fractional Brownian motion or fractional stable Levy motion as special cases. 
We explore theoretically this ran-dom modulation model and derive analytically its autocorrelation and power spectrum. We also propose an algorithm to fit this model to arbitrary resonances with random phase modulation. Fur-ther, we apply the above ideas to some speech data and demon-strate that the model is suitable for fricative sounds. |
C S Tzafestas, M Alifragis, N Palaiologou, S.C.A Thomopoulos, A -E Exarchou, A Kroys, R Kunicke Virtual Robotic Laboratory : Applying web-based teleoperation technologies to distance training in robot manipulator programming Conference Rev 2004: 1st International Conference on Remote Engineering and Virtual Instrumentation, 2004. @conference{Tzafestas2004, title = {Virtual Robotic Laboratory : Applying web-based teleoperation technologies to distance training in robot manipulator programming}, author = { C S Tzafestas and M Alifragis and N Palaiologou and S.C.A Thomopoulos and A -E Exarchou and A Kroys and R Kunicke}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Virtual_Robotic_Laboratory_Applying_web.pdf}, year = {2004}, date = {2004-01-01}, booktitle = {Rev 2004: 1st International Conference on Remote Engineering and Virtual Instrumentation}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
CS Tzafestas, M Alifragis Development and experimental evaluation of a remote laboratory platform for teaching robot manipulator programming Conference Proc. Int. Conf. …, (c), 2004. @conference{54b, title = {Development and experimental evaluation of a remote laboratory platform for teaching robot manipulator programming}, author = { CS Tzafestas and M Alifragis}, url = {http://ineer.org/Events/ICEE2004/Proceedings/Abstracts/307_ICEE_2004_abstract_ktzaf_(1).pdf}, year = {2004}, date = {2004-01-01}, booktitle = {Proc. Int. Conf. \ldots}, number = {c}, pages = {1--5}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Geoffrey Zweig Advances in Large Vocabulary Continuous Speech Recognition Conference Proc. 21st European Signal Processing Conference (EUSIPCO-2013), Marrakech, Morocco, Sep. 2013, 60 , 2004, ISSN: 00652458. Abstract | BibTeX | Links: [Webpage] [PDF] @conference{173, title = {Advances in Large Vocabulary Continuous Speech Recognition}, author = { Geoffrey Zweig}, url = {http://www.sciencedirect.com/science/article/pii/S0065245803600070 http://robotics.ntua.gr/wp-content/uploads/sites/2/Zweig_2004.pdf}, doi = {10.1016/S0065-2458(03)60007-0}, issn = {00652458}, year = {2004}, date = {2004-01-01}, booktitle = {Proc. 21st European Signal Processing Conference (EUSIPCO-2013), Marrakech, Morocco, Sep. 2013}, volume = {60}, pages = {249--291}, abstract = {The development of robust, accurate and efficient speech recognition systems is critical to the widespread adoption of a large number of commercial applications. These include automated customer service, broadcast news transcription and indexing, voice-activated automobile accessories, large-vocabulary voice-activated cell-phone dialing, and automated directory assistance. This article provides a review of the current state-of-the-art, and the recent research performed in pursuit of these goals.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } The development of robust, accurate and efficient speech recognition systems is critical to the widespread adoption of a large number of commercial applications. These include automated customer service, broadcast news transcription and indexing, voice-activated automobile accessories, large-vocabulary voice-activated cell-phone dialing, and automated directory assistance. This article provides a review of the current state-of-the-art, and the recent research performed in pursuit of these goals. |
2003 |
Costas S Tzafestas Whole-Hand Kinesthetic Feedback and Haptic Perception in Dextrous Virtual Manipulation Journal Article IEEE Transactions on Systems, Man, and Cybernetics Part A:Systems and Humans., 33 (1), pp. 100–113, 2003, ISSN: 10834427. Abstract | BibTeX | Links: [PDF] @article{0_49, title = {Whole-Hand Kinesthetic Feedback and Haptic Perception in Dextrous Virtual Manipulation}, author = {Costas S Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/Tzafestas_TSMC-A-2003_01206459.pdf}, doi = {10.1109/TSMCA.2003.812600}, issn = {10834427}, year = {2003}, date = {2003-01-01}, journal = {IEEE Transactions on Systems, Man, and Cybernetics Part A:Systems and Humans.}, volume = {33}, number = {1}, pages = {100--113}, abstract = {One of the key requirements for a Virtual Reality system is the multimodal, real-time interaction between the human operator and a computer simulated and animated environment. This paper investigates problems related particularly to the haptic interaction between the human operator and a virtual environment. The work presented here focuses on two issues: 1) the synthesis of whole-hand kinesthetic feedback, based on the application of forces (torques) on individual phalanges (joints) of the human hand, and 2) the experimental evaluation of this haptic feedback system, in terms of human haptic perception of virtual physical properties (such as the weight of a virtual manipulated object), using psychophysical methods. The proposed kinesthetic feedback methodology is based on the solution of a generalized force distribution problem for the human hand during virtual manipulation tasks. The solution is computationally efficient and has been experimentally implemented using an exoskeleton force-feedback glove. A series of experiments is reported concerning the perception of weight of manipulated virtual objects and the obtained results demonstrate the feasibility of the concept. 
Issues related to the use of sensory substitution techniques for the application of haptic feedback on the human hand are also discussed.}, keywords = {}, pubstate = {published}, tppubtype = {article} } One of the key requirements for a Virtual Reality system is the multimodal, real-time interaction between the human operator and a computer simulated and animated environment. This paper investigates problems related particularly to the haptic interaction between the human operator and a virtual environment. The work presented here focuses on two issues: 1) the synthesis of whole-hand kinesthetic feedback, based on the application of forces (torques) on individual phalanges (joints) of the human hand, and 2) the experimental evaluation of this haptic feedback system, in terms of human haptic perception of virtual physical properties (such as the weight of a virtual manipulated object), using psychophysical methods. The proposed kinesthetic feedback methodology is based on the solution of a generalized force distribution problem for the human hand during virtual manipulation tasks. The solution is computationally efficient and has been experimentally implemented using an exoskeleton force-feedback glove. A series of experiments is reported concerning the perception of weight of manipulated virtual objects and the obtained results demonstrate the feasibility of the concept. Issues related to the use of sensory substitution techniques for the application of haptic feedback on the human hand are also discussed. |
Petros Maragos Algebraic and PDE approaches for lattice scale-spaces with global constraints Journal Article International Journal of Computer Vision, 52 (2-3), pp. 121–137, 2003, ISSN: 09205691. @article{82c, title = {Algebraic and PDE approaches for lattice scale-spaces with global constraints}, author = {Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/Maragos_AlgPDELattScaleSpaceGlobal_IJCV2003.pdf}, doi = {10.1023/A:1022999923439}, issn = {09205691}, year = {2003}, date = {2003-01-01}, journal = {International Journal of Computer Vision}, volume = {52}, number = {2-3}, pages = {121--137}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Dimitrios Dimitriadis, Petros Maragos Robust Energy Demodulation Based on Continuous Models with Application to Speech Recognition Conference Speech Communication, (September), 2003. @conference{245, title = {Robust Energy Demodulation Based on Continuous Models with Application to Speech Recognition}, author = { Dimitrios Dimitriadis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/DimitriadisMaragos_ContinDemodSpeecRecgn_Eurospeech2003.pdf}, year = {2003}, date = {2003-01-01}, booktitle = {Speech Communication}, number = {September}, pages = {3--6}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P. Maragos, V. Tzouvaras, G. Stamou Lattice fuzzy signal operators and generalized image gradients Conference Lecture Notes in Artificial Intelligence (Subseries of Lecture Notes in Computer Science), 2715 , 2003, ISSN: 03029743. Abstract | BibTeX | Links: [PDF] @conference{247, title = {Lattice fuzzy signal operators and generalized image gradients}, author = { P. Maragos and V. Tzouvaras and G. Stamou}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosTzouvarasStamou_LatFuzSigOperImagGrad_IFSA2003.pdf}, issn = {03029743}, year = {2003}, date = {2003-01-01}, booktitle = {Lecture Notes in Artificial Intelligence (Subseries of Lecture Notes in Computer Science)}, volume = {2715}, abstract = {\textcopyright 2003 Springer-Verlag Berlin Heidelberg. In this paper we use concepts from the lattice-based theory of morphological operators and fuzzy sets to develop generalized lattice image operators that are nonlinear convolutions that can be expressed as supremum (resp. infimum) of fuzzy intersection (resp. union) norms. Our emphasis and differences with many previous works is the construction of pairs of fuzzy dilation (sup of fuzzy intersection) and erosion (inf of fuzzy implication) operators that form lattice adjunctions. This guarantees that their composition will be a valid algebraic opening or closing. We have experimented with applying these fuzzy operators to various nonlinear filtering and image analysis tasks, attempting to understand the effect that the type of fuzzy norm and the shape-size of structuring function have on the resulting new image operators. We also present some theoretical and experimental results on using the lattice fuzzy operators, in combination with morphological systems or by themselves, to develop some new edge detection gradients which show improved performance in noise.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } © 2003 Springer-Verlag Berlin Heidelberg. 
In this paper we use concepts from the lattice-based theory of morphological operators and fuzzy sets to develop generalized lattice image operators that are nonlinear convolutions that can be expressed as supremum (resp. infimum) of fuzzy intersection (resp. union) norms. Our emphasis and differences with many previous works is the construction of pairs of fuzzy dilation (sup of fuzzy intersection) and erosion (inf of fuzzy implication) operators that form lattice adjunctions. This guarantees that their composition will be a valid algebraic opening or closing. We have experimented with applying these fuzzy operators to various nonlinear filtering and image analysis tasks, attempting to understand the effect that the type of fuzzy norm and the shape-size of structuring function have on the resulting new image operators. We also present some theoretical and experimental results on using the lattice fuzzy operators, in combination with morphological systems or by themselves, to develop some new edge detection gradients which show improved performance in noise. |
V Pitsikalis, I Kokkinos, P Maragos Nonlinear Analysis of Speech Signals: Generalized Dimensions and Lyapunov Exponents Conference Proc. European Conf. on Speech Communication and Technology, 2003. @conference{246, title = {Nonlinear Analysis of Speech Signals: Generalized Dimensions and Lyapunov Exponents}, author = { V Pitsikalis and I Kokkinos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PitsikalisKokinosMaragos_NonlinSpeecGDimLE_Eurospeech2003.pdf}, year = {2003}, date = {2003-01-01}, booktitle = {Proc. European Conf. on Speech Communication and Technology}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Vassilis Pitsikalis, Petros Maragos Some Advances on Speech Analysis using Generalized Dimensions Conference Dimension Contemporary German Arts And Letters, (Nolisp 03), 2003. @conference{248, title = {Some Advances on Speech Analysis using Generalized Dimensions}, author = { Vassilis Pitsikalis and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PitsikalisMaragos_AdvancesSpeechAnalysisGeneralizedDimensions_ISCANoLisp2003.pdf}, year = {2003}, date = {2003-01-01}, booktitle = {Dimension Contemporary German Arts And Letters}, number = {Nolisp 03}, pages = {3--6}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Anastasia Sofou, Petros Maragos PDE-BASED MODELING OF IMAGE SEGMENTATION USING VOLUMIC FLOODING Conference Proc. Int'l Conf. on Image Processing (ICIP-2003), Barcelona, Spain, Sep. 2003, 2003, ISBN: 0780377508. @conference{244, title = {PDE-BASED MODELING OF IMAGE SEGMENTATION USING VOLUMIC FLOODING}, author = { Anastasia Sofou and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/SofouMaragos_VolumeFloodImSegmPDE_ICIP2003.pdf}, isbn = {0780377508}, year = {2003}, date = {2003-01-01}, booktitle = {Proc. Int'l Conf. on Image Processing (ICIP-2003), Barcelona, Spain, Sep. 2003}, pages = {431--434}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Petros Maragos Morphological Global Reconstruction and Levelings: Lattice and PDE Approaches Book Chapter Geometric Level Set Methods in Imaging, Vision, and Graphics, pp. 141–155, Springer New York, New York, NY, 2003, ISBN: 978-0-387-21810-6. Abstract | BibTeX | Links: [Webpage] [PDF] @inbook{Maragos2003b, title = {Morphological Global Reconstruction and Levelings: Lattice and PDE Approaches}, author = {Petros Maragos}, url = {https://doi.org/10.1007/0-387-21810-6_8 http://robotics.ntua.gr/wp-content/uploads/sites/2/MorphologicalGlobalRec_maragos_2003.pdf}, doi = {10.1007/0-387-21810-6_8}, isbn = {978-0-387-21810-6}, year = {2003}, date = {2003-01-01}, booktitle = {Geometric Level Set Methods in Imaging, Vision, and Graphics}, pages = {141--155}, publisher = {Springer New York}, address = {New York, NY}, abstract = {This chapter begins with analyzing the theoretical connections between levelings on lattices and scale-space erosions on reference semilattices. They both represent large classes of self-dual morphological reconstruction operators that exhibit both local computation and global constraints. Such operators are useful in numerous image analysis and vision tasks including edge-preserving multiscale smoothing, image simplification, feature and object detection, segmentation, shape, texture and motion analysis. Previous definitions and constructions of levelings were either discrete or continuous using a PDE. We bridge this gap by introducing generalized levelings based on triphase operators that switch among three phases, one of which is a global constraint. The triphase operators include as special cases useful classes of semilattice erosions. Algebraically, levelings are created as limits of iterated or multiscale triphase operators. The subclass of multiscale geodesic triphase operators obeys a semigroup, which we exploit to find PDEs that can generate geodesic levelings and continuous-scale semilattice erosions. 
We discuss theoretical aspects of these PDEs, propose discrete algorithms for their numerical solution which converge as iterations of triphase operators, and provide insights via image experiments.}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } This chapter begins with analyzing the theoretical connections between levelings on lattices and scale-space erosions on reference semilattices. They both represent large classes of self-dual morphological reconstruction operators that exhibit both local computation and global constraints. Such operators are useful in numerous image analysis and vision tasks including edge-preserving multiscale smoothing, image simplification, feature and object detection, segmentation, shape, texture and motion analysis. Previous definitions and constructions of levelings were either discrete or continuous using a PDE. We bridge this gap by introducing generalized levelings based on triphase operators that switch among three phases, one of which is a global constraint. The triphase operators include as special cases useful classes of semilattice erosions. Algebraically, levelings are created as limits of iterated or multiscale triphase operators. The subclass of multiscale geodesic triphase operators obeys a semigroup, which we exploit to find PDEs that can generate geodesic levelings and continuous-scale semilattice erosions. We discuss theoretical aspects of these PDEs, propose discrete algorithms for their numerical solution which converge as iterations of triphase operators, and provide insights via image experiments. |
2002 |
D Dimitriadis, P Maragos, V Pitsikalis, A Potamianos Modulation and Chaotic Acoustic Features for Speech Recognition Journal Article Control and Intelligent Systems, 30 (1), pp. 19–26, 2002, ISSN: 14801752. @article{115, title = {Modulation and Chaotic Acoustic Features for Speech Recognition}, author = {D Dimitriadis and P Maragos and V Pitsikalis and A Potamianos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Modulation_and_chaotic_acoustic_features.pdf}, issn = {14801752}, year = {2002}, date = {2002-01-01}, journal = {Control and Intelligent Systems}, volume = {30}, number = {1}, pages = {19--26}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Faundez-Zanuy, Marcos; McLaughlin, Stephen; Esposito, Anna; Hussain, Amir; Schoentgen, Jean; Kubin, Gernot; Kleijn, W Bastiaan; Maragos, Petros Nonlinear Speech Processing: Overview and Applications Journal Article (January), 2002. @article{116, title = {Nonlinear Speech Processing: Overview and Applications}, author = {Faundez-Zanuy, Marcos and McLaughlin, Stephen and Esposito, Anna and Hussain, Amir and Schoentgen, Jean and Kubin, Gernot and Kleijn, W Bastiaan and Maragos, Petros}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Nonlinear_speech_processing_overview_and.pdf}, year = {2002}, date = {2002-01-01}, number = {January}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Costas S Tzafestas, Petros Maragos Shape connectivity: Multiscale analysis and application to generalized granulometries Journal Article Journal of Mathematical Imaging and Vision, 17 (2), pp. 109–129, 2002, ISSN: 09249907. Abstract | BibTeX | Links: [PDF] @article{117, title = {Shape connectivity: Multiscale analysis and application to generalized granulometries}, author = {Costas S Tzafestas and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/TzafestasMaragos_ShapeConnectMscale_JMIV2002.pdf}, doi = {10.1023/A:1020629402912}, issn = {09249907}, year = {2002}, date = {2002-01-01}, journal = {Journal of Mathematical Imaging and Vision}, volume = {17}, number = {2}, pages = {109--129}, abstract = {This paper develops a multiscale connectivity theory for shapes based on the axiomatic definition of new generalized connectivity measures, which are obtained using morphology-based nonlinear scale-space operators. The concept of connectivity-tree for hierarchical image representation is introduced and used to define generalized connected morphological operators. This theoretical framework is then applied to establish a class of generalized granulometries, implemented at a particular problem concerning soilsection image analysis and evaluation of morphological properties such as size distributions. Comparative results demonstrate the power and versatility of the proposed methodology with respect to the application of typical connected operators (such as reconstruction openings). 
This multiscale connectivity analysis framework aims at a more reliable evaluation of shape/size information within complex images, with particular applications to generalized granulometries, connected operators, and segmentation.}, keywords = {}, pubstate = {published}, tppubtype = {article} } This paper develops a multiscale connectivity theory for shapes based on the axiomatic definition of new generalized connectivity measures, which are obtained using morphology-based nonlinear scale-space operators. The concept of connectivity-tree for hierarchical image representation is introduced and used to define generalized connected morphological operators. This theoretical framework is then applied to establish a class of generalized granulometries, implemented at a particular problem concerning soilsection image analysis and evaluation of morphological properties such as size distributions. Comparative results demonstrate the power and versatility of the proposed methodology with respect to the application of typical connected operators (such as reconstruction openings). This multiscale connectivity analysis framework aims at a more reliable evaluation of shape/size information within complex images, with particular applications to generalized granulometries, connected operators, and segmentation. |
D Dimitriadis, P Maragos, A Potamianos Modulation features for speech recognition Journal Article International Conference on Acoustics, 1 , pp. I–377–I–380, 2002. @article{76c, title = {Modulation features for speech recognition}, author = {D Dimitriadis and P Maragos and A Potamianos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/DimitriadisMaragosPotamianos_RobustAMFM_Features_SpeechRecognition_ieeeSPL2005.pdf}, year = {2002}, date = {2002-01-01}, journal = {International Conference on Acoustics}, volume = {1}, pages = {I--377--I--380}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
D Dimitriadis, P Maragos, A Potamianos Modulation features for speech recognition Conference International Conference on Acoustics, 1 , 2002. @conference{Dimitriadis2002, title = {Modulation features for speech recognition}, author = { D Dimitriadis and P Maragos and A Potamianos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/dimitriadis2002.pdf}, year = {2002}, date = {2002-05-01}, booktitle = {International Conference on Acoustics}, volume = {1}, pages = {I--377--I--380}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
V Pitsikalis, P Maragos Speech analysis and feature extraction using chaotic models Conference International Conference on Acoustics, 1 , 2002. @conference{Pitsikalis2002, title = {Speech analysis and feature extraction using chaotic models}, author = { V Pitsikalis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/pitsikalis2002.pdf}, doi = {10.1109/ICASSP.2002.5743772}, year = {2002}, date = {2002-05-01}, booktitle = {International Conference on Acoustics}, volume = {1}, pages = {I--533--I--536 vol.1}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Maragos Lattice and PDE Approaches for Multiscale Image Analysis and Segmentation Problems in Computer Vision Conference Invited Keynote Paper, Proc. of XI Int'l Conf. on Computing, Mexico City, Nov. 2002, 2002. @conference{249, title = {Lattice and PDE Approaches for Multiscale Image Analysis and Segmentation Problems in Computer Vision}, author = { P Maragos}, year = {2002}, date = {2002-01-01}, booktitle = {Invited Keynote Paper, Proc. of XI Int'l Conf. on Computing, Mexico City, Nov. 2002}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Petros Maragos, Alexander G. Dimakis, Iasonas Kokkinos Some advances in nonlinear speech modeling using modulations, fractals, and chaos Conference International Conference on Digital Signal Processing, DSP, 1 , 2002, ISBN: 0780375033. Abstract | BibTeX | Links: [PDF] @conference{250, title = {Some advances in nonlinear speech modeling using modulations, fractals, and chaos}, author = { Petros Maragos and Alexander G. Dimakis and Iasonas Kokkinos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosDimakisKokinos_AdvancesNonlinSpeechModeling_DSP2002.pdf}, doi = {10.1109/ICDSP.2002.1027897}, isbn = {0780375033}, year = {2002}, date = {2002-01-01}, booktitle = {International Conference on Digital Signal Processing, DSP}, volume = {1}, pages = {325--332}, abstract = {In this paper we briefly summarize our on-going work on modeling nonlinear structures in speech signals, caused by modulation and turbulence phenomena, using the theories of modulation, fractals, and chaos as well as suitable nonlinear signal analysis methods. Further, we focus on two advances: i) AM-FM modeling of fricative sounds with random modulation signals of the 1/f-noise type and ii) improved methods for speech analysis and prediction on reconstructed multidimensional attractors.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper we briefly summarize our on-going work on modeling nonlinear structures in speech signals, caused by modulation and turbulence phenomena, using the theories of modulation, fractals, and chaos as well as suitable nonlinear signal analysis methods. Further, we focus on two advances: i) AM-FM modeling of fricative sounds with random modulation signals of the 1/f-noise type and ii) improved methods for speech analysis and prediction on reconstructed multidimensional attractors. |
Petros Maragos, Thanasis Loupas, Vassilis Pitsikalis On improving Doppler ultrasound spectroscopy with multiband instantaneous energy separation Conference International Conference on Digital Signal Processing, DSP, 2 , 2002, ISBN: 0780375033. @conference{251, title = {On improving Doppler ultrasound spectroscopy with multiband instantaneous energy separation}, author = { Petros Maragos and Thanasis Loupas and Vassilis Pitsikalis}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosLoupasPitsikalis_DopplerSpectroscMbandEnergySepar_DSP2002.pdf}, doi = {10.1109/ICDSP.2002.1028164}, isbn = {0780375033}, year = {2002}, date = {2002-01-01}, booktitle = {International Conference on Digital Signal Processing, DSP}, volume = {2}, pages = {611--614}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
2001 |
C S Tzafestas, S G Tzafestas Full-State Modeling, Motion Planning and Control of Mobile Manipulators Journal Article 2001. @article{5_41, title = {Full-State Modeling, Motion Planning and Control of Mobile Manipulators}, author = {C S Tzafestas and S G Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Full_state_modeling_motion_planning_and.pdf}, year = {2001}, date = {2001-01-01}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
S.G. Tzafestas, P.A. Prokopiou, C S Tzafestas A New Partitioned Robot Neurocontroller: General Analysis and Application to Teleoperator Modeling Uncertainties Compensation Journal Article 2001. @article{6_49, title = {A New Partitioned Robot Neurocontroller: General Analysis and Application to Teleoperator Modeling Uncertainties Compensation}, author = {S.G. Tzafestas and P.A. Prokopiou and C S Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/A_new_partitioned_robot_neurocontroller.pdf}, year = {2001}, date = {2001-01-01}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Alexandros Potamianos, Petros Maragos Time-frequency distributions for automatic speech recognition Journal Article Transactions on Speech and Audio Processing, IEEE, 9 (3), pp. 196–200, 2001. @article{114, title = {Time-frequency distributions for automatic speech recognition}, author = {Alexandros Potamianos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PotamianosMaragos_TFD-ASR_ieeetSAP2001.pdf}, year = {2001}, date = {2001-01-01}, journal = {Transactions on Speech and Audio Processing, IEEE}, volume = {9}, number = {3}, pages = {196--200}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Costas S. Tzafestas Teleplanning by human demonstration for VR-based teleoperation of a mobile robotic assistant Conference Proceedings - IEEE International Workshop on Robot and Human Interactive Communication, 2001, ISBN: 0780372220. Abstract | BibTeX | Links: [PDF] @conference{Tzafestas2001, title = {Teleplanning by human demonstration for VR-based teleoperation of a mobile robotic assistant}, author = { Costas S. Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/teleplanning-by-human-demonstration-for-vrbased-teleoperation-of.pdf}, doi = {10.1109/ROMAN.2001.981947}, isbn = {0780372220}, year = {2001}, date = {2001-09-01}, booktitle = {Proceedings - IEEE International Workshop on Robot and Human Interactive Communication}, pages = {462--467}, abstract = {Focuses on the integration of local path planning techniques in a multimodal teleoperation interface, for the efficient remote control of a mobile robotic assistant. The main principle underlying this scheme is related to finding new ways to establish an efficient human-robot cooperation framework, where humans and robots take charge of the parts of the tasks that they can perform more efficiently. For the teleoperation of a mobile robotic platform, a simple application of this general principle could be to commit the human operator in performing the necessary global planning operations, which are more demanding in terms of complex reasoning and required "intelligence", while other more local tasks such as collision avoidance and trajectory optimization are dedicated to the telerobotic system. We propose an implementation of this principle within a mobile robot teleoperation interface integrating virtual reality techniques and Web standards. The paper describes the multimodal interface and the design principles followed, as well as the integration of a local path planning method. 
This scheme, called "computer-assisted teleplanning by human demonstration", aims at providing active assistance to the human operator, enabling him to indicate in a natural way the desired global motion plan, for a more efficient teleoperation of a mobile robotic assistant}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Focuses on the integration of local path planning techniques in a multimodal teleoperation interface, for the efficient remote control of a mobile robotic assistant. The main principle underlying this scheme is related to finding new ways to establish an efficient human-robot cooperation framework, where humans and robots take charge of the parts of the tasks that they can perform more efficiently. For the teleoperation of a mobile robotic platform, a simple application of this general principle could be to commit the human operator in performing the necessary global planning operations, which are more demanding in terms of complex reasoning and required "intelligence", while other more local tasks such as collision avoidance and trajectory optimization are dedicated to the telerobotic system. We propose an implementation of this principle within a mobile robot teleoperation interface integrating virtual reality techniques and Web standards. The paper describes the multimodal interface and the design principles followed, as well as the integration of a local path planning method. This scheme, called "computer-assisted teleplanning by human demonstration", aims at providing active assistance to the human operator, enabling him to indicate in a natural way the desired global motion plan, for a more efficient teleoperation of a mobile robotic assistant |
Petros Maragos Algebraic and PDE Approaches for Multiscale Image Operators with Global Constraints : Reference Semilattice Erosions and Levelings Conference Proc. Int'l Conf. on Scale-Space and Morphology Theories in Computer Vision (SCALE-SPACE'01), Vancouver, Canada, July 2001; Lecture Notes in Computer Science 2106, pp.137-148, Springer-Verlag, 2001, ISSN: 16113349. @conference{Maragos2001, title = {Algebraic and PDE Approaches for Multiscale Image Operators with Global Constraints : Reference Semilattice Erosions and Levelings}, author = { Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Maragos_AlgebraicPDEApproaches_2001.pdf}, issn = {16113349}, year = {2001}, date = {2001-07-01}, booktitle = {Proc. Int'l Conf. on Scale-Space and Morphology Theories in Computer Vision (SCALE-SPACE'01), Vancouver, Canada, July 2001; Lecture Notes in Computer Science 2106, pp.137-148, Springer-Verlag}, pages = {137--148}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
D. Dimitriadis, P. Maragos An improved energy demodulation algorithm using splines Conference 2001 IEEE International Conference on Acoustics, Speech, and Signal Processing. Proceedings (Cat. No.01CH37221), 6 , 2001, ISSN: 1520-6149. Abstract | BibTeX | Links: [Webpage] [PDF] @conference{Dimitriadis2001, title = {An improved energy demodulation algorithm using splines}, author = { D. Dimitriadis and P. Maragos}, url = {http://ieeexplore.ieee.org/document/940591/ http://robotics.ntua.gr/wp-content/uploads/sites/2/DimitriadisMaragos_SplineESA_ICASSP2001.pdf}, doi = {10.1109/ICASSP.2001.940591}, issn = {1520-6149}, year = {2001}, date = {2001-05-01}, booktitle = {2001 IEEE International Conference on Acoustics, Speech, and Signal Processing. Proceedings (Cat. No.01CH37221)}, volume = {6}, pages = {3481--3484}, abstract = {A new algorithm is proposed for demodulating discrete-time AM-FM signals, which first interpolates the signals with smooth splines and then uses the continuous-time energy separation algorithm (ESA) based on the Teager-Kaiser energy operator. This spline-based ESA retains the excellent time resolution of the ESA based on discrete energy operators but performs better in the presence of noise. Further, its dependence on smooth splines allows some optimal trade-off between data fitting versus smoothing}, keywords = {}, pubstate = {published}, tppubtype = {conference} } A new algorithm is proposed for demodulating discrete-time AM-FM signals, which first interpolates the signals with smooth splines and then uses the continuous-time energy separation algorithm (ESA) based on the Teager-Kaiser energy operator. This spline-based ESA retains the excellent time resolution of the ESA based on discrete energy operators but performs better in the presence of noise. Further, its dependence on smooth splines allows some optimal trade-off between data fitting versus smoothing |
A Doulamis, N Doulamis, P Maragos Generalized multiscale connected operators with applications to granulometric image analysis Conference Image Processing, 2001. Proceedings. 2001 International Conference on, 3 , 2001. Abstract | BibTeX | Links: [PDF] @conference{255, title = {Generalized multiscale connected operators with applications to granulometric image analysis}, author = { A Doulamis and N Doulamis and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/Doulamis2Maragos_GeneralPatternSpectrum_ICIP2001.pdf}, year = {2001}, date = {2001-01-01}, booktitle = {Image Processing, 2001. Proceedings. 2001 International Conference on}, volume = {3}, pages = {684--687}, abstract = {In this paper, generalized granulometric size distributions and size histograms (a.k.a. 'pattern spectra') are developed using generalized multiscale lattice operators of the opening and closing type. The generalized size histograms are applied to granulometric analysis of soilsection images. An interesting structure is obtained when the histogram is based on area openings. Furthermore, a fast implementation of the generalized size histograms is presented using threshold analysis-synthesis. Comparisons with size distributions based on conventional morphological operators indicate that the generalized histograms provide a more direct and informative description of the image content in objects with scale-dependent geometric attributes. Applications are also developed for studying the structure of soilsection images.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, generalized granulometric size distributions and size histograms (a.k.a. 'pattern spectra') are developed using generalized multiscale lattice operators of the opening and closing type. The generalized size histograms are applied to granulometric analysis of soilsection images. An interesting structure is obtained when the histogram is based on area openings. 
Furthermore, a fast implementation of the generalized size histograms is presented using threshold analysis-synthesis. Comparisons with size distributions based on conventional morphological operators indicate that the generalized histograms provide a more direct and informative description of the image content in objects with scale-dependent geometric attributes. Applications are also developed for studying the structure of soilsection images. |
P. Maragos, V. Tzouvaras, G. Stamou SYNTHESIS AND APPLICATIONS OF LATTICE IMAGE OPERATORS BASED ON FUZZY NORMS Conference Proc. Int’l Conf. on Image Processing (ICIP-2001), 2001, ISBN: 0780367251. Abstract | BibTeX | Links: [PDF] @conference{256, title = {SYNTHESIS AND APPLICATIONS OF LATTICE IMAGE OPERATORS BASED ON FUZZY NORMS}, author = {P. Maragos and V. Tzouvaras and G. Stamou}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosTzouvarasStamou_LatImOpFuzzNorm_ICIP2001.pdf}, isbn = {0780367251}, year = {2001}, date = {2001-01-01}, booktitle = {Proc. Int’l Conf. on Image Processing (ICIP-2001)}, pages = {521--524}, abstract = {In this paper we use concepts from the lattice-based theory of morphological operators and fuzzy sets to develop generalized lattice image operators that can be expressed as nonlinear convolutions that are suprema or infima of fuzzy intersection or union norms. Our emphasis (and differences with previous works) is the construction of pairs of fuzzy dilation and erosion operators that form lattice adjunctions. This guarantees that their composition will be a valid algebraic opening or closing. The power but also the difficulty in applying these fuzzy operators to image analysis is the large variety of fuzzy norms and the absence of systematic ways in selecting them. Towards this goal, we have performed extensive experiments in applying these fuzzy operators to various nonlinear filtering and image analysis tasks, attempting first to understand the effect that the type of fuzzy norm and the shape-size of structuring function have on the resulting new image operators. 
Further, we have developed some new fuzzy edge gradients and optimized their usage for edge detection on test problems via a parametric fuzzy norm.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper we use concepts from the lattice-based theory of morphological operators and fuzzy sets to develop generalized lattice image operators that can be expressed as nonlinear convolutions that are suprema or infima of fuzzy intersection or union norms. Our emphasis (and differences with previous works) is the construction of pairs of fuzzy dilation and erosion operators that form lattice adjunctions. This guarantees that their composition will be a valid algebraic opening or closing. The power but also the difficulty in applying these fuzzy operators to image analysis is the large variety of fuzzy norms and the absence of systematic ways in selecting them. Towards this goal, we have performed extensive experiments in applying these fuzzy operators to various nonlinear filtering and image analysis tasks, attempting first to understand the effect that the type of fuzzy norm and the shape-size of structuring function have on the resulting new image operators. Further, we have developed some new fuzzy edge gradients and optimized their usage for edge detection on test problems via a parametric fuzzy norm. |
A. Sofou, C. Tzafestas, P. Maragos SEGMENTATION OF SOILSECTION IMAGES USING CONNECTED OPERATORS Conference Proc. Int'l Conf. on Image Processing (ICIP-2001), Thessaloniki, Greece, Oct. 2001, 2001, ISBN: 0780367251. @conference{254, title = {SEGMENTATION OF SOILSECTION IMAGES USING CONNECTED OPERATORS}, author = {A. Sofou and C. Tzafestas and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/SofouTzafestasMaragos_SoilSegmentConnectivity_ICIP2001.pdf}, isbn = {0780367251}, year = {2001}, date = {2001-01-01}, booktitle = {Proc. Int'l Conf. on Image Processing (ICIP-2001), Thessaloniki, Greece, Oct. 2001}, pages = {1087--1090}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C S Tzafestas Multimodal Teleoperation Interface integrating VR Models for a Mobile Robotic Assistant Conference Proc. 10th International Workshop on Robotics in Alpe-Adria-Danube Region (RAAD'2001), Vienna, Austria, May 16-18, 2001, 2001. @conference{60b, title = {Multimodal Teleoperation Interface integrating VR Models for a Mobile Robotic Assistant}, author = { C S Tzafestas}, year = {2001}, date = {2001-01-01}, booktitle = {Proc. 10th International Workshop on Robotics in Alpe-Adria-Danube Region (RAAD'2001), Vienna, Austria, May 16-18, 2001}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C S Tzafestas, S G Tzafestas Mobile Manipulators: Full-State Modeling, Motion Planning and Control Conference 17th Intl. Conference on CAD/CAM, Robotics and the Factories of the Future (CARS & FOF '2001), Durban, South Africa, July 2001, 2001. @conference{59b, title = {Mobile Manipulators: Full-State Modeling, Motion Planning and Control}, author = { C S Tzafestas and S G Tzafestas}, year = {2001}, date = {2001-01-01}, booktitle = {17th Intl. Conference on CAD/CAM, Robotics and the Factories of the Future (CARS \& FOF '2001), Durban, South Africa, July 2001}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Petros Maragos 10 - Differential Morphology Book Chapter Mitra, Sanjit K; Sicuranza, Giovanni L (Ed.): Nonlinear Image Processing, pp. 289–329, Academic Press, San Diego, 2001, ISBN: 978-0-12-500451-0. Abstract | BibTeX | Links: [Webpage] [PDF] @inbook{Maragos2001b, title = {10 - Differential Morphology}, author = {Petros Maragos}, editor = {Sanjit K Mitra and Giovanni L Sicuranza}, url = {http://www.sciencedirect.com/science/article/pii/B9780125004510500102 http://robotics.ntua.gr/wp-content/uploads/sites/2/Maragos_DifferentialMorphology_NonlinImageProcessch10_2001.pdf}, doi = {10.1016/B978-012500451-0/50010-2}, isbn = {978-0-12-500451-0}, year = {2001}, date = {2001-01-01}, booktitle = {Nonlinear Image Processing}, pages = {289--329}, publisher = {Academic Press}, address = {San Diego}, series = {Communications, Networking and Multimedia}, abstract = {Publisher Summary This chapter provides an overview of the analytic tools for morphological image processing that is based on ideas from differential calculus and dynamic systems, including the use of partial differential equations or difference equations to model nonlinear multi-scale analysis or distance propagation in images. The chapter discusses 2D nonlinear difference equations of the max-sum or min-sum type that model the space dynamics of 2D morphological systems, and some nonlinear signal transforms, called slope transforms, that can analyze these systems in a transform domain in ways conceptually similar to the application of Fourier transforms to linear systems. The chapter uses these nonlinear difference equations to model discrete distance transforms and relates them to numerical solutions of the eikonal partial differential equation (PDE) of optics. This chapter also reviews some nonlinear PDEs that model the evolution of multiscale morphological operators and use morphological derivatives. 
It then analyzes the multi-scale morphological PDEs and eikonal PDE solved via weighted distance transforms as a unified area in nonlinear image processing, called differential morphology and discusses some of its potential applications to image processing.}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } Publisher Summary This chapter provides an overview of the analytic tools for morphological image processing that is based on ideas from differential calculus and dynamic systems, including the use of partial differential equations or difference equations to model nonlinear multi-scale analysis or distance propagation in images. The chapter discusses 2D nonlinear difference equations of the max-sum or min-sum type that model the space dynamics of 2D morphological systems, and some nonlinear signal transforms, called slope transforms, that can analyze these systems in a transform domain in ways conceptually similar to the application of Fourier transforms to linear systems. The chapter uses these nonlinear difference equations to model discrete distance transforms and relates them to numerical solutions of the eikonal partial differential equation (PDE) of optics. This chapter also reviews some nonlinear PDEs that model the evolution of multiscale morphological operators and use morphological derivatives. It then analyzes the multi-scale morphological PDEs and eikonal PDE solved via weighted distance transforms as a unified area in nonlinear image processing, called differential morphology and discusses some of its potential applications to image processing. |
2000 |
C S Tzafestas, P A Prokopiou, S G Tzafestas A differential motion planning algorithm for controlling multi-robot systems handling a common object Journal Article Control and Cybernetics, 29 (2), pp. 566–584, 2000, ISSN: 03248569. Abstract | BibTeX | Links: [PDF] @article{7_51, title = {A differential motion planning algorithm for controlling multi-robot systems handling a common object}, author = {C S Tzafestas and P A Prokopiou and S G Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/A_differential_motion_planning_algo.pdf}, issn = {03248569}, year = {2000}, date = {2000-01-01}, journal = {Control and Cybernetics}, volume = {29}, number = {2}, pages = {566--584}, abstract = {Multi-robot systems have substantially increased capabilities over single-robot systems and can handle very large or peculiar objects. This paper presents a differential (incremental) motion planning algorithm for an $m$-robot system ($m \geq 2$) to cooperatively transfer an object from an initial to a desired final position / orientation by rigidly holding it at given respective points $Q_1, Q_2, \ldots, Q_m$. One of the robots plays the role of a "master" while other robots operate in the "slave" mode maintaining invariant their relative positions and orientations during the system motion. The method employs the differential displacements of the end-effector of each robot arm. Then, the differential displacements of the joints of the $m$ robots are computed for the application of incremental motion control. The algorithm was tested on many examples. A representative of them is shown here, concerning the case of three STAUBLI RX-90L robots similar to 6-dof PUMA robots. 
The results obtained show the practicality and effectiveness of the method, which, however, needs particular care for completely eliminating the cumulative errors that may occur.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Multi-robot systems have substantially increased capabilities over single-robot systems and can handle very large or peculiar objects. This paper presents a differential (incremental) motion planning algorithm for an $m$-robot system ($m \geq 2$) to cooperatively transfer an object from an initial to a desired final position / orientation by rigidly holding it at given respective points $Q_1, Q_2, \ldots, Q_m$. One of the robots plays the role of a "master" while other robots operate in the "slave" mode maintaining invariant their relative positions and orientations during the system motion. The method employs the differential displacements of the end-effector of each robot arm. Then, the differential displacements of the joints of the $m$ robots are computed for the application of incremental motion control. The algorithm was tested on many examples. A representative of them is shown here, concerning the case of three STAUBLI RX-90L robots similar to 6-dof PUMA robots. The results obtained show the practicality and effectiveness of the method, which, however, needs particular care for completely eliminating the cumulative errors that may occur. |
C S Tzafestas, Ph Coiffet Dexterous haptic interaction with virtual environments: hand-distributed kinesthetic feedback and haptic perception Journal Article 38 (2), pp. 433–476, 2000, ISSN: 02329298. @article{8_47, title = {Dexterous haptic interaction with virtual environments: hand-distributed kinesthetic feedback and haptic perception}, author = {C S Tzafestas and Ph Coiffet}, issn = {02329298}, year = {2000}, date = {2000-01-01}, volume = {38}, number = {2}, pages = {433--476}, abstract = {One of the key characteristics of a general Virtual Reality (VR) system is the multimodal, real-time interaction between the human operator and the computer simulated and animated environment. This paper deals more particularly with problems related to the haptic interaction between the human operator and a virtual environment (VE). The general goal of such a system can be seen as that of integrating the functionality of the human hand within a virtual world. Maintaining the dexterity and prehensile skills of the human operator within such applications implies the use of an anthropomorphic device which has enough degrees of freedom while being capable of monitoring the actions of the human hand and fingers. Such an exoskeleton glove device (the LRP hand master) has been recently developed in our laboratory and integrated in an interactive virtual prehension system. This paper focuses more particularly on two issues: (a) the synthesis of hand-distributed kinesthetic feedback to be applied on the fingers, and (b) the experimental evaluation of this haptic feedback system based on psychophysical methods. A series of experiments is reported here concerning the perception of the weight of manipulated virtual objects. 
Issues related to the use of sensory substitution techniques for the application of haptic feedback on the human hand are also discussed.}, keywords = {}, pubstate = {published}, tppubtype = {article} } One of the key characteristics of a general Virtual Reality (VR) system is the multimodal, real-time interaction between the human operator and the computer simulated and animated environment. This paper deals more particularly with problems related to the haptic interaction between the human operator and a virtual environment (VE). The general goal of such a system can be seen as that of integrating the functionality of the human hand within a virtual world. Maintaining the dexterity and prehensile skills of the human operator within such applications implies the use of an anthropomorphic device which has enough degrees of freedom while being capable of monitoring the actions of the human hand and fingers. Such an exoskeleton glove device (the LRP hand master) has been recently developed in our laboratory and integrated in an interactive virtual prehension system. This paper focuses more particularly on two issues: (a) the synthesis of hand-distributed kinesthetic feedback to be applied on the fingers, and (b) the experimental evaluation of this haptic feedback system based on psychophysical methods. A series of experiments is reported here concerning the perception of the weight of manipulated virtual objects. Issues related to the use of sensory substitution techniques for the application of haptic feedback on the human hand are also discussed. |
P Maragos, M A Butt Curve Evolution, Differential Morphology, and Distance Transforms Applied to Multiscale and Eikonal Problems Journal Article Fundamenta Informaticae, 41 , pp. 91–129, 2000, ISSN: 01692968. @article{80b, title = {Curve Evolution, Differential Morphology, and Distance Transforms Applied to Multiscale and Eikonal Problems}, author = {P Maragos and M A Butt}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosButt_CurvEvolDifMorf_FundInfo2000.pdf}, issn = {01692968}, year = {2000}, date = {2000-01-01}, journal = {Fundamenta Informaticae}, volume = {41}, pages = {91--129}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
L F C Pessoa, Petros Maragos Neural networks with hybrid morphological/rank/linear nodes: a unifying framework with applications to handwritten character recognition Journal Article Pattern Recognition, 33 , 2000. @article{111, title = {Neural networks with hybrid morphological/rank/linear nodes: a unifying framework with applications to handwritten character recognition}, author = {L F C Pessoa and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PessoaMaragos_MRLNNet_PatRecogn2000.pdf}, year = {2000}, date = {2000-01-01}, journal = {Pattern Recognition}, volume = {33}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
B. Santhanam, P. Maragos Multicomponent AM--FM Demodulation via Periodicity-based Algebraic Separation and Energy-based Demodulation Journal Article IEEE Trans. on Communications, 48 (3), pp. 473–490, 2000. @article{112, title = {Multicomponent AM--FM Demodulation via Periodicity-based Algebraic Separation and Energy-based Demodulation}, author = {B. Santhanam and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/SanthanamMaragos_MultAFM-PASED_ieeetCOM2000.pdf}, year = {2000}, date = {2000-01-01}, journal = {IEEE Trans. on Communications}, volume = {48}, number = {3}, pages = {473--490}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Fernand Meyer, Petros Maragos Nonlinear scale-space representation with morphological levelings Journal Article Journal of Visual Communication and Image Representation, 11 (2), pp. 245–265, 2000, ISSN: 10473203. Abstract | BibTeX | Links: [PDF] @article{113, title = {Nonlinear scale-space representation with morphological levelings}, author = {Fernand Meyer and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MeyerMaragos_NonlinScaleSpaceLevelings_JVCIR2000.pdf}, doi = {10.1006/jvci.1999.0447}, issn = {10473203}, year = {2000}, date = {2000-01-01}, journal = {Journal of Visual Communication and Image Representation}, volume = {11}, number = {2}, pages = {245--265}, abstract = {In this paper we present a nonlinear scale-space representation based on a general class of morphological strong filters, the levelings, which include the openings and closings by reconstruction. These filters are very useful for image simplification and segmentation. From one scale to the next, details vanish, but the contours of the remaining objects are preserved sharp and perfectly localized. Both the lattice algebraic and the scale-space properties of levelings are analyzed and illustrated. We also develop a nonlinear partial differential equation that models the generation of levelings as the limit of a controlled growth starting from an initial seed signal. Finally, we outline the use of levelings in improving the Gaussian scale-space by using the latter as an initial seed to generate multiscale levelings that have a superior preservation of image edges. {\textcopyright} 2000 Academic Press.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this paper we present a nonlinear scale-space representation based on a general class of morphological strong filters, the levelings, which include the openings and closings by reconstruction. These filters are very useful for image simplification and segmentation. 
From one scale to the next, details vanish, but the contours of the remaining objects are preserved sharp and perfectly localized. Both the lattice algebraic and the scale-space properties of levelings are analyzed and illustrated. We also develop a nonlinear partial differential equation that models the generation of levelings as the limit of a controlled growth starting from an initial seed signal. Finally, we outline the use of levelings in improving the Gaussian scale-space by using the latter as an initial seed to generate multiscale levelings that have a superior preservation of image edges. © 2000 Academic Press. |
Costas S Tzafestas, Dimitris Valatsos VR-based Teleoperation of a Mobile Robotic Assistant : Progress Report Conference in Proc: European Workshop on Service and Humanoid Robots (Servicerob'2001), pp.51-56, Santorini, Greece, 2001, (c), 2000. @conference{Tzafestas2000, title = {VR-based Teleoperation of a Mobile Robotic Assistant : Progress Report}, author = { Costas S Tzafestas and Dimitris Valatsos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/KTzaf_DEMO_2000_13.pdf}, year = {2000}, date = {2000-11-01}, booktitle = {in Proc: European Workshop on Service and Humanoid Robots (Servicerob'2001), pp.51-56, Santorini, Greece, 2001}, number = {c}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Maragos, F Meyer A PDE approach to nonlinear image simplification via levelings and reconstruction filters Conference ICIP 2000: Proceedings of 2000 International Conference on Image Processing, Volume 2, 2000. @conference{Maragos2000b, title = {A PDE approach to nonlinear image simplification via levelings and reconstruction filters}, author = { P Maragos and F Meyer}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/maragos2000.pdf}, year = {2000}, date = {2000-09-01}, booktitle = {ICIP 2000: Proceedings of 2000 International Conference on Image Processing, Volume 2}, pages = {938--941}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Petros Maragos, Giorgos Stamou, Spyros Tzafestas A lattice control model of fuzzy dynamical systems in state-space Conference in Mathematical Morphology and Its Application to Image and Signal Processing, J. Goutsias, L. Vincent and D. Bloomberg, Editors, Kluwer Academic Publishers, Boston, 2000, pp. 61--70, 70 (ii), 2000. @conference{260, title = {A lattice control model of fuzzy dynamical systems in state-space}, author = { Petros Maragos and Giorgos Stamou and Spyros Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosStamouTzafestas_LatControlFuzzySyst_ISMM2000.pdf}, year = {2000}, date = {2000-01-01}, booktitle = {in Mathematical Morphology and Its Application to Image and Signal Processing, J. Goutsias, L. Vincent and D. Bloomberg, Editors, Kluwer Academic Publishers, Boston, 2000, pp. 61--70}, volume = {70}, number = {ii}, pages = {61--70}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C. S. Tzafestas, Ph Coiffet Dexterous haptic interaction with virtual environments: hand-distributed kinesthetic feedback and haptic perception Conference Proc. IARP First International Workshop on Humanoid and Human Friendly Robotics, Tsukuba, Japan, October 26-27, 1998, 38 (2), 2000, ISSN: 02329298. @conference{64b, title = {Dexterous haptic interaction with virtual environments: hand-distributed kinesthetic feedback and haptic perception}, author = { C. S. Tzafestas and Ph Coiffet}, issn = {02329298}, year = {2000}, date = {2000-01-01}, booktitle = {Proc. IARP First International Workshop on Humanoid and Human Friendly Robotics, Tsukuba, Japan, October 26-27, 1998}, volume = {38}, number = {2}, pages = {433--476}, abstract = {One of the key characteristics of a general Virtual Reality (VR) system is the multimodal, real-time interaction between the human operator and the computer simulated and animated environment. This paper deals more particularly with problems related to the haptic interaction between the human operator and a virtual environment (VE). The general goal of such a system can be seen as that of integrating the functionality of the human hand within a virtual world. Maintaining the dexterity and prehensile skills of the human operator within such applications implies the use of an anthropomorphic device which has enough degrees of freedom while being capable of monitoring the actions of the human hand and fingers. Such an exoskeleton glove device (the LRP hand master) has been recently developed in our laboratory and integrated in an interactive virtual prehension system. This paper focuses more particularly on two issues: (a) the synthesis of hand-distributed kinesthetic feedback to be applied on the fingers, and (b) the experimental evaluation of this haptic feedback system based on psychophysical methods. A series of experiments is reported here concerning the perception of the weight of manipulated virtual objects. 
Issues related to the use of sensory substitution techniques for the application of haptic feedback on the human hand are also discussed.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } One of the key characteristics of a general Virtual Reality (VR) system is the multimodal, real-time interaction between the human operator and the computer simulated and animated environment. This paper deals more particularly with problems related to the haptic interaction between the human operator and a virtual environment (VE). The general goal of such a system can be seen as that of integrating the functionality of the human hand within a virtual world. Maintaining the dexterity and prehensile skills of the human operator within such applications implies the use of an anthropomorphic device which has enough degrees of freedom while being capable of monitoring the actions of the human hand and fingers. Such an exoskeleton glove device (the LRP hand master) has been recently developed in our laboratory and integrated in an interactive virtual prehension system. This paper focuses more particularly on two issues: (a) the synthesis of hand-distributed kinesthetic feedback to be applied on the fingers, and (b) the experimental evaluation of this haptic feedback system based on psychophysical methods. A series of experiments is reported here concerning the perception of the weight of manipulated virtual objects. Issues related to the use of sensory substitution techniques for the application of haptic feedback on the human hand are also discussed. |
P Maragos, L F C Pessoa Morphological Filtering for Image Enhancement and Detection Book Chapter Bovik, A C (Ed.): Image and Video Processing Handbook (First Edition), pp. 101–116, Acad. Press, 2000. @inbook{Maragos2000, title = {Morphological Filtering for Image Enhancement and Detection}, author = {P Maragos and L F C Pessoa}, editor = {A C Bovik}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/2000_MaragosPessoa_MorphFilterEnhancDetect_ImVidHbked1_AcadPress.pdf}, year = {2000}, date = {2000-01-01}, booktitle = {Image and Video Processing Handbook (First Edition)}, pages = {101--116}, publisher = {Acad. Press}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } |
1999 |
Petros Maragos, Alexandros Potamianos Fractal dimensions of speech sounds: Computation and application to automatic speech recognition Journal Article The Journal of the Acoustical Society of America, 105 (3), pp. 1925–1932, 1999, ISSN: 0001-4966. Abstract | BibTeX | Links: [Webpage] [PDF] @article{109, title = {Fractal dimensions of speech sounds: Computation and application to automatic speech recognition}, author = {Petros Maragos and Alexandros Potamianos}, url = {http://asa.scitation.org/doi/10.1121/1.426738 http://robotics.ntua.gr/wp-content/uploads/sites/2/MaragosPotamianos_SpeecFrDimRecogn_JASA1999.pdf}, doi = {10.1121/1.426738}, issn = {0001-4966}, year = {1999}, date = {1999-01-01}, journal = {The Journal of the Acoustical Society of America}, volume = {105}, number = {3}, pages = {1925--1932}, abstract = {The dynamics of airflow during speech production may often result in some small or large degree of turbulence. In this paper, the geometry of speech turbulence as reflected in the fragmentation of the time signal is quantified by using fractal models. An efficient algorithm for estimating the short-time fractal dimension of speech signals based on multiscale morphological filtering is described, and its potential for speech segmentation and phonetic classification discussed. Also reported are experimental results on using the short-time fractal dimension of speech signals at multiple scales as additional features in an automatic speech-recognition system using hidden Markov models, which provide a modest improvement in speech-recognition performance.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The dynamics of airflow during speech production may often result in some small or large degree of turbulence. In this paper, the geometry of speech turbulence as reflected in the fragmentation of the time signal is quantified by using fractal models. 
An efficient algorithm for estimating the short-time fractal dimension of speech signals based on multiscale morphological filtering is described, and its potential for speech segmentation and phonetic classification discussed. Also reported are experimental results on using the short-time fractal dimension of speech signals at multiple scales as additional features in an automatic speech-recognition system using hidden Markov models, which provide a modest improvement in speech-recognition performance. |
Alexandros Potamianos, Petros Maragos Speech analysis and synthesis using an AM–FM modulation model Journal Article Speech Communication, 28 (3), pp. 195–209, 1999. @article{110, title = {Speech analysis and synthesis using an AM--FM modulation model}, author = {Alexandros Potamianos and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PotamianosMaragos_SpeechAnalSynthUsingAMFM-ModulModel_SpeCom1999.pdf}, year = {1999}, date = {1999-01-01}, journal = {Speech Communication}, volume = {28}, number = {3}, pages = {195--209}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Fernand Meyer, Petros Maragos Morphological scale-space representation with levelings Conference Scale-Space Theories in Computer Vision, 1999, ISSN: 16113349. BibTeX | Links: [Webpage] [PDF] @conference{Meyer1999, title = {Morphological scale-space representation with levelings}, author = { Fernand Meyer and Petros Maragos}, url = {http://link.springer.com/chapter/10.1007/3-540-48236-9_17 http://robotics.ntua.gr/wp-content/uploads/sites/2/MeyerMaragos_MorphologicalSSLevelings_SS1999.pdf}, issn = {16113349}, year = {1999}, date = {1999-09-01}, booktitle = {Scale-Space Theories in Computer Vision}, pages = {187--198}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Fernand Meyer, Petros Maragos Multiscale Morphological Segmentations Based on Watershed, Flooding, and Eikonal PDE Conference Proc. Int'l Conf. on Scale-Space Theories in Computer Vision (SCALE-SPACE'99), Corfu, Greece, Sep. 1999; Lecture Notes in Computer Science 1682, pp.351-362, Springer-Verlag, 1999, ISSN: 16113349. BibTeX | Links: [Webpage] [PDF] @conference{Meyer1999b, title = {Multiscale Morphological Segmentations Based on Watershed, Flooding, and Eikonal PDE}, author = { Fernand Meyer and Petros Maragos}, url = {http://link.springer.com/10.1007/3-540-48236-9_31 http://robotics.ntua.gr/wp-content/uploads/sites/2/MeyerMaragos_MultiscaleMorphSegmWshedFloodPDE_SS1999.pdf}, doi = {10.1007/3-540-48236-9_31}, issn = {16113349}, year = {1999}, date = {1999-09-01}, booktitle = {Proc. Int'l Conf. on Scale-Space Theories in Computer Vision (SCALE-SPACE'99), Corfu, Greece, Sep. 1999; Lecture Notes in Computer Science 1682, pp.351-362, Springer-Verlag}, pages = {351--362}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Costas S. Tzafestas, Spyros G. Tzafestas Recent algorithms for fuzzy and neurofuzzy path planning and navigation of autonomous mobile robots Conference Systems Science, 25 (2), 1999, ISSN: 01371223. Abstract | BibTeX | Links: [PDF] @conference{Tzafestas1999, title = {Recent algorithms for fuzzy and neurofuzzy path planning and navigation of autonomous mobile robots}, author = { Costas S. Tzafestas and Spyros G. Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/tzafestas1999.pdf}, issn = {01371223}, year = {1999}, date = {1999-08-01}, booktitle = {Systems Science}, volume = {25}, number = {2}, pages = {25--39}, abstract = {{\textcopyright} 1999 EUCA. This paper reviews a number of recent algorithms for mobile robot path planning, navigation and motion control, which employ fuzzy logic and neuro-fuzzy learning and reasoning. Starting with a discussion of the structure of fuzzy and neuro-fuzzy systems, two fuzzy obstacle avoidance path planning algorithms are presented followed by a 3-level neuro-fuzzy local and global path planning scheme. Then the motion planning and control problem is considered. A fuzzy path tracking strategy is outlined, followed by a fuzzy navigation algorithm among polygonal obstacles and a learning-by-doing neuro-fuzzy motion planning scheme. The paper ends with a hybrid robust motion control technique which combines the minimum interference and sliding mode control principles with fuzzy inference. A representative set of examples are included which illustrate the performance of the algorithms under various realistic conditions.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } © 1999 EUCA. This paper reviews a number of recent algorithms for mobile robot path planning, navigation and motion control, which employ fuzzy logic and neuro-fuzzy learning and reasoning. 
Starting with a discussion of the structure of fuzzy and neuro-fuzzy systems, two fuzzy obstacle avoidance path planning algorithms are presented followed by a 3-level neuro-fuzzy local and global path planning scheme. Then the motion planning and control problem is considered. A fuzzy path tracking strategy is outlined, followed by a fuzzy navigation algorithm among polygonal obstacles and a learning-by-doing neuro-fuzzy motion planning scheme. The paper ends with a hybrid robust motion control technique which combines the minimum interference and sliding mode control principles with fuzzy inference. A representative set of examples are included which illustrate the performance of the algorithms under various realistic conditions. |
S G Tzafestas, C S Tzafestas Virtual Reality in Telerobotics: The State-of-the-Art Conference Proc. IEEE Intl. Symposium on Industrial Electronics (ISIE'99), Bled, Slovenia, July 12-16, 1999, 1999. @conference{Tzafestas1999b, title = {Virtual Reality in Telerobotics: The State-of-the-Art}, author = { S G Tzafestas and C S Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/virtual-reality-in-telerobotics-the-stateoftheart.pdf}, year = {1999}, date = {1999-07-01}, booktitle = {Proc. IEEE Intl. Symposium on Industrial Electronics (ISIE'99), Bled, Slovenia, July 12-16, 1999}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Petros Maragos, Fernand Meyer Nonlinear PDEs and Numerical Algorithms for Modeling Levelings and Reconstruction Filters Conference Proc. Int'l Conf. on Scale-Space Theories in Computer Vision (SCALE-SPACE'99), Corfu, Greece, Sep. 1999; Lecture Notes in Computer Science 1682, pp.363-374, Springer-Verlag, 1999, ISSN: 16113349. @conference{261, title = {Nonlinear PDEs and Numerical Algorithms for Modeling Levelings and Reconstruction Filters}, author = { Petros Maragos and Fernand Meyer}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosMeyer_PDEsNumericalLevelingsReconstrction_SS1999.pdf}, issn = {16113349}, year = {1999}, date = {1999-01-01}, booktitle = {Proc. Int'l Conf. on Scale-Space Theories in Computer Vision (SCALE-SPACE'99), Corfu, Greece, Sep. 1999; Lecture Notes in Computer Science 1682, pp.363-374, Springer-Verlag}, pages = {363--374}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Maragos, S G Tzafestas Min-Max Control with Application to Discrete Event Dynamic Systems Book Chapter Advances in Manufacturing, pp. 217–230, Springer London, London, 1999, ISBN: 978-1-4471-0855-9. Abstract | BibTeX | Links: [PDF] @inbook{Maragos1999b, title = {Min-Max Control with Application to Discrete Event Dynamic Systems}, author = {P Maragos and S G Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/1999_MaragosTzafestas_MinmaxControlApplicDEDS_BookChap_Springer.pdf}, isbn = {978-1-4471-0855-9}, year = {1999}, date = {1999-01-01}, booktitle = {Advances in Manufacturing}, pages = {217--230}, publisher = {Springer London}, address = {London}, abstract = {Large classes of dynamic phenomena such as material flow in manufacturing systems, traffic flow in transportation or communication networks, and related scheduling problems can be viewed as discrete event dynamical systems (DEDS); see the papers in [7] for surveys. An efficient approach [3,9] to model large classes of DEDS has been based on the minimax algebra [4] and describes the time dynamics of such DEDS by using nonlinear state space equations which algebraically resemble the linear (sum-product) equations of linear control but in which the addition and multiplication have been replaced by maximum and addition, respectively}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } Large classes of dynamic phenomena such as material flow in manufacturing systems, traffic flow in transportation or communication networks, and related scheduling problems can be viewed as discrete event dynamical systems (DEDS); see the papers in [7] for surveys. 
An efficient approach [3,9] to model large classes of DEDS has been based on the minimax algebra [4] and describes the time dynamics of such DEDS by using nonlinear state space equations which algebraically resemble the linear (sum-product) equations of linear control but in which the addition and multiplication have been replaced by maximum and addition, respectively |
P Maragos Morphological Signal and Image Processing Book Chapter Madisetti, V K; Williams, D B (Ed.): Digital Signal Processing Handbook, CRC Press, 1999. @inbook{Maragos1999, title = {Morphological Signal and Image Processing}, author = {P Maragos}, editor = {V K Madisetti and D B Williams}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/1999_Maragos_MSIP_DSP-Handbook_chap74_CRC.pdf}, year = {1999}, date = {1999-01-01}, booktitle = {Digital Signal Processing Handbook}, publisher = {CRC Press}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } |
1998 |
L F C Pessoa, P Maragos MRL-filters: a general class of nonlinear systems and their optimal design for image processing Journal Article IEEE Transactions on Image Processing, 7 (7), pp. 966–978, 1998, ISSN: 1057-7149. Abstract | BibTeX | Links: [PDF] @article{701150, title = {MRL-filters: a general class of nonlinear systems and their optimal design for image processing}, author = {L F C Pessoa and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/PessoaMaragos_MRLFilt_ieeetIP1998.pdf}, doi = {10.1109/83.701150}, issn = {1057-7149}, year = {1998}, date = {1998-07-01}, journal = {IEEE Transactions on Image Processing}, volume = {7}, number = {7}, pages = {966--978}, abstract = {A class of morphological/rank/linear (MRL)-filters is presented as a general nonlinear tool for image processing. They consist of a linear combination between a morphological/rank filter and a linear filter. A gradient steepest descent method is proposed to optimally design these filters, using the averaged least mean squares (LMS) algorithm. The filter design is viewed as a learning process, and convergence issues are theoretically and experimentally investigated. A systematic approach is proposed to overcome the problem of nondifferentiability of the nonlinear filter component and to improve the numerical robustness of the training algorithm, which results in simple training equations. Image processing applications in system identification and image restoration are also presented, illustrating the simplicity of training MRL-filters and their effectiveness for image/signal processing.}, keywords = {}, pubstate = {published}, tppubtype = {article} } A class of morphological/rank/linear (MRL)-filters is presented as a general nonlinear tool for image processing. They consist of a linear combination between a morphological/rank filter and a linear filter. 
A gradient steepest descent method is proposed to optimally design these filters, using the averaged least mean squares (LMS) algorithm. The filter design is viewed as a learning process, and convergence issues are theoretically and experimentally investigated. A systematic approach is proposed to overcome the problem of nondifferentiability of the nonlinear filter component and to improve the numerical robustness of the training algorithm, which results in simple training equations. Image processing applications in system identification and image restoration are also presented, illustrating the simplicity of training MRL-filters and their effectiveness for image/signal processing. |
Costas S Tzafestas, Platon A Prokopiou, Spyros G Tzafestas Path Planning and Control of a Cooperative Three-Robot System Manipulating Large Objects Journal Article Journal of Intelligent and Robotic Systems: Theory and Applications, 22 (2), pp. 99–116, 1998, ISSN: 09210296. Abstract | BibTeX | Links: [PDF] @article{11b, title = {Path Planning and Control of a Cooperative Three-Robot System Manipulating Large Objects}, author = {Costas S Tzafestas and Platon A Prokopiou and Spyros G Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/A_1007943632269.pdf}, doi = {10.1023/A:1007943632269}, issn = {09210296}, year = {1998}, date = {1998-01-01}, journal = {Journal of Intelligent and Robotic Systems: Theory and Applications}, volume = {22}, number = {2}, pages = {99--116}, abstract = {After a brief review of the current research on multi-robot systems, the paper presents a path planning and control scheme for a cooperative three-robot system transferring/manipulating a large object from an initial to a desired final position/orientation. The robots are assumed to be capable of holding the object at three points that define an isosceles triangle. The mode of operation adopted is that of a "master-and-two-slave robots". The control scheme employs the differential displacement of the object which is transformed into that of the end-effector of each robotic arm, and then used to compute the differential displacements of the joints of the robots. The scheme was applied to several 3-robot systems by simulation and proved to be adequately effective, subject to certain conditions regarding the magnitude of the differential displacements. 
Here, an example is included which concerns the case of three Staubli RX-90L robots.}, keywords = {}, pubstate = {published}, tppubtype = {article} } After a brief review of the current research on multi-robot systems, the paper presents a path planning and control scheme for a cooperative three-robot system transferring/manipulating a large object from an initial to a desired final position/orientation. The robots are assumed to be capable of holding the object at three points that define an isosceles triangle. The mode of operation adopted is that of a "master-and-two-slave robots". The control scheme employs the differential displacement of the object which is transformed into that of the end-effector of each robotic arm, and then used to compute the differential displacements of the joints of the robots. The scheme was applied to several 3-robot systems by simulation and proved to be adequately effective, subject to certain conditions regarding the magnitude of the differential displacements. Here, an example is included which concerns the case of three Staubli RX-90L robots. |
Muhammad Akmal Butt, Petros Maragos Optimum design of Chamfer distance transforms Journal Article IEEE Transactions on Image Processing, 7 (10), pp. 1477–1484, 1998, ISSN: 10577149. Abstract | BibTeX | Links: [PDF] @article{81b, title = {Optimum design of Chamfer distance transforms}, author = {Muhammad Akmal Butt and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/ButtMaragos_ChamfDistTransf_ieeetIP1998.pdf}, doi = {10.1109/83.718487}, issn = {10577149}, year = {1998}, date = {1998-01-01}, journal = {IEEE Transactions on Image Processing}, volume = {7}, number = {10}, pages = {1477--1484}, abstract = {The distance transform has found many applications in image analysis. Chamfer distance transforms are a class of discrete algorithms that offer a good approximation to the desired Euclidean distance transform at a lower computational cost. They can also give integer-valued distances that are more suitable for several digital image processing tasks. The local distances used to compute a chamfer distance transform are selected to minimize an approximation error. A new geometric approach is developed to find optimal local distances. This new approach is easier to visualize than the approaches found in previous work, and can be easily extended to chamfer metrics that use large neighborhoods. A new concept of critical local distances is presented which reduces the computational complexity of the chamfer distance transform without increasing the maximum approximation error}, keywords = {}, pubstate = {published}, tppubtype = {article} } The distance transform has found many applications in image analysis. Chamfer distance transforms are a class of discrete algorithms that offer a good approximation to the desired Euclidean distance transform at a lower computational cost. They can also give integer-valued distances that are more suitable for several digital image processing tasks. 
The local distances used to compute a chamfer distance transform are selected to minimize an approximation error. A new geometric approach is developed to find optimal local distances. This new approach is easier to visualize than the approaches found in previous work, and can be easily extended to chamfer metrics that use large neighborhoods. A new concept of critical local distances is presented which reduces the computational complexity of the chamfer distance transform without increasing the maximum approximation error |
L F C Pessoa, Petros Maragos MRL-Filters: A General Class of Nonlinear Systems and Their Optimal Design for Image Processing Journal Article IEEE Transactions on Image Processing, 7 (7), pp. 966–978, 1998. @article{107, title = {MRL-Filters: A General Class of Nonlinear Systems and Their Optimal Design for Image Processing}, author = {L F C Pessoa and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PessoaMaragos_MRLFilt_ieeetIP1998.pdf}, year = {1998}, date = {1998-01-01}, journal = {IEEE Transactions on Image Processing}, volume = {7}, number = {7}, pages = {966--978}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Balasubramaniam Santhanam, Petros Maragos Harmonic analysis and restoration of separation methods for periodic signal mixtures: Algebraic separation versus comb filtering Journal Article Signal Processing, 69 (1), pp. 81–91, 1998, ISSN: 01651684. Abstract | BibTeX | Links: [PDF] @article{108, title = {Harmonic analysis and restoration of separation methods for periodic signal mixtures: Algebraic separation versus comb filtering}, author = {Balasubramaniam Santhanam and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/SanthanamMaragos_HarmonicAnalysis_SignalProcessing98.pdf}, doi = {10.1016/S0165-1684(98)00088-7}, issn = {01651684}, year = {1998}, date = {1998-01-01}, journal = {Signal Processing}, volume = {69}, number = {1}, pages = {81--91}, abstract = {The problem of separating a mixture of periodic signals into its constituent components occurs in sound detection, biomedical signal processing, and in communications. Existing approaches to solving it are either based on harmonic selection in the frequency domain or on linear comb filtering in the time-domain. In this paper, the recently proposed matrix algebraic separation approach is analyzed in the frequency domain. The insight obtained via this analysis leads to the development of harmonic restoration techniques that fill in the information missing at the harmonics shared by the components and also to the development of constraints on the carrier frequencies and bandwidths for narrowband, bandpass, and periodic AM-FM components for minimum information loss. The restored methods are then applied to mixtures of sines and AM-FM signals. Differences between this improved approach and a similar improvement of the comb filtering approach are also emphasized. {\textcopyright} 1998 Elsevier Science B.V. 
All rights reserved.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The problem of separating a mixture of periodic signals into its constituent components occurs in sound detection, biomedical signal processing, and in communications. Existing approaches to solving it are either based on harmonic selection in the frequency domain or on linear comb filtering in the time-domain. In this paper, the recently proposed matrix algebraic separation approach is analyzed in the frequency domain. The insight obtained via this analysis leads to the development of harmonic restoration techniques that fill in the information missing at the harmonics shared by the components and also to the development of constraints on the carrier frequencies and bandwidths for narrowband, bandpass, and periodic AM-FM components for minimum information loss. The restored methods are then applied to mixtures of sines and AM-FM signals. Differences between this improved approach and a similar improvement of the comb filtering approach are also emphasized. © 1998 Elsevier Science B.V. All rights reserved. |
P. Maragos, M. Akmal Butt, L.F.C. Pessoa Two frontiers in morphological image analysis: differential evolution models and hybrid morphological/linear neural networks Conference Proceedings SIBGRAPI'98. International Symposium on Computer Graphics, Image Processing, and Vision (Cat. No.98EX237), 1998, ISBN: 0-8186-9215-4. BibTeX | Links: [Webpage] [PDF] @conference{Maragos1998, title = {Two frontiers in morphological image analysis: differential evolution models and hybrid morphological/linear neural networks}, author = { P. Maragos and M. {Akmal Butt} and L.F.C. Pessoa}, url = {http://ieeexplore.ieee.org/document/722726/ http://robotics.ntua.gr/wp-content/uploads/sites/2/MaragosButtPesoa_DifMorfMRLNN_SIBGRAPI1998.pdf}, doi = {10.1109/SIBGRA.1998.722726}, isbn = {0-8186-9215-4}, year = {1998}, date = {1998-10-01}, booktitle = {Proceedings SIBGRAPI'98. International Symposium on Computer Graphics, Image Processing, and Vision (Cat. No.98EX237)}, pages = {10--17}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
A Kheddar, C Tzafestas Fitting tele-operation and virtual reality technologies towards teleworking Conference FIR'98, 4th French-Israeli …, 1998. BibTeX | Links: [Webpage] [PDF] @conference{Kheddar1998, title = {Fitting tele-operation and virtual reality technologies towards teleworking}, author = { A Kheddar and C Tzafestas}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.45.2873&rep=rep1&type=pdf http://robotics.ntua.gr/wp-content/uploads/sites/2/Kheddar_FIR98.pdf}, year = {1998}, date = {1998-05-01}, booktitle = {FIR'98, 4th French-Israeli {\ldots}}, pages = {147--152}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Maragos, M A Butt Advances in Differential Morphology: Image Segmentation via Eikonal PDE and Curve Evolution and Reconstruction via Constrained Dilation Flow Conference Proceedings of the fourth international symposium on Mathematical morphology and its applications to image and signal processing, 1998. @conference{268, title = {Advances in Differential Morphology: Image Segmentation via Eikonal PDE and Curve Evolution and Reconstruction via Constrained Dilation Flow}, author = { P Maragos and M A Butt}, year = {1998}, date = {1998-01-01}, booktitle = {Proceedings of the fourth international symposium on Mathematical morphology and its applications to image and signal processing}, pages = {167--174}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
1997 |
S G Tzafestas, T E Krikochoritis, C S Tzafestas Robust Sliding-mode Control of Nine-link Biped Robot Walking Journal Article Journal of Intelligent and Robotic Systems: Theory and Applications, 20 (2-4), pp. 375–402, 1997, ISSN: 09210296. Abstract | BibTeX | Links: [Webpage] [PDF] @article{12b, title = {Robust Sliding-mode Control of Nine-link Biped Robot Walking}, author = {S G Tzafestas and T E Krikochoritis and C S Tzafestas}, url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-0031250682&partnerID=40&md5=f38e59558dfc6492f0513f105db7b4cf http://robotics.ntua.gr/wp-content/uploads/sites/2/TzafestasS_KrikochoritisT_TzafestasC_RobustSliding-modeControlNineLinkBipedRobotWalk_1997-1.pdf}, doi = {10.1023/A:1007924731253}, issn = {09210296}, year = {1997}, date = {1997-01-01}, journal = {Journal of Intelligent and Robotic Systems: Theory and Applications}, volume = {20}, number = {2-4}, pages = {375--402}, abstract = {A nine-link planar biped robot model is considered which, in addition to the main links (i.e., legs, thighs and trunk), includes a two-segment foot. First, a continuous walking pattern of the biped on a flat terrain is synthesized, and the corresponding desired trajectories of the robot joints are calculated. Next, the kinematic and dynamic equations that describe its locomotion during the various walking phases are briefly presented. Finally, a nonlinear robust control approach is followed, motivated by the fact that the control which has to guarantee the stability of the biped robot must take into account its exact nonlinear dynamics. However, an accurate model of the biped robot is not available in practice, due to the existence of uncertainties of various kinds such as unmodeled dynamics and parameter inaccuracies. 
Therefore, under the assumption that the estimation error on the unknown (probably time-varying) parameters is bounded by a given function, a sliding-mode controller is applied, which provides a successful way to preserve stability and achieve good performance, despite the presence of strong modeling imprecisions or uncertainties. The paper includes a set of representative simulation results that demonstrate the very good behavior of the sliding-mode robust biped controller.}, keywords = {}, pubstate = {published}, tppubtype = {article} } A nine-link planar biped robot model is considered which, in addition to the main links (i.e., legs, thighs and trunk), includes a two-segment foot. First, a continuous walking pattern of the biped on a flat terrain is synthesized, and the corresponding desired trajectories of the robot joints are calculated. Next, the kinematic and dynamic equations that describe its locomotion during the various walking phases are briefly presented. Finally, a nonlinear robust control approach is followed, motivated by the fact that the control which has to guarantee the stability of the biped robot must take into account its exact nonlinear dynamics. However, an accurate model of the biped robot is not available in practice, due to the existence of uncertainties of various kinds such as unmodeled dynamics and parameter inaccuracies. Therefore, under the assumption that the estimation error on the unknown (probably time-varying) parameters is bounded by a given function, a sliding-mode controller is applied, which provides a successful way to preserve stability and achieve good performance, despite the presence of strong modeling imprecisions or uncertainties. The paper includes a set of representative simulation results that demonstrate the very good behavior of the sliding-mode robust biped controller. |
Costas S Tzafestas, Nacer K M'Sirdi, N Manamani Adaptive Impedance Control Applied to a Pneumatic Legged Robot Journal Article Journal of Intelligent & Robotic Systems, 20 (2), pp. 105–129, 1997, ISSN: 0921-0296. Abstract | BibTeX | Links: [PDF] @article{13b, title = {Adaptive Impedance Control Applied to a Pneumatic Legged Robot}, author = {Costas S Tzafestas and Nacer K M'Sirdi and N Manamani}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/TzafestasC_Adaptive_1997.pdf}, doi = {10.1023/A:1007987608963}, issn = {0921-0296}, year = {1997}, date = {1997-01-01}, journal = {Journal of Intelligent & Robotic Systems}, volume = {20}, number = {2}, pages = {105--129}, abstract = {An adaptive impedance control scheme with estimation of robot and environment parameters is proposed in this paper. It consists of two stages of adaptation and control. The first one performs an on-line estimation of the robot inertial parameters, during the complete (constrained or not) motion of the leg, while the second one compensates for the uncertainties on the characteristics of the ground (position and stiffness). Simulation results obtained for a single leg of a pneumatic driven, quadruped robot show the effectiveness of the proposed control scheme in case of considerable uncertainty both in the robot and ground parameters.}, keywords = {}, pubstate = {published}, tppubtype = {article} } An adaptive impedance control scheme with estimation of robot and environment parameters is proposed in this paper. It consists of two stages of adaptation and control. The first one performs an on-line estimation of the robot inertial parameters, during the complete (constrained or not) motion of the leg, while the second one compensates for the uncertainties on the characteristics of the ground (position and stiffness). 
Simulation results obtained for a single leg of a pneumatic driven, quadruped robot show the effectiveness of the proposed control scheme in case of considerable uncertainty both in the robot and ground parameters. |
H J A M Heijmans, P Maragos Lattice Calculus of the Morphological Slope Transform Journal Article Signal Processing, 59 , pp. 17–42, 1997, ISSN: 01651684. @article{106, title = {Lattice Calculus of the Morphological Slope Transform}, author = {H J A M Heijmans and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/HeijmansMaragos_LatticeCalculus_SignalProcessing97.pdf}, issn = {01651684}, year = {1997}, date = {1997-01-01}, journal = {Signal Processing}, volume = {59}, pages = {17--42}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
A. Kheddar, C. Tzafestas, P. Coiffet The hidden robot concept-high level abstraction teleoperation Conference Proceedings of the 1997 IEEE/RSJ International Conference on Intelligent Robot and Systems. Innovative Robotics for Real-World Applications. IROS '97, 3 , 1997, ISBN: 0-7803-4119-8. Abstract | BibTeX | Links: [Webpage] [PDF] @conference{Kheddar1997, title = {The hidden robot concept-high level abstraction teleoperation}, author = { A. Kheddar and C. Tzafestas and P. Coiffet}, url = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=656609 http://robotics.ntua.gr/wp-content/uploads/sites/2/the-hidden-robot-concepthigh-level-abstraction-teleoperation.pdf}, doi = {10.1109/IROS.1997.656609}, isbn = {0-7803-4119-8}, year = {1997}, date = {1997-09-01}, booktitle = {Proceedings of the 1997 IEEE/RSJ International Conference on Intelligent Robot and Systems. Innovative Robotics for Real-World Applications. IROS '97}, volume = {3}, pages = {1818--1825}, abstract = {This paper discusses the development of new teleoperator systems. While many innovations during the last decade made teleoperation technology progress, some severe well known lacks that we enumerate still persist. With respect to some attractive solutions proposed for coping with these problems we designed a bilateral control scheme based on what we called the hidden robot concept. The teleoperator achieves tasks manually in a natural way within a virtual environment (VE). Thanks to suitable bilateral transformations, the virtual tasks are being reproduced by any slave robot within the remote site. Mainly task based, our approach is not considered like a high level task knowledge based control. Rather, we consider it like a more refined shared autonomy control with a high level abstraction interface. Three main components are developed: (i) supervision loop, (ii) bilateral transformation layer, (iii) execution loop. 
The approach has been validated experimentally and preliminary results as well as further work are discussed}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper discusses the development of new teleoperator systems. While many innovations during the last decade made teleoperation technology progress, some severe well known lacks that we enumerate still persist. With respect to some attractive solutions proposed for coping with these problems we designed a bilateral control scheme based on what we called the hidden robot concept. The teleoperator achieves tasks manually in a natural way within a virtual environment (VE). Thanks to suitable bilateral transformations, the virtual tasks are being reproduced by any slave robot within the remote site. Mainly task based, our approach is not considered like a high level task knowledge based control. Rather, we consider it like a more refined shared autonomy control with a high level abstraction interface. Three main components are developed: (i) supervision loop, (ii) bilateral transformation layer, (iii) execution loop. The approach has been validated experimentally and preliminary results as well as further work are discussed |
Petros Maragos, Alexandros Potamianos On Using Fractal Features of Speech Sounds in Automatic Speech Recognition Conference Eurospeech, 1997. @conference{Maragos1997, title = {On Using Fractal Features of Speech Sounds in Automatic Speech Recognition}, author = { Petros Maragos and Alexandros Potamianos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/maragos97_eurospeech.pdf}, year = {1997}, date = {1997-09-01}, booktitle = {Eurospeech}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
A. Kheddar, C. Tzafestas, P. Coiffet, T. Kotoku, K. Tanie, I. Mazon, C. Laugier, R. Chellali Parallel, Multi-Robot Long-Distance Teleoperation Conference Proc. 1997 IEEE International Conference on Advanced Robotics (ICAR'97), pp.1007-1012, Monterey, CA, USA, July 7-9, 1997, 1997. @conference{Kheddar1997parallel, title = {Parallel, Multi-Robot Long-Distance Teleoperation}, author = { A. Kheddar and C. Tzafestas and P. Coiffet and T. Kotoku and K. Tanie and I. Mazon and C. Laugier and R. Chellali}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/parallel-multirobots-long-distance-teleoperation.pdf}, year = {1997}, date = {1997-07-01}, booktitle = {Proc. 1997 IEEE International Conference on Advanced Robotics (ICAR'97), pp.1007-1012, Monterey, CA, USA, July 7-9, 1997}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Spyros G. Tzafestas, Platon A. Prokopiou, Costas S. Tzafestas Telemanipulator neurocontrol using multiple RBF networks Conference IEEE International Symposium on Intelligent Control - Proceedings, 1997. Abstract | BibTeX | Links: [PDF] @conference{Tzafestas1997b, title = {Telemanipulator neurocontrol using multiple RBF networks}, author = { Spyros G. Tzafestas and Platon A. Prokopiou and Costas S. Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/telemanipulator-neurocontrol-using-multiple-rbf-networks.pdf}, year = {1997}, date = {1997-07-01}, booktitle = {IEEE International Symposium on Intelligent Control - Proceedings}, abstract = {This paper addresses the control problem of master-slave systems which involve severe modeling errors and other high - level uncertainties, using Neural Networks. The solution approach is based on a recent teleoperator control scheme (S. Lee and H. S. Lee), which is suitably enhanced such that to become capable of compensating the uncertainties. The class of radial-basis functions (RBF) neural networks are employed in a multipartitioned neural network architecture, and a special learning scheme is adopted which distributes the learning error to each subnetwork and allows on-line learning. The effectiveness of the present RBF neurocontroller was investigated through extensive simulation and compared to that of MLP (multi-layer perceptron) neurocontroller and a robust sliding-mode controller representative.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } This paper addresses the control problem of master-slave systems which involve severe modeling errors and other high - level uncertainties, using Neural Networks. The solution approach is based on a recent teleoperator control scheme (S. Lee and H. S. Lee), which is suitably enhanced such that to become capable of compensating the uncertainties. 
The class of radial-basis functions (RBF) neural networks are employed in a multipartitioned neural network architecture, and a special learning scheme is adopted which distributes the learning error to each subnetwork and allows on-line learning. The effectiveness of the present RBF neurocontroller was investigated through extensive simulation and compared to that of MLP (multi-layer perceptron) neurocontroller and a robust sliding-mode controller representative. |
S. G. Tzafestas, P. A. Prokopiou, C. S. Tzafestas Robust telemanipulator control using a partitioned neural network architecture Conference IEEE International Conference on Neural Networks - Conference Proceedings, 3 , 1997, ISSN: 10987576. Abstract | BibTeX | Links: [PDF] @conference{Tzafestas1997robust, title = {Robust telemanipulator control using a partitioned neural network architecture}, author = { S. G. Tzafestas and P. A. Prokopiou and C. S. Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/robust-telemanipulator-control-using-a-partitioned-neural-networ.pdf}, doi = {10.1109/ICNN.1997.614161}, issn = {10987576}, year = {1997}, date = {1997-06-01}, booktitle = {IEEE International Conference on Neural Networks - Conference Proceedings}, volume = {3}, pages = {1755--1760}, abstract = {In this paper the control problem of telemanipulators is considered under the condition that they are subject to modeling and other uncertainties of considerable levels. The design is based on the S. Lee and H.S. Lee teleoperator control scheme (1993, 1994), which is modified so as to be able to compensate the uncertainties, and is implemented using a partitioned multilayer perceptron neural network. Several subnetworks are used each one identifying a term of the manipulator's dynamic model. A new learning algorithm is proposed which distributes the learning error to each subnetwork and enables online training. Several simulation results are provided, which show the robustness ability by the partitioned neurocontroller, and compare it with the results obtained through sliding mode control}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper the control problem of telemanipulators is considered under the condition that they are subject to modeling and other uncertainties of considerable levels. The design is based on the S. Lee and H.S.
Lee teleoperator control scheme (1993, 1994), which is modified so as to be able to compensate the uncertainties, and is implemented using a partitioned multilayer perceptron neural network. Several subnetworks are used each one identifying a term of the manipulator's dynamic model. A new learning algorithm is proposed which distributes the learning error to each subnetwork and enables online training. Several simulation results are provided, which show the robustness ability by the partitioned neurocontroller, and compare it with the results obtained through sliding mode control |
B. Santhanam, P. Maragos Demodulation of discrete multicomponent AM-FM signals using periodic algebraic separation and energy demodulation Conference 1997 IEEE International Conference on Acoustics, Speech, and Signal Processing, 3 , 1997, ISSN: 1520-6149. Abstract | BibTeX | Links: [Webpage] [PDF] @conference{Santhanam1997, title = {Demodulation of discrete multicomponent AM-FM signals using periodic algebraic separation and energy demodulation}, author = { B. Santhanam and P. Maragos}, url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=599542 http://robotics.ntua.gr/wp-content/uploads/sites/2/demodulation-of-discrete-multicomponent-amfm-signals-using-perio.pdf}, doi = {10.1109/ICASSP.1997.599542}, issn = {1520-6149}, year = {1997}, date = {1997-04-01}, booktitle = {1997 IEEE International Conference on Acoustics, Speech, and Signal Processing}, volume = {3}, pages = {2409--2412}, abstract = {Existing multicomponent AM-FM demodulation algorithms either assume spectrally distinct components or components separable via linear filtering and break down when the components overlap spectrally or if one of the components is stronger than the other. In this paper, we present a nonlinear algorithm for multicomponent AM-FM demodulation which avoids the above shortcomings and works well even for extremely small spectral separation of the components. The proposed algorithm separates the multicomponent demodulation problem into two tasks: periodicity-based algebraic separation of the components and then monocomponent demodulation via energy-based methods}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Existing multicomponent AM-FM demodulation algorithms either assume spectrally distinct components or components separable via linear filtering and break down when the components overlap spectrally or if one of the components is stronger than the other. 
In this paper, we present a nonlinear algorithm for multicomponent AM-FM demodulation which avoids the above shortcomings and works well even for extremely small spectral separation of the components. The proposed algorithm separates the multicomponent demodulation problem into two tasks: periodicity-based algebraic separation of the components and then monocomponent demodulation via energy-based methods |
Costas Tzafestas, Philippe Coiffet Computing optimal forces for generalized kinesthetic feedback on the human hand during virtual grasping and manipulation Conference Proceedings of the 1997 IEEE International Conference on Robotics and Automation, ICRA. Part 3 (of 4), 1 , 1997, ISSN: 10504729. Abstract | BibTeX | Links: [Webpage] [PDF] @conference{Tzafestas1997, title = {Computing optimal forces for generalized kinesthetic feedback on the human hand during virtual grasping and manipulation}, author = { Costas Tzafestas and Philippe Coiffet}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-0030689450&partnerID=40&md5=7af2ee142d6712b81d4055a125026bb1 http://robotics.ntua.gr/wp-content/uploads/sites/2/computing-optimal-forces-for-generalised-kinesthetic-feedback-on.pdf}, doi = {10.1109/ROBOT.1997.620025}, issn = {10504729}, year = {1997}, date = {1997-04-01}, booktitle = {Proceedings of the 1997 IEEE International Conference on Robotics and Automation, ICRA. Part 3 (of 4)}, volume = {1}, pages = {118--123}, abstract = {The paper focuses on the problem of force-feedback for the human-operator hand when manipulating virtual objects. We propose a method for the computation of feedback-forces that have to be applied on each individual phalanx and finger of the human hand in order to display pertinent, kinesthetic information about static or dynamic characteristics of objects present in the virtual scene. External forces and moments of the manipulated virtual objects have to be mapped on the contact-forces space of the virtual grasp. The method is based on the solution of a nonlinear programming problem, formulated by performing a static analysis of a general, multiple contact points virtual grasp. 
A methodology for modelling interactions within a virtual environment, and performing realistic grasping and manipulation, is also presented.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } The paper focuses on the problem of force-feedback for the human-operator hand when manipulating virtual objects. We propose a method for the computation of feedback-forces that have to be applied on each individual phalanx and finger of the human hand in order to display pertinent, kinesthetic information about static or dynamic characteristics of objects present in the virtual scene. External forces and moments of the manipulated virtual objects have to be mapped on the contact-forces space of the virtual grasp. The method is based on the solution of a nonlinear programming problem, formulated by performing a static analysis of a general, multiple contact points virtual grasp. A methodology for modelling interactions within a virtual environment, and performing realistic grasping and manipulation, is also presented. |
C. S. Tzafestas, S. G. Tzafestas, P. A. Prokopiou Incremental Control of Three Cooperating Robots in Large-Object-Transfer Operations Conference Proc. 2nd MATHMOD: Mathematical Modeling IMACS Symposium, Vienna, Feb.5-7, 1997, 1997. @conference{67, title = {Incremental Control of Three Cooperating Robots in Large-Object-Transfer Operations}, author = { C. S. Tzafestas and S. G. Tzafestas and P. A. Prokopiou}, year = {1997}, date = {1997-01-01}, booktitle = {Proc. 2nd MATHMOD: Mathematical Modeling IMACS Symposium, Vienna, Feb.5-7, 1997}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
S. G. Tzafestas, T. E. Krikochoritis, C. S. Tzafestas Robust and Adaptive Control of Biped Robot Walking Conference Proc. 1st MobiNet Symp. on Mobile Robotics Technology for Health Care Services (MOBINET'97), Athens, Greece, May 15-16, 271-286, 1997, 1997. @conference{71, title = {Robust and Adaptive Control of Biped Robot Walking}, author = { S. G. Tzafestas and T. E. Krikochoritis and C. S. Tzafestas}, year = {1997}, date = {1997-01-01}, booktitle = {Proc. 1st MobiNet Symp. on Mobile Robotics Technology for Health Care Services (MOBINET'97), Athens, Greece, May 15-16, 271-286, 1997}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
1996 |
Chiou Shann Fuh, Petros Maragos Least-squares algorithms for motion and shape recovery under perspective projection Journal Article Journal of Information Science and Engineering, 12 (1), pp. 1–23, 1996, ISSN: 10162364. @article{Fuh1996, title = {Least-squares algorithms for motion and shape recovery under perspective projection}, author = {Chiou Shann Fuh and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/JISE_199601_01.pdf}, issn = {10162364}, year = {1996}, date = {1996-03-01}, journal = {Journal of Information Science and Engineering}, volume = {12}, number = {1}, pages = {1--23}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
S. G. Tzafestas, M. Raibert, C. S. Tzafestas Robust Sliding Mode Control Applied to a 5-link Biped Robot Journal Article 1996. @article{Raibert1996, title = {Robust Sliding Mode Control Applied to a 5-link Biped Robot}, author = {S. G. Tzafestas and M. Raibert and C. S. Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Tzafestas_RobusSlidinMode_1996.pdf}, year = {1996}, date = {1996-01-01}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
A Potamianos, P Maragos Speech formant frequency and bandwidth tracking using multiband energy demodulation Journal Article 1995 International Conference on Acoustics, Speech, and Signal Processing, 1 , pp. 784–787, 1996, ISSN: 1520-6149. Abstract | BibTeX | Links: [Webpage] [PDF] @article{Potamianos1996, title = {Speech formant frequency and bandwidth tracking using multiband energy demodulation}, author = {A Potamianos and P Maragos}, url = {http://ieeexplore.ieee.org/document/479811/ http://robotics.ntua.gr/wp-content/uploads/sites/2/PotamianosMaragos_SpeecFormntBandwESA_JASA1996.pdf}, doi = {10.1109/ICASSP.1995.479811}, issn = {1520-6149}, year = {1996}, date = {1996-01-01}, journal = {1995 International Conference on Acoustics, Speech, and Signal Processing}, volume = {1}, pages = {784--787}, abstract = {In this paper, the amplitude and frequency (AM–FM) modulation model and a multiband demodulation analysis scheme are applied to formant frequency and bandwidth tracking of speech signals. Filtering by a bank of Gabor bandpass filters is performed to isolate each speech resonance in the signal. Next, the amplitude envelope (AM) and instantaneous frequency (FM) are estimated for each band using the energy separation algorithm (ESA). Short-time formant frequency and bandwidth estimates are obtained from the instantaneous amplitude and frequency signals; two frequency estimates are proposed and their relative merits are discussed. The short-time estimates are used to compute the formant locations and bandwidths. Performance and computational issues of the algorithm are discussed. Overall, multiband demodulation analysis (MDA) is shown to be a useful tool for extracting information from the speech resonances in the time–frequency plane.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this paper, the amplitude and frequency (AM–FM)
modulation model and a multiband demodulation analysis scheme are applied to formant frequency and bandwidth tracking of speech signals. Filtering by a bank of Gabor bandpass filters is performed to isolate each speech resonance in the signal. Next, the amplitude envelope (AM) and instantaneous frequency (FM) are estimated for each band using the energy separation algorithm (ESA). Short-time formant frequency and bandwidth estimates are obtained from the instantaneous amplitude and frequency signals; two frequency estimates are proposed and their relative merits are discussed. The short-time estimates are used to compute the formant locations and bandwidths. Performance and computational issues of the algorithm are discussed. Overall, multiband demodulation analysis (MDA) is shown to be a useful tool for extracting information from the speech resonances in the time–frequency plane. |
P Maragos Differential Morphology and Image-Processing Journal Article IEEE Trans. Image Process., 5 (6), pp. 922–937, 1996. @article{104, title = {Differential Morphology and Image-Processing}, author = {P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/Maragos_DifMorfImProc_ieeetIP1996.pdf}, year = {1996}, date = {1996-01-01}, journal = {IEEE Trans.\ Image Process.}, volume = {5}, number = {6}, pages = {922--937}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
A Kheddar, C Tzafestas, P Coiffet, T Kotoku, K Tanie Multi-robot teleoperation using direct human hand actions Journal Article Advanced Robotics, 11 (8), pp. 799–825, 1996, ISSN: 15685535. Abstract | BibTeX | Links: [PDF] @article{Kheddar1996, title = {Multi-robot teleoperation using direct human hand actions}, author = {A Kheddar and C Tzafestas and P Coiffet and T Kotoku and K Tanie}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/kheddar1996.pdf}, doi = {10.1163/156855398X00343}, issn = {15685535}, year = {1996}, date = {1996-01-01}, journal = {Advanced Robotics}, volume = {11}, number = {8}, pages = {799--825}, abstract = {This paper presents a novel concept for teleoperation using direct human hand(s) actions which we called 'the hidden robot' concept. The proposed teleoperation scheme is composed of three main components: the operator/ computer loop, the execution loop and between them the bilateral transformation modules linked by the communication channel. Within the operator/computer master loop, the operator performs what we call a 'virtual task', without being constrained by the slave robot. At this stage, the bilateral transformation layer is in charge of extracting, at the low level, pertinent parameters from the virtual task and transforming them onto robot control signals. The execution loop performs control of the slave robot(s) to achieve the desired task, described by the virtual one. At this stage, the transformation layer extracts pertinent data to provide feedback when possible and needed. It also makes sure that the task is being performed correctly in the real site; otherwise, it takes necessary recovery procedures or informs the operator to procede in a different way. We will describe in detail each component, highlighting the originalities of our approach. 
We will also present the experiment performed by applying this concept to long-distance, simultaneous teleoperation of four slave robots with different kinematics and situated at different locations in France and in Japan. The experimental task consisted of assembling a four-piece puzzle. All the robots had to perform the same task in parallel. We will discuss the experimental results presented in this paper, concering long-distance teleoperated robot control and round-trip communication time delay. The experiment demonstrated the feasibility of the proposed scheme and gave guidelines related to the direct use of the operator hand, within an intermediate representation, as a guide for task execution.}, keywords = {}, pubstate = {published}, tppubtype = {article} } This paper presents a novel concept for teleoperation using direct human hand(s) actions which we called 'the hidden robot' concept. The proposed teleoperation scheme is composed of three main components: the operator/ computer loop, the execution loop and between them the bilateral transformation modules linked by the communication channel. Within the operator/computer master loop, the operator performs what we call a 'virtual task', without being constrained by the slave robot. At this stage, the bilateral transformation layer is in charge of extracting, at the low level, pertinent parameters from the virtual task and transforming them onto robot control signals. The execution loop performs control of the slave robot(s) to achieve the desired task, described by the virtual one. At this stage, the transformation layer extracts pertinent data to provide feedback when possible and needed. It also makes sure that the task is being performed correctly in the real site; otherwise, it takes necessary recovery procedures or informs the operator to procede in a different way. We will describe in detail each component, highlighting the originalities of our approach. 
We will also present the experiment performed by applying this concept to long-distance, simultaneous teleoperation of four slave robots with different kinematics and situated at different locations in France and in Japan. The experimental task consisted of assembling a four-piece puzzle. All the robots had to perform the same task in parallel. We will discuss the experimental results presented in this paper, concering long-distance teleoperated robot control and round-trip communication time delay. The experiment demonstrated the feasibility of the proposed scheme and gave guidelines related to the direct use of the operator hand, within an intermediate representation, as a guide for task execution. |
C Tzafestas, P Coiffet Real-time collision detection using spherical octrees: virtual reality application Conference Robot and Human Communication, 1996., 5th IEEE International Workshop on, 1996, ISBN: 0780332539. Abstract | BibTeX | Links: [PDF] @conference{Tzafestas1996, title = {Real-time collision detection using spherical octrees: virtual reality application}, author = { C Tzafestas and P Coiffet}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/realtime-collision-detection-using-spherical-octrees-virtual-rea.pdf}, doi = {10.1109/ROMAN.1996.568888}, isbn = {0780332539}, year = {1996}, date = {1996-11-01}, booktitle = {Robot and Human Communication, 1996., 5th IEEE International Workshop on}, pages = {500--506}, abstract = {A method for detecting potential collisions between three-dimensional moving objects is described in this paper. An object-centered, spherical octree representation is defined and implemented for the localisation of potentially colliding features between polyhedral objects. These features are subsequently tested for intersection in order to calculate precisely the actual collision points. Application of the algorithm for the direct manipulation of objects in a virtual scene is considered, to investigate its real-time behaviour. The performance of the algorithm is found to remain linear with respect to the complexity of the colliding objects}, keywords = {}, pubstate = {published}, tppubtype = {conference} } A method for detecting potential collisions between three-dimensional moving objects is described in this paper. An object-centered, spherical octree representation is defined and implemented for the localisation of potentially colliding features between polyhedral objects. These features are subsequently tested for intersection in order to calculate precisely the actual collision points. Application of the algorithm for the direct manipulation of objects in a virtual scene is considered, to investigate its real-time behaviour. 
The performance of the algorithm is found to remain linear with respect to the complexity of the colliding objects |
F.C. Pessoa, P. Maragos Morphological/rank neural networks and their adaptive optimal design for image processing Conference Acoustics, Speech, and Signal Processing (ICASSP-96), 6 , 1996. @conference{Pessoa1996, title = {Morphological/rank neural networks and their adaptive optimal design for image processing}, author = { F.C. Pessoa and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/morphologicalrank-neural-networks-and-their-adaptive-optimal-des.pdf}, year = {1996}, date = {1996-05-01}, booktitle = {Acoustics, Speech, and Signal Processing (ICASSP-96)}, volume = {6}, pages = {3398--3401}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Balasubramaniam Santhanam, Petros Maragos Energy demodulation of two-component AM-FM signal mixtures Journal Article IEEE Signal Processing Letters, 3 (11), pp. 294–298, 1996, ISSN: 10709908. @article{Santhanam1996, title = {Energy demodulation of two-component AM-FM signal mixtures}, author = {Balasubramaniam Santhanam and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/santhanam1996-1.pdf}, doi = {10.1109/97.542159}, issn = {10709908}, year = {1996}, date = {1996-01-01}, journal = {IEEE Signal Processing Letters}, volume = {3}, number = {11}, pages = {294--298}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
1995 |
Petros Maragos, Alan C Bovik Image demodulation using multidimensional energy separation Journal Article Journal of the Optical Society of America A, 12 (9), pp. 1867, 1995, ISSN: 1084-7529. Abstract | BibTeX | Links: [Webpage] [PDF] @article{Maragos1995b, title = {Image demodulation using multidimensional energy separation}, author = {Petros Maragos and Alan C Bovik}, url = {https://www.osapublishing.org/abstract.cfm?URI=josaa-12-9-1867 http://robotics.ntua.gr/wp-content/uploads/sites/2/MaragosBovik_ImageDemodMultidimEnergSepar_JOSA1995.pdf}, doi = {10.1364/JOSAA.12.001867}, issn = {1084-7529}, year = {1995}, date = {1995-09-01}, journal = {Journal of the Optical Society of America A}, volume = {12}, number = {9}, pages = {1867}, abstract = {Locally narrow-band images can be modeled as two-dimensional (2D) spatial AM–FM signals with several applications in image texture analysis and computer vision. We formulate an image-demodulation problem and present a solution based on the multidimensional energy operator $\Phi(f) = \|\nabla f\|^2 - f \nabla^2 f$. This nonlinear operator is a multidimensional extension of the one-dimensional (1D) energy-tracking operator $\Psi(f) = (f')^2 - f f''$, which has been found useful for demodulating 1D AM–FM and speech signals. We discuss some interesting properties of the multidimensional operator and develop a multidimensional energy-separation algorithm to estimate the amplitude envelope and instantaneous frequencies of 2D spatially varying AM–FM signals. Experiments are also presented on applying this 2D energy-demodulation algorithm to estimate the instantaneous amplitude contrast and spatial frequencies of image textures bandpass filtered by means of Gabor filters.
The attractive features of the multidimensional energy operator and the 2D energy-separation algorithm are their simplicity, efficiency, and ability to track instantaneously varying spatial-modulation patterns.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Locally narrow-band images can be modeled as two-dimensional (2D) spatial AM–FM signals with several applications in image texture analysis and computer vision. We formulate an image-demodulation problem and present a solution based on the multidimensional energy operator $\Phi(f) = \|\nabla f\|^2 - f \nabla^2 f$. This nonlinear operator is a multidimensional extension of the one-dimensional (1D) energy-tracking operator $\Psi(f) = (f')^2 - f f''$, which has been found useful for demodulating 1D AM–FM and speech signals. We discuss some interesting properties of the multidimensional operator and develop a multidimensional energy-separation algorithm to estimate the amplitude envelope and instantaneous frequencies of 2D spatially varying AM–FM signals. Experiments are also presented on applying this 2D energy-demodulation algorithm to estimate the instantaneous amplitude contrast and spatial frequencies of image textures bandpass filtered by means of Gabor filters. The attractive features of the multidimensional energy operator and the 2D energy-separation algorithm are their simplicity, efficiency, and ability to track instantaneously varying spatial-modulation patterns. |
P Maragos Slope transforms: theory and application to nonlinear signal processing Journal Article IEEE Transactions on Signal Processing, 43 (4), pp. 864–877, 1995, ISSN: 1053587X. BibTeX | Links: [Webpage] [PDF] @article{Maragos1995, title = {Slope transforms: theory and application to nonlinear signal processing}, author = {P Maragos}, url = {http://ieeexplore.ieee.org/document/376839/ http://robotics.ntua.gr/wp-content/uploads/sites/2/Maragos_SlopeTransf_ieeetSP1995.pdf}, doi = {10.1109/78.376839}, issn = {1053587X}, year = {1995}, date = {1995-04-01}, journal = {IEEE Transactions on Signal Processing}, volume = {43}, number = {4}, pages = {864--877}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
P -F Yang, P Maragos Min-Max Classifiers: Learnability, Design and Application Journal Article Pattern Recognition, 1995. @article{99b, title = {Min-Max Classifiers: Learnability, Design and Application}, author = {P -F Yang and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/YangMaragos_MinMaxClassifiers_PR95.pdf}, year = {1995}, date = {1995-01-01}, journal = {Pattern Recognition}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Petros Maragos, Alexandros Potamianos Higher Order Differential Energy Operators Journal Article IEEE Signal Processing Letters, 2 (8), pp. 152–154, 1995, ISSN: 15582361. Abstract | BibTeX | Links: [PDF] @article{100, title = {Higher Order Differential Energy Operators}, author = {Petros Maragos and Alexandros Potamianos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosPotamianos_HOEnergOper_ieeeSPL1995.pdf}, doi = {10.1109/97.404130}, issn = {15582361}, year = {1995}, date = {1995-01-01}, journal = {IEEE Signal Processing Letters}, volume = {2}, number = {8}, pages = {152--154}, abstract = {Instantaneous signal operators $\Upsilon_k(x) = \dot{x} x^{(k-1)} - x x^{(k)}$ of integer orders k are proposed to measure the cross energy between a signal x and its derivatives. These higher order differential energy operators contain as a special case, for k=2, the Teager-Kaiser (1990) operator. When applied to (possibly modulated) sinusoids, they yield several new energy measurements useful for parameter estimation or AM-FM demodulation. Applying them to sampled signals involves replacing derivatives with differences that lead to several useful discrete energy operators defined on an extremely short window of samples}, keywords = {}, pubstate = {published}, tppubtype = {article} } Instantaneous signal operators $\Upsilon_k(x) = \dot{x} x^{(k-1)} - x x^{(k)}$ of integer orders k are proposed to measure the cross energy between a signal x and its derivatives. These higher order differential energy operators contain as a special case, for k=2, the Teager-Kaiser (1990) operator.
When applied to (possibly modulated) sinusoids, they yield several new energy measurements useful for parameter estimation or AM-FM demodulation. Applying them to sampled signals involves replacing derivatives with differences that lead to several useful discrete energy operators defined on an extremely short window of samples |
P. Maragos, A. Potamianos, B. Santhanam Instantaneous Energy Operators: Applications to Speech Processing and Communications Conference Proc. IEEE Workshop on Nonlinear Signal and Image Processing, Halkidiki, Greece, pp.955-958, June 1995, 1995. @conference{Maragos1995instantaneous, title = {Instantaneous Energy Operators: Applications to Speech Processing and Communications}, author = { P. Maragos and A. Potamianos and B. Santhanam}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/nsip95.pdf}, year = {1995}, date = {1995-06-01}, booktitle = {Proc. IEEE Workshop on Nonlinear Signal and Image Processing, Halkidiki, Greece, pp.955-958, June 1995}, pages = {955--958}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
A. Potamianos, P. Maragos Speech formant frequency and bandwidth tracking using multiband energy demodulation Conference 1995 International Conference on Acoustics, Speech, and Signal Processing, 1 , 1995, ISSN: 1520-6149. Abstract | BibTeX | Links: [Webpage] [PDF] @conference{Potamianos1995, title = {Speech formant frequency and bandwidth tracking using multiband energy demodulation}, author = { A. Potamianos and P. Maragos}, url = {http://ieeexplore.ieee.org/document/479811/ http://robotics.ntua.gr/wp-content/uploads/sites/2/speech-formant-frequency-and-bandwidth-tracking-using-multiband-.pdf}, doi = {10.1109/ICASSP.1995.479811}, issn = {1520-6149}, year = {1995}, date = {1995-05-01}, booktitle = {1995 International Conference on Acoustics, Speech, and Signal Processing}, volume = {1}, pages = {784--787}, abstract = {In this paper, the amplitude and frequency (AM–FM) modulation model and a multiband demodulation analysis scheme are applied to formant frequency and bandwidth tracking of speech signals. Filtering by a bank of Gabor bandpass filters is performed to isolate each speech resonance in the signal. Next, the amplitude envelope (AM) and instantaneous frequency (FM) are estimated for each band using the energy separation algorithm (ESA). Short-time formant frequency and bandwidth estimates are obtained from the instantaneous amplitude and frequency signals; two frequency estimates are proposed and their relative merits are discussed. The short-time estimates are used to compute the formant locations and bandwidths. Performance and computational issues of the algorithm are discussed. Overall, multiband demodulation analysis (MDA) is shown to be a useful tool for extracting information from the speech resonances in the time–frequency plane.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In this paper, the amplitude and frequency (AM–FM) 
modulation model and a multiband demodulation analysis scheme are applied to formant frequency and bandwidth tracking of speech signals. Filtering by a bank of Gabor bandpass filters is performed to isolate each speech resonance in the signal. Next, the amplitude envelope (AM) and instantaneous frequency (FM) are estimated for each band using the energy separation algorithm (ESA). Short-time formant frequency and bandwidth estimates are obtained from the instantaneous amplitude and frequency signals; two frequency estimates are proposed and their relative merits are discussed. The short-time estimates are used to compute the formant locations and bandwidths. Performance and computational issues of the algorithm are discussed. Overall, multiband demodulation analysis (MDA) is shown to be a useful tool for extracting information from the speech resonances in the time–frequency plane. |
1994 |
H M Hanson, P Maragos, A Potamianos A system for finding speech formants and modulations via energy separation Journal Article IEEE Transactions on Speech and Audio Processing, 2 (3), pp. 436–443, 1994, ISSN: 1063-6676. Abstract | BibTeX | Links: [PDF] @article{Hanson1994, title = {A system for finding speech formants and modulations via energy separation}, author = {H M Hanson and P Maragos and A Potamianos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/HansonMaragosPotamianos_IterESA_ieeetSAP1994.pdf}, doi = {10.1109/89.294358}, issn = {1063-6676}, year = {1994}, date = {1994-07-01}, journal = {IEEE Transactions on Speech and Audio Processing}, volume = {2}, number = {3}, pages = {436--443}, abstract = {This correspondence presents an experimental system that uses an energy-tracking operator and a related energy separation algorithm to automatically find speech formants and amplitude/frequency modulations in voiced speech segments. Initial estimates of formant center frequencies are provided by either LPC or morphological spectral peak picking. These estimates are then shown to be improved by a combination of bandpass filtering and iterative application of energy separation.}, keywords = {}, pubstate = {published}, tppubtype = {article} } This correspondence presents an experimental system that uses an energy-tracking operator and a related energy separation algorithm to automatically find speech formants and amplitude/frequency modulations in voiced speech segments. Initial estimates of formant center frequencies are provided by either LPC or morphological spectral peak picking. These estimates are then shown to be improved by a combination of bandpass filtering and iterative application of energy separation. |
Alan C Bovik, Petros Maragos Conditions for Positivity of an Energy Operator Journal Article IEEE Transactions on Signal Processing, 42 (2), pp. 469–471, 1994, ISSN: 19410476. @article{95c, title = {Conditions for Positivity of an Energy Operator}, author = {Alan C Bovik and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/BovikMaragos_PosEnOp_ieeetSP1994.pdf}, doi = {10.1109/78.275632}, issn = {19410476}, year = {1994}, date = {1994-01-01}, journal = {IEEE Transactions on Signal Processing}, volume = {42}, number = {2}, pages = {469--471}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
A Potamianos, P Maragos A Comparison of the Energy Operator and Hilbert Transform Approaches for Signal and Speech Demodulation Journal Article Signal Processing, 37 (1), pp. 95–120, 1994. @article{96c, title = {A Comparison of the Energy Operator and Hilbert Transform Approaches for Signal and Speech Demodulation}, author = {A Potamianos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/PotamianosMaragos_ComparEnergOpHilbertTransfSigSpeechDemod_SigPro1994.pdf}, year = {1994}, date = {1994-01-01}, journal = {Signal Processing}, volume = {37}, number = {1}, pages = {95--120}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Petros Maragos Morphological systems: Slope transforms and max-min difference and differential equations Journal Article Signal Processing, 38 (1), pp. 57–77, 1994, ISSN: 01651684. Abstract | BibTeX | Links: [PDF] @article{Maragos1994, title = {Morphological systems: Slope transforms and max-min difference and differential equations}, author = {Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/1994_Maragos_Msystems-SlopeTransf-MaxminDiffEqns_SigPro.pdf}, doi = {10.1016/0165-1684(94)90057-4}, issn = {01651684}, year = {1994}, date = {1994-01-01}, journal = {Signal Processing}, volume = {38}, number = {1}, pages = {57--77}, abstract = {Linear time-invariant systems are well understood in the time domain either as convolutions with their impulse response or by describing their dynamics via linear differential equations. Their analysis in the frequency domain using their exponential eigenfunctions and related frequency response is also greatly facilitated via Fourier transforms. Attempting to extend such ideas to nonlinear systems, we present in this paper a theory for a broad class of nonlinear systems and a collection of related analytic tools, which parallel the functionality of and have many conceptual similarities with ideas and tools used in linear systems. These nonlinear systems are time-invariant dilations or erosions, in continuous and discrete time, and obey a supremum- or infimum-of-sums superposition. In the time domain, their equivalence with morphological dilation or erosion by their impulse response is established, and their causality and stability are examined. A class of nonlinear difference and differential equations based on max-min operations is also introduced to describe their dynamics. After finding that the affine signals $\alpha t + b$ are eigenfunctions of morphological systems, their slope response is introduced as a function of the slope $\alpha$, and related slope transforms for arbitrary signals are developed. 
These ideas provide a transform (slope) domain for morphological systems, where dilation and erosion in time corresponds to addition of slope transforms. Recursive morphological systems, described by max-min difference equations, are also investigated and shown to be equivalent to dilation or erosion by infinite-support structuring elements. Their analysis is significantly aided by using slope transforms. These recursive morphological systems are applied to the design of ideal-cutoff slope-selective filters which are useful for signal envelope estimation. \textcopyright{} 1994.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Linear time-invariant systems are well understood in the time domain either as convolutions with their impulse response or by describing their dynamics via linear differential equations. Their analysis in the frequency domain using their exponential eigenfunctions and related frequency response is also greatly facilitated via Fourier transforms. Attempting to extend such ideas to nonlinear systems, we present in this paper a theory for a broad class of nonlinear systems and a collection of related analytic tools, which parallel the functionality of and have many conceptual similarities with ideas and tools used in linear systems. These nonlinear systems are time-invariant dilations or erosions, in continuous and discrete time, and obey a supremum- or infimum-of-sums superposition. In the time domain, their equivalence with morphological dilation or erosion by their impulse response is established, and their causality and stability are examined. A class of nonlinear difference and differential equations based on max-min operations is also introduced to describe their dynamics. After finding that the affine signals $\alpha t + b$ are eigenfunctions of morphological systems, their slope response is introduced as a function of the slope $\alpha$, and related slope transforms for arbitrary signals are developed. 
These ideas provide a transform (slope) domain for morphological systems, where dilation and erosion in time corresponds to addition of slope transforms. Recursive morphological systems, described by max-min difference equations, are also investigated and shown to be equivalent to dilation or erosion by infinite-support structuring elements. Their analysis is significantly aided by using slope transforms. These recursive morphological systems are applied to the design of ideal-cutoff slope-selective filters which are useful for signal envelope estimation. © 1994. |
R W Brockett, P Maragos Evolution equations for continuous-scale morphology Journal Article 1994. @article{98c, title = {Evolution equations for continuous-scale morphology}, author = {R W Brockett and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/BrockettMaragos_PdeMorf_ieeetSP1994.pdf}, year = {1994}, date = {1994-01-01}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
P. Maragos Differential morphology: Multiscale image dynamics, max-min difference equations, and slope transforms Conference Proceedings - International Conference on Image Processing, ICIP, 2 , 1994, ISSN: 15224880. @conference{Maragos1994b, title = {Differential morphology: Multiscale image dynamics, max-min difference equations, and slope transforms}, author = { P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/differential-morphology-multiscale-image-dynamics-maxmin-differe.pdf}, doi = {10.1109/ICIP.1994.413630}, issn = {15224880}, year = {1994}, date = {1994-11-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, volume = {2}, pages = {545--549}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P. Maragos, A. C. Bovik Demodulation of images modeled by amplitude-frequency modulations using multidimensional energy separation Conference Proceedings - International Conference on Image Processing, ICIP, 3 , 1994, ISSN: 15224880. Abstract | BibTeX | Links: [PDF] @conference{Maragos1994c, title = {Demodulation of images modeled by amplitude-frequency modulations using multidimensional energy separation}, author = { P. Maragos and A. C. Bovik}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/demodulation-of-images-modeled-by-amplitudefrequency-modulations.pdf}, doi = {10.1109/ICIP.1994.413772}, issn = {15224880}, year = {1994}, date = {1994-11-01}, booktitle = {Proceedings - International Conference on Image Processing, ICIP}, volume = {3}, pages = {421--425}, abstract = {Locally narrowband images can be modeled as 2D spatial AM-FM signals with several applications in image texture analysis and computer vision. In this paper we formulate such an image demodulation problem, and present a solution based on the multidimensional energy operator $\Phi(f) = \|\nabla f\|^2 - f\,\nabla^2 f$.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Locally narrowband images can be modeled as 2D spatial AM-FM signals with several applications in image texture analysis and computer vision. In this paper we formulate such an image demodulation problem, and present a solution based on the multidimensional energy operator $\Phi(f) = \|\nabla f\|^2 - f\,\nabla^2 f$. |
P Maragos A Time and Slope Domain Theory of Morphological Systems: Slope Transforms and Max-Min Dynamics Conference Proc. European Signal Processing Conf. (EUSIPCO-94), Edinburgh, Scotland, 1994. @conference{279, title = {A Time and Slope Domain Theory of Morphological Systems: Slope Transforms and Max-Min Dynamics}, author = { P Maragos}, year = {1994}, date = {1994-09-01}, booktitle = {Proc. European Signal Processing Conf. (EUSIPCO-94)}, pages = {II:971--974}, address = {Edinburgh, Scotland}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
A Potamianos, P Maragos Applications of Speech Processing Using an AM--FM Modulation Model and Energy Operators Conference Proc. European Signal Process. Conf., 1994. @conference{Potamianos1994, title = {Applications of Speech Processing Using an AM--FM Modulation Model and Energy Operators}, author = { A Potamianos and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Potamianos_ApplicSpeechProc_1994.pdf}, year = {1994}, date = {1994-01-01}, booktitle = {Proc. European Signal Process. Conf.}, pages = {III:1669--1672}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Petros Maragos Fractal Signal Analysis Using Mathematical Morphology Book Chapter Hawkes, Peter W (Ed.): 88 , pp. 199–246, Academic Press, 1994, ISSN: 0065-2539. Abstract | BibTeX | Links: [Webpage] [PDF] @inbook{Maragos1994d, title = {Fractal Signal Analysis Using Mathematical Morphology}, author = {Petros Maragos}, editor = {Peter W Hawkes}, url = {http://www.sciencedirect.com/science/article/pii/S0065253908605491 http://robotics.ntua.gr/wp-content/uploads/sites/2/maragos1994.pdf}, doi = {10.1016/S0065-2539(08)60549-1}, issn = {0065-2539}, year = {1994}, date = {1994-01-01}, volume = {88}, pages = {199--246}, publisher = {Academic Press}, series = {Advances in Electronics and Electron Physics}, abstract = {This chapter provides the definitions of basic morphological transformations for sets and signals—i.e., the erosion, dilation, and opening operations—and surveys the theory of fractal dimensions. There is a proliferation of fractal dimensions, all of which are more or less capable of measuring the degree of fragmentation of a signal's graph; their definitions and interrelationships are also discussed in the chapter. Emphasis is given on the Minkowski–Bouligand dimension, whose analysis is done using morphological operations. The chapter also reviews three classes of parametric fractal signals and related algorithms for their synthesis. The performance of the presented morphological method for measuring fractal dimension is tested by applying it to the above synthetic fractal signals. In the chapter, various covering methods—a class of general and efficient approaches to compute the fractal dimension of arbitrary fractal signals—are discussed. 
The morphological covering approach to find the fractal dimension of 2D signals are described in the chapter followed by the fractal binary image modeling using collages.}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } This chapter provides the definitions of basic morphological transformations for sets and signals—i.e., the erosion, dilation, and opening operations—and surveys the theory of fractal dimensions. There is a proliferation of fractal dimensions, all of which are more or less capable of measuring the degree of fragmentation of a signal's graph; their definitions and interrelationships are also discussed in the chapter. Emphasis is given on the Minkowski–Bouligand dimension, whose analysis is done using morphological operations. The chapter also reviews three classes of parametric fractal signals and related algorithms for their synthesis. The performance of the presented morphological method for measuring fractal dimension is tested by applying it to the above synthetic fractal signals. In the chapter, various covering methods—a class of general and efficient approaches to compute the fractal dimension of arbitrary fractal signals—are discussed. The morphological covering approach to find the fractal dimension of 2D signals are described in the chapter followed by the fractal binary image modeling using collages. |
1993 |
Petros Maragos, Fang Kuo Sun Measuring the Fractal Dimension of Signals: Morphological Covers and Iterative Optimization Journal Article IEEE Transactions on Signal Processing, 41 (1), pp. 108–121, 1993, ISSN: 19410476. @article{91c, title = {Measuring the Fractal Dimension of Signals: Morphological Covers and Iterative Optimization}, author = {Petros Maragos and Fang Kuo Sun}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosSun_FrDimMorfCov_ieeetSP1993.pdf}, doi = {10.1109/TSP.1993.193131}, issn = {19410476}, year = {1993}, date = {1993-01-01}, journal = {IEEE Transactions on Signal Processing}, volume = {41}, number = {1}, pages = {108--121}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
P Maragos, J F Kaiser, T F Quatieri On amplitude and frequency demodulation using energy operators Journal Article IEEE Transactions on Signal Processing, 41 (4), pp. 1532–1550, 1993. @article{92c, title = {On amplitude and frequency demodulation using energy operators}, author = {P Maragos and J F Kaiser and T F Quatieri}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosKaiserQuatieri_AFModEnergOper_ieeetSP1993.pdf}, year = {1993}, date = {1993-01-01}, journal = {IEEE Transactions on Signal Processing}, volume = {41}, number = {4}, pages = {1532--1550}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
P Maragos, J F Kaiser, T F Quatieri Energy separation in signal modulation with application to speech analysis Journal Article IEEE Transactions on Signal Processing, 41 (10), pp. 3024–3051, 1993. @article{93c, title = {Energy separation in signal modulation with application to speech analysis}, author = {P Maragos and J F Kaiser and T F Quatieri}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosKaiserQuatieri_ESA_ieeetSP1993.pdf}, year = {1993}, date = {1993-01-01}, journal = {IEEE Transactions on Signal Processing}, volume = {41}, number = {10}, pages = {3024--3051}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
A C Bovik, P Maragos, T F Quatieri AM-FM Energy Detection and Separation in Noise Using Multiband Energy Operators Journal Article IEEE Transactions on Signal Processing, 41 (12), pp. 3245–3265, 1993, ISBN: 0780308786. @article{94c, title = {AM-FM Energy Detection and Separation in Noise Using Multiband Energy Operators}, author = {A C Bovik and P Maragos and T F Quatieri}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/BovikMaragosQuatieri_NoisMesa_ieeetSP1993.pdf}, isbn = {0780308786}, year = {1993}, date = {1993-01-01}, journal = {IEEE Transactions on Signal Processing}, volume = {41}, number = {12}, pages = {3245--3265}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
M Raibert, S Tzafestas, C Tzafestas Comparative simulation study of three control techniques applied to a biped robot Conference International Conference on Systems, Man and Cybernetics, Le Touquet, France, 1993. @conference{Raibert1993, title = {Comparative simulation study of three control techniques applied to a biped robot}, author = { M Raibert and S Tzafestas and C Tzafestas}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/comparative-simulation-study-of-three-control-techniques-applied.pdf}, year = {1993}, date = {1993-10-01}, booktitle = {International Conference on Systems, Man and Cybernetics, Le Touquet, France}, pages = {494--502}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
H. M. Hanson, P. Maragos, A. Potamianos Finding Speech Formants and Modulations via Energy Separation: With an Application to a Vocoder Conference Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-93), Minneapolis, MN, 1993. @conference{Hanson1993b, title = {Finding Speech Formants and Modulations via Energy Separation: With an Application to a Vocoder}, author = {H. M. Hanson and P. Maragos and A. Potamianos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/hanson1993.pdf}, year = {1993}, date = {1993-04-04}, booktitle = {Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-93)}, address = {Minneapolis, MN}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P.-F. Yang, P Maragos Morphological Systems for Character Image Processing and Recognition Conference Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-93), Minneapolis, MN, 1993, ISSN: 07367791. @conference{Yang1993, title = {Morphological Systems for Character Image Processing and Recognition}, author = { P.-F. Yang and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/yang1993.pdf}, issn = {07367791}, year = {1993}, date = {1993-04-01}, booktitle = {Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-93)}, address = {Minneapolis, MN}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C S Fuh, P Maragos, L Vincent Visual Motion Correspondence by Region-Based Approaches Conference Proc. of 1993 Asian Conference on Computer Vision (ACCV--93), Osaka, Japan, 1993. @conference{280, title = {Visual Motion Correspondence by Region-Based Approaches}, author = { C S Fuh and P Maragos and L Vincent}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/FuhMaragosVincent_VisualMotionCorrespondenceByRegionBasedApproaches_ACCV1993.pdf}, year = {1993}, date = {1993-01-01}, booktitle = {Proc. of 1993 Asian Conference on Computer Vision (ACCV--93)}, pages = {784--789}, address = {Osaka, Japan}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Maragos Max-Min Difference Equations and Recursive Morphological Systems Conference Proc. Int'l Workshop on Mathematical Morphology and its Applications to Signal Processing, Barcelona, Spain, 1993. @conference{281, title = {Max-Min Difference Equations and Recursive Morphological Systems}, author = { P Maragos}, url = {http://cvsp.cs.ntua.gr/publications/confr/1993_Maragos_MaxMinDiffEqnsRecursiveMorphSystems_MathMorfWorkshop.pdf}, year = {1993}, date = {1993-01-01}, booktitle = {Proc. Int'l Workshop on Mathematical Morphology and its Applications to Signal Processing}, pages = {168--173}, address = {Barcelona, Spain}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Maragos, J F Kaiser, T F Quatieri On amplitude and frequency demodulation using energy operators Conference Signal Processing IEEE Transactions on, 41 (4), 1993. @conference{287, title = {On amplitude and frequency demodulation using energy operators}, author = { P Maragos and J F Kaiser and T F Quatieri}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosKaiserQuatieri_SeparatingAmplitudeFromFreqModulationsUsingEnergyOperators_ICASSP1992.pdf}, year = {1993}, date = {1993-01-01}, booktitle = {Signal Processing IEEE Transactions on}, volume = {41}, number = {4}, pages = {1532--1550}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
1992 |
P. Maragos (Ed.) Proceedings of SPIE, Visual Communications and Image Processing Book SPIE Optical Engineering Press, Bellingham, Washington, USA, 1992. @book{Maragos1992b, title = {Proceedings of SPIE, Visual Communications and Image Processing}, editor = {P. Maragos}, year = {1992}, date = {1992-03-01}, publisher = {SPIE Optical Engineering Press}, address = {Bellingham, Washington, USA}, keywords = {}, pubstate = {published}, tppubtype = {book} } |
P.-F. Yang, P. Maragos Character Recognition Using Min-Max Classifiers Designed via an LMS Algorithm Conference Visual Communications and Image Processing '92, Proc. SPIE, 1818 , Boston, Massachusetts, 1992. @conference{Yang1992b, title = {Character Recognition Using Min-Max Classifiers Designed via an LMS Algorithm}, author = {P.-F. Yang and P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/yang1992.pdf}, doi = {10.1117/12.131482}, year = {1992}, date = {1992-11-01}, booktitle = {Visual Communications and Image Processing '92, Proc. SPIE}, volume = {1818}, pages = {674--685}, address = {Boston, Massachusetts}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
A.C. Bovik, P. Maragos, T.F. Quatieri Measuring Amplitude and Frequency Modulations in Noise Using Multiband Energy Operators Conference Proceedings of 1992 International Symposium on Time-Frequency and Time-Scale Analysis, (4), Victoria, B.C., Canada, 1992, ISBN: 0-7803-0805-0. Abstract | BibTeX | Links: [PDF] @conference{Bovik1992, title = {Measuring Amplitude and Frequency Modulations in Noise Using Multiband Energy Operators}, author = {A.C. Bovik and P. Maragos and T.F. Quatieri}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/measuring-amplitude-and-frequency-modulations-in-noise-using-mul.pdf}, doi = {10.1109/TFTSA.1992.274246}, isbn = {0-7803-0805-0}, year = {1992}, date = {1992-10-01}, booktitle = {Proceedings of 1992 International Symposium on Time-Frequency and Time-Scale Analysis}, number = {4}, pages = {3--6}, address = {Victoria, B.C., Canada}, abstract = {The statistical properties of the nonlinear energy operator $\Psi(s) = (ds/dt)^2 - s\,d^2s/dt^2$ and a related energy separation algorithm (ESA) are developed. The ESA uses $\Psi$ to demodulate noisy AM-FM signals. The performance of $\Psi$ and the ESA when applied to bandpass noisy AM-FM signals is examined. The predicted performance is found to be greatly improved if the local signal frequencies occur within the filter passband. Using this observation, a multiband energy operator and ESA approach are devised. 
The results suggest that greatly improved practical strategies are feasible for tracking and identifying local pattern coherencies manifested as local concentrations of signal frequencies}, keywords = {}, pubstate = {published}, tppubtype = {conference} } The statistical properties of the nonlinear energy operator $\Psi(s) = (ds/dt)^2 - s\,d^2s/dt^2$ and a related energy separation algorithm (ESA) are developed. The ESA uses $\Psi$ to demodulate noisy AM-FM signals. The performance of $\Psi$ and the ESA when applied to bandpass noisy AM-FM signals is examined. The predicted performance is found to be greatly improved if the local signal frequencies occur within the filter passband. Using this observation, a multiband energy operator and ESA approach are devised. The results suggest that greatly improved practical strategies are feasible for tracking and identifying local pattern coherencies manifested as local concentrations of signal frequencies |
P. Maragos, J. F. Kaiser, T. F. Quatieri On Separating Amplitude from Frequency Modulations Using Energy Operators Conference Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-92), II , San Francisco, CA, 1992. @conference{Maragos1992, title = {On Separating Amplitude from Frequency Modulations Using Energy Operators}, author = {P. Maragos and J. F. Kaiser and T. F. Quatieri}, url = {http://cvsp.cs.ntua.gr/publications/confr/MaragosKaiserQuatieri_SeparatingAmplitudeFromFreqModulationsUsingEnergyOperators_ICASSP1992.pdf}, doi = {10.1109/ICASSP.1992.226135}, year = {1992}, date = {1992-03-01}, booktitle = {Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-92)}, volume = {II}, pages = {1--4}, address = {San Francisco, CA}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
R W Brockett, P Maragos Evolution Equations for Continuous-Scale Morphology Conference Proc. Int'l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-92), III , San Francisco, CA, 1992. @conference{288, title = {Evolution Equations for Continuous-Scale Morphology}, author = { R W Brockett and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/BrockettMaragos_EvolutionEquationsForContinuousScaleMorph_ICASSP1992.pdf}, year = {1992}, date = {1992-01-01}, booktitle = {Proc. Int'l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-92)}, volume = {III}, pages = {125--128}, address = {San Francisco, CA}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Petros Maragos, Alan C Bovik, Thomas F Quatieri A Multidimensional Energy Operator for Image Processing Conference Visual Communications and Image Processing '92, Proc. SPIE, 1818 , 1992, ISSN: 0277786X. @conference{285, title = {A Multidimensional Energy Operator for Image Processing}, author = { Petros Maragos and Alan C Bovik and Thomas F Quatieri}, url = {http://cvsp.cs.ntua.gr/publications/confr/1992_MaragosBovikQuatieri_MultidimEnergyOperForImageProcessing_SPIE-1818.pdf}, doi = {10.1117/12.131436}, issn = {0277786X}, year = {1992}, date = {1992-01-01}, booktitle = {Visual Communications and Image Processing '92, Proc. SPIE}, volume = {1818}, pages = {177--186}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Chiou-shann Fuh, Petros Maragos Affine models for motion and shape recovery Conference Visual Communications and Image Processing '92, Proc. SPIE, 1818 , Boston, Massachusetts, 1992, ISSN: 0277786X. @conference{286, title = {Affine models for motion and shape recovery}, author = { Chiou-shann Fuh and Petros Maragos}, url = {http://cvsp.cs.ntua.gr/publications/confr/1992_FuhMaragos_AffineModelsForMotionShapeRecovery_SPIE-1818.pdf}, issn = {0277786X}, year = {1992}, date = {1992-01-01}, booktitle = {Visual Communications and Image Processing '92, Proc. SPIE}, volume = {1818}, pages = {120--134}, address = {Boston, Massachusetts}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
1991 |
Chiou-Shann Fuh, Petros Maragos Motion displacement estimation using an affine model for image matching Journal Article Optical Engineering, 30 (7), pp. 881, 1991, ISSN: 00913286, (Invited Paper). Abstract | BibTeX | Links: [Webpage] [PDF] @article{Fuh1991, title = {Motion displacement estimation using an affine model for image matching}, author = {Chiou-Shann Fuh and Petros Maragos}, url = {http://opticalengineering.spiedigitallibrary.org/article.aspx?doi=10.1117/12.55885 http://robotics.ntua.gr/wp-content/uploads/sites/2/1991_FuhMaragos_AffineMotionDispl_OptEng.pdf}, doi = {10.1117/12.55885}, issn = {00913286}, year = {1991}, date = {1991-07-01}, journal = {Optical Engineering}, volume = {30}, number = {7}, pages = {881}, abstract = {A model is developed for estimating the displacement field in spatio-temporal image sequences that allows for affine shape deformations of corresponding spatial regions and for affine transformations of the image intensity range. This model includes the block matching method as a special case. The model parameters are found by using a least-squares algorithm. We demonstrate experimentally that the affine matching algorithm performs better in estimating displacements than other standard approaches, especially for long-range motion with possible changes in scene illumination. The algorithm is successfully applied to various classes of moving imagery, including the tracking of cloud motion.}, note = {Invited Paper}, keywords = {}, pubstate = {published}, tppubtype = {article} } A model is developed for estimating the displacement field in spatio-temporal image sequences that allows for affine shape deformations of corresponding spatial regions and for affine transformations of the image intensity range. This model includes the block matching method as a special case. The model parameters are found by using a least-squares algorithm. 
We demonstrate experimentally that the affine matching algorithm performs better in estimating displacements than other standard approaches, especially for long-range motion with possible changes in scene illumination. The algorithm is successfully applied to various classes of moving imagery, including the tracking of cloud motion. |
Ping-fai Yang, Petros Maragos Learnability of min-max pattern classifiers Conference Visual Communications and Image Processing, Proc. of SPIE, 1606 , Boston, Massachusetts, 1991, ISSN: 0277786X. @conference{Yang1991, title = {Learnability of min-max pattern classifiers}, author = { Ping-fai Yang and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/1991_YangMaragos_LearnabilityOfMinmaxClassifiers_SPIE-1606.pdf}, issn = {0277786X}, year = {1991}, date = {1991-11-01}, booktitle = {Visual Communications and Image Processing, Proc. of SPIE}, volume = {1606}, pages = {294--308}, address = {Boston, Massachusetts}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Chiou-Shann Fuh, Petros Maragos Affine Models for Image Matching and Motion Detection Conference Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-91), Toronto, Canada, 1991, ISBN: 2977719110000. Abstract | BibTeX | Links: [PDF] @conference{292, title = {Affine Models for Image Matching and Motion Detection}, author = {Chiou-Shann Fuh and Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/FuhMaragos_AffineImageMatchMotionDetect_ICASSP1991.pdf}, doi = {10.1109/ICASSP.1991.150878}, isbn = {2977719110000}, year = {1991}, date = {1991-01-01}, booktitle = {Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-91)}, pages = {2409--2412}, address = {Toronto, Canada}, abstract = {A model is developed for detecting the displacement field in spatio-temporal image sequences that allows for affine shape deformations of corresponding spatial regions and for affine transformations of the image intensity range. This model includes the block matching method as a special case. A least-squares algorithm is used to find the model parameters. It is experimentally demonstrated that the affine matching model performs better than other standard approaches. The resulting 2-D motion estimates are then used by a 3-D affine model and a least-squares algorithm that recover 3-D rigid body motion and depth from two perspective views.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } A model is developed for detecting the displacement field in spatio-temporal image sequences that allows for affine shape deformations of corresponding spatial regions and for affine transformations of the image intensity range. This model includes the block matching method as a special case. A least-squares algorithm is used to find the model parameters. It is experimentally demonstrated that the affine matching model performs better than other standard approaches. 
The resulting 2-D motion estimates are then used by a 3-D affine model and a least-squares algorithm that recover 3-D rigid body motion and depth from two perspective views. |
P Maragos, T Quatieri, J Kaiser Speech nonlinearities, modulations, and energy operators Conference Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-91), Toronto, Canada, 1991, ISSN: 07367791. @conference{291, title = {Speech nonlinearities, modulations, and energy operators}, author = { P Maragos and T Quatieri and J Kaiser}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosQuatieriKaiser_SpeechNonlinearitiesModulationsEnergyOperators_ICASSP1991.pdf}, doi = {10.1109/ICASSP.1991.150366}, issn = {07367791}, year = {1991}, date = {1991-01-01}, booktitle = {Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-91)}, pages = {421--424}, address = {Toronto, Canada}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Petros Maragos Fractal aspects of speech signals: dimension and interpolation Conference Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-91), Toronto, Canada, 1991, ISSN: 1520-6149. Abstract | BibTeX | Links: [PDF] @conference{290, title = {Fractal aspects of speech signals: dimension and interpolation}, author = { Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/Maragos_FractalAspects-of-SpeechSignals-DimensionInterpolation_ICASSP1991.pdf}, doi = {10.1109/ICASSP.1991.150365}, issn = {1520-6149}, year = {1991}, date = {1991-01-01}, booktitle = {Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-91)}, pages = {417--420}, address = {Toronto, Canada}, abstract = {The nonlinear dynamics of air flow during speech production may often result in some small or large degree of turbulence. The author quantifies the geometry of speech turbulence, as reflected in the fragmentation of the time signal, by using fractal models. He describes an efficient algorithm for estimating the short-time fractal dimension of speech segmentation and sound classification. He also develops a method for fractal speech interpolation which can be used to synthesize controlled amounts of turbulence in speech or to increase its sampling rate by preserving not its bandwidth (as is classically done) but rather its fractal dimension.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } The nonlinear dynamics of air flow during speech production may often result in some small or large degree of turbulence. The author quantifies the geometry of speech turbulence, as reflected in the fragmentation of the time signal, by using fractal models. He describes an efficient algorithm for estimating the short-time fractal dimension of speech segmentation and sound classification. 
He also develops a method for fractal speech interpolation which can be used to synthesize controlled amounts of turbulence in speech or to increase its sampling rate by preserving not its bandwidth (as is classically done) but rather its fractal dimension. |
1990 |
P Maragos, R D Ziff Threshold superposition in morphological image analysis systems Journal Article IEEE Trans. on Pattern Analysis and Machine Intelligence, 12 (5), pp. 498–504, 1990, ISSN: 01628828. @article{89c, title = {Threshold superposition in morphological image analysis systems}, author = {P Maragos and R D Ziff}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/MaragosZiff_ThreshSuperposMorfImAnalSyst_ieeetPAMI1990.pdf}, doi = {10.1109/34.55110}, issn = {01628828}, year = {1990}, date = {1990-05-01}, journal = {IEEE Trans. on Pattern Analysis and Machine Intelligence}, volume = {12}, number = {5}, pages = {498--504}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Petros Maragos, Ronald W Schafer Morphological Systems for Multidimensional Signal Processing Journal Article Proceedings of the IEEE, 78 (4), pp. 690–710, 1990, ISSN: 15582256. Abstract | BibTeX | Links: [PDF] @article{88c, title = {Morphological Systems for Multidimensional Signal Processing}, author = {Petros Maragos and Ronald W Schafer}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosSchafer_MorphSystMultidimSigProc_PIEEE1990.pdf}, doi = {10.1109/5.54808}, issn = {15582256}, year = {1990}, date = {1990-04-01}, journal = {Proceedings of the IEEE}, volume = {78}, number = {4}, pages = {690--710}, abstract = {The basic theory and applications of a set-theoretic approach to image analysis called mathematical morphology are reviewed. The goals are to show how the concepts of mathematical morphology geometrical structure in signals to illuminate the ways that morphological systems can enrich the theory and applications of multidimensional signal processing. The topics covered include: applications to nonlinear filtering (morphological and rank-order filters, multiscale smoothing, morphological sampling, and morphological correlation); applications to image analysis (feature extraction, shape representation and description, size distributions, and fractals); and representation theorems, which shows how a large class of nonlinear and linear signal operators can be realized as a combination of simple morphological operations}, keywords = {}, pubstate = {published}, tppubtype = {article} } The basic theory and applications of a set-theoretic approach to image analysis called mathematical morphology are reviewed. The goals are to show how the concepts of mathematical morphology geometrical structure in signals to illuminate the ways that morphological systems can enrich the theory and applications of multidimensional signal processing. 
The topics covered include: applications to nonlinear filtering (morphological and rank-order filters, multiscale smoothing, morphological sampling, and morphological correlation); applications to image analysis (feature extraction, shape representation and description, size distributions, and fractals); and representation theorems, which shows how a large class of nonlinear and linear signal operators can be realized as a combination of simple morphological operations |
P. Maragos, T. F. Quatieri, J. F. Kaiser Detecting Nonlinearities in Speech using an Energy Operator Conference Proc. IEEE Int'l Workshop on Digital Signal Processing, New Paltz, NY, 1990. @conference{293, title = {Detecting Nonlinearities in Speech using an Energy Operator}, author = {P. Maragos and T. F. Quatieri and J. F. Kaiser}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosQuatieriKaiser_DetectingNonlinearUsingEnergyOperator_ieeeDSPW1990.pdf}, year = {1990}, date = {1990-09-01}, booktitle = {Proc. IEEE Int'l Workshop on Digital Signal Processing}, address = {New Paltz, NY}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
K.-M. Yang, P. Maragos, L. Wu A VLSI Chip Architecture Design For a 2-D Gray-level Morphological Operations Conference Hybrid Image and Signal Processing II, Proc. of SPIE, 1297 , 1990. @conference{Yang1990b, title = {A VLSI Chip Architecture Design For a 2-D Gray-level Morphological Operations}, author = {K.-M. Yang and P. Maragos and L. Wu}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/yang1990.pdf}, year = {1990}, date = {1990-09-01}, booktitle = {Hybrid Image and Signal Processing II, Proc. of SPIE}, volume = {1297}, pages = {286--295}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
D Bloomberg, P Maragos Generalized Hit-Miss Operations Conference Image Algebra and Morphological Image Processing, Proc. of SPIE, 1350 , San Diego, California, 1990. @conference{Bloomberg1990, title = {Generalized Hit-Miss Operations}, author = {D Bloomberg and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/bloomberg1990.pdf}, year = {1990}, date = {1990-07-01}, booktitle = {Image Algebra and Morphological Image Processing, Proc. of SPIE}, volume = {1350}, pages = {116--128}, address = {San Diego, California}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Maragos Affine Morphology and Affine Signal Models Conference Image Algebra and Morphological Image Processing, Proc. of SPIE, 1350 , San Diego, California, 1990, ISSN: 0277786X. Abstract | BibTeX | Links: [PDF] @conference{295, title = {Affine Morphology and Affine Signal Models}, author = {P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/Maragos_AffineMorphologyAffineSignalModels_spie-IAMIP1990.pdf}, doi = {10.1117/12.23574}, issn = {0277786X}, year = {1990}, date = {1990-07-01}, booktitle = {Image Algebra and Morphological Image Processing, Proc. of SPIE}, volume = {1350}, pages = {31--43}, address = {San Diego, California}, abstract = {Abstract not available}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Abstract not available |
P Maragos, K L Young Fractal Excitation Signals For CELP Speech Coders Conference Proc. Int’l Conf. on Acoustics, Speech, Signal Processing (ICASSP-90), Albuquerque, New Mexico, 1990, ISSN: 07367791. @conference{Maragos1990, title = {Fractal Excitation Signals For CELP Speech Coders}, author = { P Maragos and K L Young}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/fractal-excitation-signals-for-celp-speech-coders.pdf}, doi = {10.1109/ICASSP.1990.115838}, issn = {07367791}, year = {1990}, date = {1990-04-01}, booktitle = {Proc. Int’l Conf. on Acoustics, Speech, Signal Processing (ICASSP-90)}, pages = {669--672}, address = {Albuquerque, New Mexico}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
1989 |
Petros Maragos A Representation Theory for Morphological Image and Signal Processing Journal Article IEEE Trans. on Pattern Analysis and Machine Intelligence, 11 (6), pp. 586–599, 1989, ISSN: 01628828. Abstract | BibTeX | Links: [Webpage] [PDF] @article{86c, title = {A Representation Theory for Morphological Image and Signal Processing}, author = {Petros Maragos}, url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=24793 http://robotics.ntua.gr/wp-content/uploads/sites/2/Maragos_RepresentationTheory_ieeetPAMI89.pdf}, doi = {10.1109/34.24793}, issn = {01628828}, year = {1989}, date = {1989-06-01}, journal = {IEEE Trans. on Pattern Analysis and Machine Intelligence}, volume = {11}, number = {6}, pages = {586--599}, abstract = {A unifying theory for many concepts and operations encountered in or related to morphological image and signal analysis is presented. The unification requires a set-theoretic methodology, where signals are modeled as sets, systems (signal transformations) are viewed as set mappings, and translational-invariant systems are uniquely characterized by special collections of input signals. This approach leads to a general representation theory, in which any translation-invariant, increasing, upper semicontinuous system can be presented exactly as a minimal nonlinear superposition of morphological erosions or dilations. The theory is used to analyze some special cases of image/signal analysis systems, such as morphological filters, median and order-statistic filters, linear filters, and shape recognition transforms. Although the developed theory is algebraic, its prototype operations are well suited for shape analysis; hence, the results also apply to systems that extract information about the geometrical structure of signals}, keywords = {}, pubstate = {published}, tppubtype = {article} } A unifying theory for many concepts and operations encountered in or related to morphological image and signal analysis is presented. 
The unification requires a set-theoretic methodology, where signals are modeled as sets, systems (signal transformations) are viewed as set mappings, and translational-invariant systems are uniquely characterized by special collections of input signals. This approach leads to a general representation theory, in which any translation-invariant, increasing, upper semicontinuous system can be presented exactly as a minimal nonlinear superposition of morphological erosions or dilations. The theory is used to analyze some special cases of image/signal analysis systems, such as morphological filters, median and order-statistic filters, linear filters, and shape recognition transforms. Although the developed theory is algebraic, its prototype operations are well suited for shape analysis; hence, the results also apply to systems that extract information about the geometrical structure of signals |
Petros Maragos Pattern Spectrum and Multiscale Shape Representation Journal Article IEEE Trans. on Pattern Analysis and Machine Intelligence, 11 (7), pp. 701–716, 1989, ISSN: 01628828. Abstract | BibTeX | Links: [PDF] @article{87c, title = {Pattern Spectrum and Multiscale Shape Representation}, author = {Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/Maragos_PatternSpectrum_ieeetPAMI89.pdf}, doi = {10.1109/34.192465}, issn = {01628828}, year = {1989}, date = {1989-01-01}, journal = {IEEE Trans. on Pattern Analysis and Machine Intelligence}, volume = {11}, number = {7}, pages = {701--716}, abstract = {The results of a study on multiscale shape description, smoothing and representation are reported. Multiscale nonlinear smoothing filters are first developed, using morphological opening and closings. G. Matheron (1975) used openings and closings to obtain probabilistic size distributions of Euclidean-space sets (continuous binary images). These distributions are used to develop a concept of pattern spectrum (a shape-size descriptor). A pattern spectrum is introduced for continuous graytone images and arbitrary multilevel signals, as well as for discrete images, by developing a discrete-size family of patterns. Large jumps in the pattern spectrum at a certain scale indicate the existence of major (protruding or intruding) substructures of the signal at the scale. An entropy-like shape-size complexity measure is also developed based on the pattern spectrum. For shape representation, a reduced morphological skeleton transform is introduced for discrete binary and graytone images. This transform is a sequence of skeleton components (sparse images) which represent the original shape at various scales. 
It is shown that the partially reconstructed images from the inverse transform on subsequences of skeleton components are the openings of the image at a scale determined by the number of eliminated components; in addition, two-way correspondences are established among the degree of shape smoothing via multiscale openings or closings, the pattern spectrum zero values, and the elimination or nonexistence of skeleton components at certain scales}, keywords = {}, pubstate = {published}, tppubtype = {article} } The results of a study on multiscale shape description, smoothing and representation are reported. Multiscale nonlinear smoothing filters are first developed, using morphological opening and closings. G. Matheron (1975) used openings and closings to obtain probabilistic size distributions of Euclidean-space sets (continuous binary images). These distributions are used to develop a concept of pattern spectrum (a shape-size descriptor). A pattern spectrum is introduced for continuous graytone images and arbitrary multilevel signals, as well as for discrete images, by developing a discrete-size family of patterns. Large jumps in the pattern spectrum at a certain scale indicate the existence of major (protruding or intruding) substructures of the signal at the scale. An entropy-like shape-size complexity measure is also developed based on the pattern spectrum. For shape representation, a reduced morphological skeleton transform is introduced for discrete binary and graytone images. This transform is a sequence of skeleton components (sparse images) which represent the original shape at various scales. 
It is shown that the partially reconstructed images from the inverse transform on subsequences of skeleton components are the openings of the image at a scale determined by the number of eliminated components; in addition, two-way correspondences are established among the degree of shape smoothing via multiscale openings or closings, the pattern spectrum zero values, and the elimination or nonexistence of skeleton components at certain scales |
P. Maragos, F.K. Sun Measuring Fractal Dimension: Morphological Estimates and Iterative Optimization Journal Article Visual Communications and Image Processing, SPIE, 1199 , pp. 416-430, 1989. @article{Maragos1989b, title = {Measuring Fractal Dimension: Morphological Estimates and Iterative Optimization}, author = {P. Maragos and F.K. Sun}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/1989_MaragosSun_MeasureFractalDim-MorphEstimIterativOptimiz_SPIE-1199.pdf}, year = {1989}, date = {1989-01-01}, journal = {Visual Communications and Image Processing, SPIE}, volume = {1199}, pages = {416-430}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
F.-K. Sun, P Maragos Experiments on Image Compression Using Morphological Pyramids Conference Visual Communications and Image Processing IV, Proc. of SPIE, 1141 , Philadelphia, PA, 1989. @conference{297, title = {Experiments on Image Compression Using Morphological Pyramids}, author = {F.-K. Sun and P Maragos}, url = {SunMaragos_ExperimentsImageCompressionUsingMorphologicalPyramids_SPIE-VCIP1989.pdf}, year = {1989}, date = {1989-11-01}, booktitle = {Visual Communications and Image Processing IV, Proc. of SPIE}, volume = {1141}, pages = {1303-1312}, address = {Philadelphia, PA}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C.-S. Fuh, P Maragos Region-based Optical Flow Estimation Conference Proc. IEEE Conf. on Computer Vision and Pattern Recognition (CVPR-89), San Diego, California, 1989. @conference{Fuh1989, title = {Region-based Optical Flow Estimation}, author = { C.-S. Fuh and P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/RegionBasedOpticalFlowEstimation.pdf}, year = {1989}, date = {1989-06-01}, booktitle = {Proc. IEEE Conf. on Computer Vision and Pattern Recognition (CVPR-89)}, pages = {130--135}, address = {San Diego, California}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Petros Maragos Morphological correlation and mean absolute error criteria Conference Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-89), (7), Glasgow, Scotland, 1989, ISSN: 07367791. @conference{299, title = {Morphological correlation and mean absolute error criteria}, author = { Petros Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/Maragos_MorphologicalCorrelation-and-MAE-Criteria_ICASSP1989.pdf}, doi = {10.1109/ICASSP.1989.266742}, issn = {07367791}, year = {1989}, date = {1989-01-01}, booktitle = {Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-89)}, number = {7}, pages = {1568-1571}, address = {Glasgow, Scotland}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
1988 |
P. Maragos, R. D. Ziff Threshold Parallelism in Morphological Feature Extraction, Skeletonization, and Pattern Spectrum Journal Article Visual Communications and Image Processing, SPIE, 1001 , pp. 106-115, 1988. @article{Maragos1988, title = {Threshold Parallelism in Morphological Feature Extraction, Skeletonization, and Pattern Spectrum}, author = {P. Maragos and R. D. Ziff}, url = {http://cvsp.cs.ntua.gr/publications/jpubl+bchap/1988_MaragosZiff_ThresholdParallelismMorphFeatureExtraction_SPIE-1001.pdf}, year = {1988}, date = {1988-01-01}, journal = {Visual Communications and Image Processing, SPIE}, volume = {1001}, pages = {106-115}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
P. Maragos Optimal Morphological Approaches To Image Matching And Object Detection Conference Proc. Int’l Conf. on Computer Vision (ICCV-88), Tarpon Springs, Florida, 1988, ISBN: 0-8186-0883-8. @conference{Maragos1988b, title = {Optimal Morphological Approaches To Image Matching And Object Detection}, author = {P. Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/optimal-morphological-approaches-to-image-matching-and-object-de.pdf}, doi = {10.1109/CCV.1988.590053}, isbn = {0-8186-0883-8}, year = {1988}, date = {1988-12-01}, booktitle = {Proc. Int’l Conf. on Computer Vision (ICCV-88)}, pages = {695-699}, address = {Tarpon Springs, Florida}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
P Maragos Morphology-based symbolic image modeling, multi-scale nonlinear smoothing, and pattern spectrum Conference Proc. IEEE Conf. on Computer Vision and Pattern Recognition (CVPR-88), Ann Arbor, MI, 1988, ISBN: 0818608625. @conference{Maragos1988c, title = {Morphology-based symbolic image modeling, multi-scale nonlinear smoothing, and pattern spectrum}, author = {P Maragos}, url = {http://robotics.ntua.gr/wp-content/uploads/sites/2/morphologybased-symbolic-image-modeling-multiscale-nonlinear-smo.pdf}, doi = {10.1109/CVPR.1988.196321}, isbn = {0818608625}, year = {1988}, date = {1988-06-01}, booktitle = {Proc. IEEE Conf. on Computer Vision and Pattern Recognition (CVPR-88)}, pages = {766--773}, address = {Ann Arbor, MI}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
1987 |
P. Maragos, R. W. Schafer Morphological filters—Part I: Their set-theoretic analysis and relations to linear shift-invariant filters Journal Article IEEE Transactions on Acoustics, Speech, and Signal Processing, 35 (8), pp. 1153–1169, 1987, ISSN: 00963518. Abstract | BibTeX | Links: [PDF] @article{317, title = {Morphological filters—Part I: Their set-theoretic analysis and relations to linear shift-invariant filters}, author = {P. Maragos and R. W. Schafer}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/1987_MaragosSchafer_MorphologicFilters-part1_ieeetASSP_+corrections1989.pdf}, doi = {10.1109/TASSP.1987.1165259}, issn = {00963518}, year = {1987}, date = {1987-08-01}, journal = {IEEE Transactions on Acoustics, Speech, and Signal Processing}, volume = {35}, number = {8}, pages = {1153--1169}, abstract = {This paper examines the set-theoretic interpretation of morphological filters in the framework of mathematical morphology and introduces the representation of classical linear filters in terms of morphological correlations, which involve supremum/infimum operations and additions. Binary signals are classified as sets, and multilevel signals as functions. Two set-theoretic representations of signals are reviewed. Filters are classified as set-processing (SP) or function-processing (FP). Conditions are provided for certain FP filters that pass binary signals to commute with signal thresholding because then they can be analyzed and implemented as SP filters. The basic morphological operations of set erosion, dilation, opening, and closing are related to Minkowski set operations and are used to construct FP morphological filters. Emphasis is then given to analytically and geometrically quantifying the similarities and differences between morphological filtering of signals by sets and functions; the latter case allows the definition of morphological convolutions and correlations. 
Toward this goal, various properties of FP morphological filters are also examined. Linear shift-invariant filters (due to their translation-invariance) are uniquely characterized by their kernel, which is a special collection of input signals. Increasing linear filters are represented as the supremum of erosions by their kernel functions. If the filters are also discrete and have a finite-extent impulse response, they can be represented as the supremum of erosions only by their minimal (with respect to a signal ordering) kernel functions. Stable linear filters can be represented as the sum of (at most) two weighted suprema of erosions. These results demonstrate the power of mathematical morphology as a unifying approach to both linear and nonlinear signal-shaping strategies.}, keywords = {}, pubstate = {published}, tppubtype = {article} } This paper examines the set-theoretic interpretation of morphological filters in the framework of mathematical morphology and introduces the representation of classical linear filters in terms of morphological correlations, which involve supremum/infimum operations and additions. Binary signals are classified as sets, and multilevel signals as functions. Two set-theoretic representations of signals are reviewed. Filters are classified as set-processing (SP) or function-processing (FP). Conditions are provided for certain FP filters that pass binary signals to commute with signal thresholding because then they can be analyzed and implemented as SP filters. The basic morphological operations of set erosion, dilation, opening, and closing are related to Minkowski set operations and are used to construct FP morphological filters. Emphasis is then given to analytically and geometrically quantifying the similarities and differences between morphological filtering of signals by sets and functions; the latter case allows the definition of morphological convolutions and correlations. 
Toward this goal, various properties of FP morphological filters are also examined. Linear shift-invariant filters (due to their translation-invariance) are uniquely characterized by their kernel, which is a special collection of input signals. Increasing linear filters are represented as the supremum of erosions by their kernel functions. If the filters are also discrete and have a finite-extent impulse response, they can be represented as the supremum of erosions only by their minimal (with respect to a signal ordering) kernel functions. Stable linear filters can be represented as the sum of (at most) two weighted suprema of erosions. These results demonstrate the power of mathematical morphology as a unifying approach to both linear and nonlinear signal-shaping strategies. |
P Maragos, R W Schafer Morphological filters -- Part II: Their relations to median, order-statistics, and stack filters Journal Article IEEE Trans. on Acoustics, Speech, and Signal Processing, 35 (8), pp. 1170–1184, 1987. @article{316, title = {Morphological filters -- Part II: Their relations to median, order-statistics, and stack filters}, author = {P Maragos and R W Schafer}, url = {http://robotics.ntua.gr/wp-content/uploads/publications/1987_MaragosSchafer_MorphologicFilters-part2_ieeetASSP_+corrections1989.pdf}, doi = {10.1109/TASSP.1987.1165254}, year = {1987}, date = {1987-08-01}, journal = {IEEE Trans. on Acoustics, Speech, and Signal Processing}, volume = {35}, number = {8}, pages = {1170--1184}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Petros Maragos Tutorial On Advances In Morphological Image Processing And Analysis Journal Article Optical Engineering, 26 (7), pp. 623-632, 1987, ISSN: 0091-3286. Abstract | BibTeX | Links: [Webpage] [PDF]
@article{Maragos1987,
  author    = {Petros Maragos},
  title     = {Tutorial On Advances In Morphological Image Processing And Analysis},
  journal   = {Optical Engineering},
  volume    = {26},
  number    = {7},
  pages     = {623--632},
  year      = {1987},
  date      = {1987-07-01},
  issn      = {0091-3286},
  doi       = {10.1117/12.7974127},
  url       = {http://opticalengineering.spiedigitallibrary.org/article.aspx?doi=10.1117/12.7974127 http://robotics.ntua.gr/wp-content/uploads/sites/2/1986_Maragos_TutorialAdvancesMorphImagProcessAnal_SPIE-0707.pdf},
  abstract  = {This paper reviews some recent advances in the theory and applications of morphological image analysis. Regarding applications, we show how the morphological filters can be used to provide simple and systematic algorithms for image processing and analysis tasks as diverse as nonlinear image filtering, noise suppression, edge detection, region filling, skeletonization, coding, shape representation, smoothing, and recognition. Regarding theory, we summarize the representation of a large class of translation-invariant nonlinear filters (including morphological, median, order-statistic, and shape recognition filters) as a minimal combination of morphological erosions or dilations; these results provide new realizations of these filters and lead to a unified image algebra.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
This paper reviews some recent advances in the theory and applications of morphological image analysis. Regarding applications, we show how the morphological filters can be used to provide simple and systematic algorithms for image processing and analysis tasks as diverse as nonlinear image filtering, noise suppression, edge detection, region filling, skeletonization, coding, shape representation, smoothing, and recognition.
Regarding theory, we summarize the representation of a large class of translation-invariant nonlinear filters (including morphological, median, order-statistic, and shape recognition filters) as a minimal combination of morphological erosions or dilations; these results provide new realizations of these filters and lead to a unified image algebra. |
R Libeskind-Hadas, P Maragos Application of Iterated Function Systems and Skeletonization to Synthesis of Fractal Images Conference Proc. SPIE, Visual Communications and Image Processing II, 845 , 1987, ISSN: 1996756X.
@conference{302,
  author    = {R Libeskind-Hadas and P Maragos},
  title     = {Application of Iterated Function Systems and Skeletonization to Synthesis of Fractal Images},
  booktitle = {Proc. SPIE, Visual Communications and Image Processing II},
  volume    = {845},
  pages     = {276--284},
  year      = {1987},
  date      = {1987-10-01},
  issn      = {1996-756X},
  doi       = {10.1117/12.976516},
  url       = {http://robotics.ntua.gr/wp-content/uploads/publications/LibeskindMaragos_ApplicationSkeletonIterFunctionSystems-to-SynthFractalImages_SPIE-VCIP1987.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
1986 |
P Maragos, R Schafer Morphological skeleton representation and coding of binary images Journal Article IEEE Trans. on Acoustics, Speech, and Signal Processing, 34 (5), pp. 1228-1244, 1986. BibTeX | Links: [Webpage] [PDF]
@article{Maragos1986,
  author    = {P Maragos and R Schafer},
  title     = {Morphological skeleton representation and coding of binary images},
  journal   = {IEEE Trans. on Acoustics, Speech, and Signal Processing},
  volume    = {34},
  number    = {5},
  pages     = {1228--1244},
  year      = {1986},
  date      = {1986-10-01},
  doi       = {10.1109/TASSP.1986.1164959},
  url       = {https://ieeexplore.ieee.org/document/1164959 http://robotics.ntua.gr/wp-content/uploads/sites/2/MaragosSchafer_MSkelRepresCodBinImag_ieeetASSP1986.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
} |
1985 |
P. Maragos, R. Schafer A unification of linear, median, order-statistics and morphological filters under mathematical morphology Conference Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-85), Tampa, Florida, 1985. BibTeX | Links: [Webpage] [PDF]
@conference{Maragos1985,
  author    = { P. Maragos and R. Schafer},
  title     = {A unification of linear, median, order-statistics and morphological filters under mathematical morphology},
  booktitle = {Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-85)},
  pages     = {1329--1332},
  address   = {Tampa, Florida},
  year      = {1985},
  date      = {1985-04-01},
  doi       = {10.1109/ICASSP.1985.1168257},
  url       = {http://ieeexplore.ieee.org/document/1168257/ http://robotics.ntua.gr/wp-content/uploads/sites/2/a-unification-of-linear-median-orderstatistics-and-morphological.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
1984 |
P. Maragos, R. W. Schafer, R. M. Mersereau Two-Dimensional Linear Prediction and Its Application to Adaptive Predictive Coding of Images Journal Article IEEE Trans. on Acoustics, Speech, and Signal Processing, 32 (6), pp. 1213-1229, 1984.
@article{84c,
  author    = {P. Maragos and R. W. Schafer and R. M. Mersereau},
  title     = {Two-Dimensional Linear Prediction and Its Application to Adaptive Predictive Coding of Images},
  journal   = {IEEE Trans. on Acoustics, Speech, and Signal Processing},
  volume    = {32},
  number    = {6},
  pages     = {1213--1229},
  year      = {1984},
  date      = {1984-12-01},
  doi       = {10.1109/TASSP.1984.1164463},
  url       = {http://robotics.ntua.gr/wp-content/uploads/publications/MaragosSchaferMersereau_2dLinPredAdaptImagCod_ieeetASSP1984.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
} |
P. Maragos, R. Mersereau, R. Schafer Multichannel Linear Predictive Coding of Color Images Conference Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-84), San Diego, California, 1984. BibTeX | Links: [Webpage] [PDF]
@conference{Maragos1984,
  author    = { P. Maragos and R. Mersereau and R. Schafer},
  title     = {Multichannel Linear Predictive Coding of Color Images},
  booktitle = {Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-84)},
  pages     = {535--538},
  address   = {San Diego, California},
  year      = {1984},
  date      = {1984-03-19},
  doi       = {10.1109/ICASSP.1984.1172459},
  url       = {http://ieeexplore.ieee.org/document/1172459/ http://robotics.ntua.gr/wp-content/uploads/sites/2/multichannel-linear-predictive-coding-of-color-images.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
P. Maragos, R. Schafer Morphological skeleton representation and coding of binary images Conference Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing, (ICASSP-84), 9 , San Diego, California, 1984. BibTeX | Links: [Webpage] [PDF]
@conference{Maragos1984b,
  author    = {P. Maragos and R. Schafer},
  title     = {Morphological skeleton representation and coding of binary images},
  booktitle = {Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-84)},
  volume    = {9},
  pages     = {523--526},
  address   = {San Diego, California},
  year      = {1984},
  date      = {1984-03-01},
  doi       = {10.1109/ICASSP.1984.1172472},
  url       = {http://ieeexplore.ieee.org/document/1172472/ http://robotics.ntua.gr/wp-content/uploads/sites/2/morphological-skeleton-representation-and-coding-of-binary-image.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
1983 |
P. Maragos, R. Mersereau, R. Schafer Two-dimensional linear predictive analysis of arbitrarily-shaped regions Conference Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-83), Boston, Massachusetts, 1983. BibTeX | Links: [Webpage] [PDF]
@conference{Maragos1983,
  author    = { P. Maragos and R. Mersereau and R. Schafer},
  title     = {Two-dimensional linear predictive analysis of arbitrarily-shaped regions},
  booktitle = {Proc. Int’l Conf. on Acoustics, Speech, and Signal Processing (ICASSP-83)},
  pages     = {104--107},
  address   = {Boston, Massachusetts},
  year      = {1983},
  date      = {1983-04-01},
  doi       = {10.1109/ICASSP.1983.1172205},
  url       = {http://ieeexplore.ieee.org/document/1172205/ http://robotics.ntua.gr/wp-content/uploads/sites/2/twodimensional-linear-predictive-analysis-of-arbitrarilyshaped-r.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
} |
Copyright Notice:
Some material presented is available for download to ensure timely dissemination of scholarly and technical work. Copyright and all rights therein are retained by authors or by other copyright holders. All persons copying this information are expected to adhere to the terms and constraints invoked by each author’s copyright. In most cases, these works may not be reposted without the explicit permission of the copyright holder.
The work already published by the IEEE is under its copyright. Personal use of such material is permitted. However, permission to reprint/republish the material for advertising or promotional purposes, or for creating new collective works for resale or redistribution to servers or lists, or to reuse any copyrighted component of the work in other works must be obtained from the IEEE.