Book Chapter
2022 |
|
B3 | Bonnot, Justine; Menard, Daniel; Desnos, Karol Analysis of the Impact of Approximate Computing on the Application Quality Incollection Approximate Computing Techniques: From Component- to Application-Level, Springer @incollection{Bonnot_Analysis_2022, title = {Analysis of the Impact of Approximate Computing on the Application Quality}, author = {Justine Bonnot and Daniel Menard and Karol Desnos}, editor = {Alberto Bosio and Daniel Menard and Olivier Sentieys }, doi = {10.1007/978-3-030-94705-7_6}, isbn = {978-3-030-94705-7}, year = {2022}, date = {2022-06-01}, booktitle = {Approximate Computing Techniques: From Component- to Application-Level}, pages = {145 - 176}, publisher = {Springer}, chapter = {6}, keywords = {}, pubstate = {published}, tppubtype = {incollection} } |
2021 |
|
B2 | Castrillon, Jeronimo; Desnos, Karol; Goens, Andrés; Menard, Christian Dataflow Models of Computation for Programming Heterogeneous Multicores Incollection Forthcoming Handbook of Computer Architecture, Springer @incollection{Castrillon_Dataflow_2021, title = {Dataflow Models of Computation for Programming Heterogeneous Multicores}, author = {Jeronimo Castrillon and Karol Desnos and Andr\'{e}s Goens and Christian Menard}, year = {2021}, date = {2021-05-01}, booktitle = {Handbook of Computer Architecture}, publisher = {Springer}, keywords = {}, pubstate = {forthcoming}, tppubtype = {incollection} } |
2018 |
|
B1 | Desnos, Karol; Palumbo, Francesca Dataflow Modeling for Reconfigurable Signal Processing Systems Incollection Handbook of Signal Processing Systems, Springer @incollection{Desnos_Dataflow_2018, title = {Dataflow Modeling for Reconfigurable Signal Processing Systems}, author = {Karol Desnos and Francesca Palumbo}, editor = {S S Bhattacharyya and E F Deprettere and R Leupers and J Takala}, year = {2018}, date = {2018-01-01}, booktitle = {Handbook of Signal Processing Systems}, publisher = {Springer}, edition = {Third}, keywords = {}, pubstate = {published}, tppubtype = {incollection} } |
Journals
2024 |
|
J19 | Marie, Alban; Desnos, Karol; Mercat, Alexandre; Morin, Luce; Vanne, Jarno; Zhang, Lu Advanced Fine-Tuning Procedures to Enhance DNN Robustness in Visual Coding for Machines Journal Article EURASIP Journal on Image and Video Processing, Springer @article{Marie_Advanced_2024, title = {Advanced Fine-Tuning Procedures to Enhance DNN Robustness in Visual Coding for Machines}, author = {Alban Marie and Karol Desnos and Alexandre Mercat and Luce Morin and Jarno Vanne and Lu Zhang}, year = {2024}, date = {2024-10-01}, journal = {EURASIP Journal on Image and Video Processing}, publisher = {Springer}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
J18 | Renaud, Ophélie; Miomandre, Hugo; Desnos, Karol; Nezan, Jean-François Automated Level-Based Clustering of Dataflow Actors for Controlled Scheduling Complexity Journal Article Journal of Systems Architecture (JSA), Elsevier @article{Renaud_Automated_2024, title = {Automated Level-Based Clustering of Dataflow Actors for Controlled Scheduling Complexity}, author = {Oph\'{e}lie Renaud and Hugo Miomandre and Karol Desnos and Jean-Fran\c{c}ois Nezan }, year = {2024}, date = {2024-06-24}, journal = {Journal of Systems Architecture (JSA)}, publisher = {Elsevier}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
J17 | Chillet, Alice; Gerzaguet, Robin; Desnos, Karol; Gautier, Matthieu; Lohan, Elena Simona; Nogues, Erwan; Valkama, Mikko Understanding Radio Frequency Fingerprint Identification with RiFyFi Virtual Databases Journal Article Forthcoming IEEE Open Journal of the Communications Society, @article{Chillet_Understanding_2024, title = {Understanding Radio Frequency Fingerprint Identification with RiFyFi Virtual Databases}, author = {Alice Chillet and Robin Gerzaguet and Karol Desnos and Matthieu Gautier and Elena Simona Lohan and Erwan Nogues and Mikko Valkama}, year = {2024}, date = {2024-06-01}, journal = {IEEE Open Journal of the Communications Society}, keywords = {}, pubstate = {forthcoming}, tppubtype = {article} } |
2021 |
|
J16 | Sourbier, Nicolas; Desnos, Karol; Guyet, Thomas; Majorczyk, Frédéric; Gesny, Olivier; Pelcat, Maxime SECURE-GEGELATI Always-On Intrusion Detection through GEGELATI Lightweight Tangled Program Graphs Journal Article Forthcoming Journal of Signal Processing Systems, Springer @article{Sourbier_Secure_2021, title = {SECURE-GEGELATI Always-On Intrusion Detection through GEGELATI Lightweight Tangled Program Graphs}, author = {Nicolas Sourbier and Karol Desnos and Thomas Guyet and Fr\'{e}d\'{e}ric Majorczyk and Olivier Gesny and Maxime Pelcat}, year = {2021}, date = {2021-11-30}, journal = {Journal of Signal Processing Systems}, publisher = {Springer}, keywords = {}, pubstate = {forthcoming}, tppubtype = {article} } |
2020 |
|
J15 | Lee, Yaesop; Liu, Yanzhou; Desnos, Karol; Barford, Lee; Bhattacharyya, Shuvra S Passive-Active Flowgraphs for Efficient Modeling and Design of Signal Processing Systems Journal Article Journal of Signal Processing Systems, Springer @article{Lee_Passive_2020, title = {Passive-Active Flowgraphs for Efficient Modeling and Design of Signal Processing Systems}, author = {Yaesop Lee and Yanzhou Liu and Karol Desnos and Lee Barford and Shuvra S. Bhattacharyya}, doi = {10.1007/s11265-020-01581-8}, year = {2020}, date = {2020-07-20}, journal = {Journal of Signal Processing Systems}, volume = {92}, number = {10}, pages = {1133-1151}, publisher = {Springer}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
2019 |
|
J14 | Arrestier, Florian; Desnos, Karol; Juarez, Eduardo; Menard, Daniel Numerical Representation of Directed Acyclic Graphs for Efficient Dataflow Embedded Resource Allocation Journal Article Transactions on Embedded Computing Systems - EMSOFT Proceedings, @article{Arrestier_Numerical_2019, title = {Numerical Representation of Directed Acyclic Graphs for Efficient Dataflow Embedded Resource Allocation}, author = {Florian Arrestier and Karol Desnos and Eduardo Juarez and Daniel Menard }, doi = {10.1145/3358225}, year = {2019}, date = {2019-10-13}, journal = {Transactions on Embedded Computing Systems - EMSOFT Proceedings}, volume = {18}, number = {55}, pages = {1 - 22}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
J13 | Suriano, Leonardo; Arrestier, Florian; Rodriguez, Alfonso; Desnos, Karol; Heulot, Julien; Pelcat, Maxime; de la Torre, Eduardo DAMHSE: Programming Heterogeneous MPSoCs with Hardware Acceleration using Dataflow-based Design Space Exploration and Automated Rapid Prototyping Journal Article Microprocessors and Microsystems: Embedded Hardware Design (MICPRO), Elsevier @article{Suriano_DAMHSE_2019, title = {DAMHSE: Programming Heterogeneous MPSoCs with Hardware Acceleration using Dataflow-based Design Space Exploration and Automated Rapid Prototyping}, author = {Leonardo Suriano and Florian Arrestier and Alfonso Rodriguez and Karol Desnos and Julien Heulot and Maxime Pelcat and Eduardo de la Torre }, doi = {10.1016/j.micpro.2019.102882}, year = {2019}, date = {2019-09-01}, journal = {Microprocessors and Microsystems: Embedded Hardware Design (MICPRO)}, volume = {71}, publisher = {Elsevier}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
J12 | Madroñal, Daniel; Arrestier, Florian; Sancho, Jaime; Morvan, Antoine; Lazcano, Raquel; Desnos, Karol; Salvador, Ruben; Menard, Daniel; Juarez, Eduardo; Sanz, Cesar PAPIFY: automatic instrumentation and monitoring of dynamic dataflow applications based on PAPI Journal Article IEEE Access, @article{Madronal_Papify_2019, title = {PAPIFY: automatic instrumentation and monitoring of dynamic dataflow applications based on PAPI}, author = {Daniel Madro\~{n}al and Florian Arrestier and Jaime Sancho and Antoine Morvan and Raquel Lazcano and Karol Desnos and Ruben Salvador and Daniel Menard and Eduardo Juarez and Cesar Sanz }, doi = {10.1109/ACCESS.2019.2934223}, year = {2019}, date = {2019-08-11}, journal = {IEEE Access}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
J11 | Bonnot, Justine; Camus, Vincent; Desnos, Karol; Menard, Daniel Adaptive Simulation-based Framework for Error Characterization of Inexact Circuits Journal Article Microelectronics Reliability , @article{Bonnot_Adaptive_2019, title = {Adaptive Simulation-based Framework for Error Characterization of Inexact Circuits}, author = {Justine Bonnot and Vincent Camus and Karol Desnos and Daniel Menard}, editor = {Elsevier}, doi = {10.1016/j.microrel.2019.02.007}, year = {2019}, date = {2019-02-15}, journal = { Microelectronics Reliability }, abstract = {To design faster and more energy-efficient systems, numerous inexact arithmetic operators have been proposed, generally obtained by modifying the logic structure of conventional circuits. However, as the quality of service of an application has to be ensured, these operators need to be precisely characterized to be usable in commercial or real-life applications. The characterization of the error induced by inexact operators is commonly achieved with exhaustive or stochastic bit-accurate gate-level simulations. However, for high bit-widths, the time and memory required for such simulations become prohibitive. To overcome these limitations, a new characterization framework for inexact operators is proposed. The proposed framework characterizes the error induced by inexact operators in terms of mean error distance, error rate and maximum error distance, allowing to completely define the error probability mass function. By exploiting statistical properties of the approximation error, the number of simulations needed for precise characterization is minimized. From user-defined confidence requirements, the proposed method computes the minimal number of simulations to obtain the desired accuracy on the characterization for the error rate and mean error distance. The maximum error distance value is then extracted from the simulated samples using the extreme value theory. 
For 32-bit adders, the proposed method reduces the number of simulations needed up to a few tens of thousands points. }, keywords = {}, pubstate = {published}, tppubtype = {article} } To design faster and more energy-efficient systems, numerous inexact arithmetic operators have been proposed, generally obtained by modifying the logic structure of conventional circuits. However, as the quality of service of an application has to be ensured, these operators need to be precisely characterized to be usable in commercial or real-life applications. The characterization of the error induced by inexact operators is commonly achieved with exhaustive or stochastic bit-accurate gate-level simulations. However, for high bit-widths, the time and memory required for such simulations become prohibitive. To overcome these limitations, a new characterization framework for inexact operators is proposed. The proposed framework characterizes the error induced by inexact operators in terms of mean error distance, error rate and maximum error distance, allowing to completely define the error probability mass function. By exploiting statistical properties of the approximation error, the number of simulations needed for precise characterization is minimized. From user-defined confidence requirements, the proposed method computes the minimal number of simulations to obtain the desired accuracy on the characterization for the error rate and mean error distance. The maximum error distance value is then extracted from the simulated samples using the extreme value theory. For 32-bit adders, the proposed method reduces the number of simulations needed up to a few tens of thousands points. |
J10 | Enrici, Andrea; Lallet, Julien; Pacalet, Renaud; Apvrille, Ludovic; Desnos, Karol; Latif, Imran Model-Based Programming for Multi-processor Platforms with TTool/DIPLODOCUS and OMC Journal Article Communications in Computer and Information Science (Extended and revised paper from Model-Driven Engineering and Software Development (MODELSWARD)), Springer International Publishing @article{Enrici_Model_2019, title = {Model-Based Programming for Multi-processor Platforms with TTool/DIPLODOCUS and OMC}, author = {Andrea Enrici and Julien Lallet and Renaud Pacalet and Ludovic Apvrille and Karol Desnos and Imran Latif}, editor = {Springer }, isbn = {978-3-030-11030-7}, year = {2019}, date = {2019-01-01}, journal = { Communications in Computer and Information Science (Extended and revised paper from Model-Driven Engineering and Software Development (MODELSWARD))}, pages = {56--81}, publisher = {Springer International Publishing}, abstract = {The complexity of today's multi-processor architectures raises the need to increase the level of abstraction of software development paradigms above third-generation programming languages (e.g., C/C++). Code generation from model-based specifications is considered as a promising approach to increase the productivity and quality of software development, with respect to traditional paradigms where code is used as the main artifact to develop software. In this context, powerful and robust tools are needed in order to accomplish the transition from code-based programming to model-based programming. In this paper we propose a novel approach and tools where system-level models are compiled into standard C code while optimizing the system's memory footprint. We show the effectiveness of our approach with the model-based programming of UML/SysML diagrams for a 5G decoder. 
From the compiled C code, we generate both a software implementation for a Digital Signal Processor platform and a hardware-software implementation for a platform based on hardware Intellectual Property (IP) blocks. Our optimizations achieve a memory footprint reduction of 80.07% and 88.93%, respectively.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The complexity of today's multi-processor architectures raises the need to increase the level of abstraction of software development paradigms above third-generation programming languages (e.g., C/C++). Code generation from model-based specifications is considered as a promising approach to increase the productivity and quality of software development, with respect to traditional paradigms where code is used as the main artifact to develop software. In this context, powerful and robust tools are needed in order to accomplish the transition from code-based programming to model-based programming. In this paper we propose a novel approach and tools where system-level models are compiled into standard C code while optimizing the system's memory footprint. We show the effectiveness of our approach with the model-based programming of UML/SysML diagrams for a 5G decoder. From the compiled C code, we generate both a software implementation for a Digital Signal Processor platform and a hardware-software implementation for a platform based on hardware Intellectual Property (IP) blocks. Our optimizations achieve a memory footprint reduction of 80.07% and 88.93%, respectively. |
2018 |
|
J9 | Rubattu, Claudio; Palumbo, Francesca; Sau, Carlo; Salvador, Ruben; Sérot, Jocelyn; Desnos, Karol; Raffo, Luigi; Pelcat, Maxime Dataflow-Functional High-Level Synthesis for Coarse-Grained Reconfigurable Accelerators Journal Article Embedded Systems Letters, @article{Rubattu_2018_Dataflow, title = {Dataflow-Functional High-Level Synthesis for Coarse-Grained Reconfigurable Accelerators}, author = {Claudio Rubattu and Francesca Palumbo and Carlo Sau and Ruben Salvador and Jocelyn S\'{e}rot and Karol Desnos and Luigi Raffo and Maxime Pelcat}, editor = {IEEE }, doi = {10.1109/LES.2018.2882989}, year = {2018}, date = {2018-11-23}, journal = {Embedded Systems Letters}, pages = {4}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
2017 |
|
J8 | Pelcat, Maxime; Mercat, Alexandre; Desnos, Karol; Maggiani, Luca; Liu, Yanzhou; Heulot, Julien; Nezan, Jean-François; Hamidouche, Wassim; Ménard, Daniel; Bhattacharyya, Shuvra S Reproducible Evaluation of System Efficiency with a Model of Architecture: From Theory to Practice Journal Article Transactions on Computer-Aided Design of Integrated Circuits and Systems (TCAD), @article{Pelcat_Reproducible_2017, title = {Reproducible Evaluation of System Efficiency with a Model of Architecture: From Theory to Practice}, author = {Maxime Pelcat and Alexandre Mercat and Karol Desnos and Luca Maggiani and Yanzhou Liu and Julien Heulot and Jean-Fran\c{c}ois Nezan and Wassim Hamidouche and Daniel M\'{e}nard and Shuvra S Bhattacharyya}, editor = {IEEE}, url = {https://hal.archives-ouvertes.fr/hal-01646738/file/tcad_mpelcat17.pdf}, doi = { 10.1109/TCAD.2017.2774822}, year = {2017}, date = {2017-11-17}, journal = {Transactions on Computer-Aided Design of Integrated Circuits and Systems (TCAD)}, abstract = {Current trends in high performance and embedded computing include design of increasingly complex hardware architectures with high parallelism, heterogeneous processing elements and non-uniform communication resources. In order to take hardware and software design decisions, early evaluations of the system non-functional properties are needed. These evaluations of system efficiency require Electronic System-Level (ESL) information on both the algorithms and the architecture. Contrary to algorithm models for which a major body of work has been conducted on defining formal Models of Computation (MoCs), architecture models from the literature are mostly empirical models from which reproducible experimentation requires the accompanying software. In this paper, a precise definition of a Model of Architecture (MoA) is proposed that focuses on reproducibility and abstraction and removes the overlap previously existing between the notions of MoA and MoC. 
A first MoA, called the Linear System-Level Architecture Model (LSLA), is presented. To demonstrate the generic nature of the proposed new architecture modeling concepts, we show that the LSLA Model can be integrated flexibly with different MoCs. LSLA is then used to model the energy consumption of a State-of-the-Art Multiprocessor System-on-Chip (MPSoC) when running an application described using the Synchronous Dataflow (SDF) MoC. A method to automatically learn LSLA model parameters from platform measurements is introduced. Despite the high complexity of the underlying hardware and software, a simple LSLA model is demonstrated to estimate the energy consumption of the MPSoC with a fidelity of 86%.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Current trends in high performance and embedded computing include design of increasingly complex hardware architectures with high parallelism, heterogeneous processing elements and non-uniform communication resources. In order to take hardware and software design decisions, early evaluations of the system non-functional properties are needed. These evaluations of system efficiency require Electronic System-Level (ESL) information on both the algorithms and the architecture. Contrary to algorithm models for which a major body of work has been conducted on defining formal Models of Computation (MoCs), architecture models from the literature are mostly empirical models from which reproducible experimentation requires the accompanying software. In this paper, a precise definition of a Model of Architecture (MoA) is proposed that focuses on reproducibility and abstraction and removes the overlap previously existing between the notions of MoA and MoC. A first MoA, called the Linear System-Level Architecture Model (LSLA), is presented. To demonstrate the generic nature of the proposed new architecture modeling concepts, we show that the LSLA Model can be integrated flexibly with different MoCs. 
LSLA is then used to model the energy consumption of a State-of-the-Art Multiprocessor System-on-Chip (MPSoC) when running an application described using the Synchronous Dataflow (SDF) MoC. A method to automatically learn LSLA model parameters from platform measurements is introduced. Despite the high complexity of the underlying hardware and software, a simple LSLA model is demonstrated to estimate the energy consumption of the MPSoC with a fidelity of 86%. |
J7 | Mercat, Alexandre; Bonnot, Justine; Pelcat, Maxime; Desnos, Karol; Hamidouche, Wassim; Menard, Daniel Smart search space reduction for approximate computing: A low energy HEVC encoder case study Journal Article Journal of Systems Architecture (JSA), Elsevier @article{Mercat_Smart_2017, title = {Smart search space reduction for approximate computing: A low energy HEVC encoder case study}, author = {Alexandre Mercat and Justine Bonnot and Maxime Pelcat and Karol Desnos and Wassim Hamidouche and Daniel Menard}, editor = {Elsevier}, doi = {10.1016/j.sysarc.2017.09.003}, year = {2017}, date = {2017-10-01}, journal = {Journal of Systems Architecture (JSA)}, volume = {80}, pages = {56--67}, publisher = {Elsevier}, abstract = {The approximate computing paradigm provides methods to optimize algorithms while considering both application quality of service and computational complexity. Approximate computing can be applied at different levels of abstraction, from algorithm level to application level. Approximate computing at algorithm level reduces the computational complexity by approximating or skipping computational blocks. A number of applications in the signal and image processing domain integrate algorithms based on discrete optimization techniques. These techniques minimize a cost function by exploring an application parameter search space. In this paper, a new methodology is proposed that exploits the computation-skipping approximate computing concept. The methodology, named Smart Search Space Reduction (Sssr), explores at design time the Pareto relationship between computational complexity and application quality. At run time, an approximation manager can then early select a good candidate configuration. Sssr reduces the run time search space and, in turn, reduces computational complexity. An efficient Sssr technique adjusts at design time the configuration selectivity while selecting at run time the most suitable functions to skip. 
The real time High Efficiency Video Coding (HEVC) encoder in All Intra (AI) profile is used as a case study to illustrate the benefits of Sssr. In this application, two discrete optimizations are performed. They explore different coding parameters and select the values leading to the minimal cost in terms of a tradeoff between bitrate, quality and computational energy by acting on both the HEVC coding-tree partitioning and the intra-modes. Combining two Sssrs iterations on this use case, the energy consumption is reduced by up to 77%. Moreover, the combination of the two Sssrs iterations in comparison to using only one reduces the BD-BR bitrate/quality metric by 4% for the same energy consumption.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The approximate computing paradigm provides methods to optimize algorithms while considering both application quality of service and computational complexity. Approximate computing can be applied at different levels of abstraction, from algorithm level to application level. Approximate computing at algorithm level reduces the computational complexity by approximating or skipping computational blocks. A number of applications in the signal and image processing domain integrate algorithms based on discrete optimization techniques. These techniques minimize a cost function by exploring an application parameter search space. In this paper, a new methodology is proposed that exploits the computation-skipping approximate computing concept. The methodology, named Smart Search Space Reduction (Sssr), explores at design time the Pareto relationship between computational complexity and application quality. At run time, an approximation manager can then early select a good candidate configuration. Sssr reduces the run time search space and, in turn, reduces computational complexity. 
An efficient Sssr technique adjusts at design time the configuration selectivity while selecting at run time the most suitable functions to skip. The real time High Efficiency Video Coding (HEVC) encoder in All Intra (AI) profile is used as a case study to illustrate the benefits of Sssr. In this application, two discrete optimizations are performed. They explore different coding parameters and select the values leading to the minimal cost in terms of a tradeoff between bitrate, quality and computational energy by acting on both the HEVC coding-tree partitioning and the intra-modes. Combining two Sssrs iterations on this use case, the energy consumption is reduced by up to 77%. Moreover, the combination of the two Sssrs iterations in comparison to using only one reduces the BD-BR bitrate/quality metric by 4% for the same energy consumption. |
J6 | Lazcano, Raquel; Madroñal, Daniel; Salvador, Ruben; Desnos, Karol; Pelcat, Maxime; Guerra, R; Fabelo, H; Ortega, S; Lopez, S; Callico, G M; Juarez, Eduardo; Sanz, Cesar Porting a PCA-based hyperspectral image dimensionality reduction algorithm for brain cancer detection on a manycore architecture Journal Article Journal of Systems Architecture (JSA), @article{Lazcano_PCA_2017, title = {Porting a PCA-based hyperspectral image dimensionality reduction algorithm for brain cancer detection on a manycore architecture}, author = {Raquel Lazcano and Daniel Madro\~{n}al and Ruben Salvador and Karol Desnos and Maxime Pelcat and R Guerra and H Fabelo and S Ortega and S Lopez and G M Callico and Eduardo Juarez and Cesar Sanz}, editor = {Elsevier}, url = {http://www.sciencedirect.com/science/article/pii/S1383762116302934}, doi = {10.1016/j.sysarc.2017.05.001}, issn = {1383-7621}, year = {2017}, date = {2017-10-01}, journal = {Journal of Systems Architecture (JSA)}, volume = {77}, pages = {101 - 111}, abstract = {Abstract This paper presents a study of the parallelism of a Principal Component Analysis (PCA) algorithm and its adaptation to a manycore MPPA (Massively Parallel Processor Array) architecture, which gathers 256 cores distributed among 16 clusters. This study focuses on porting hyperspectral image processing into manycore platforms by optimizing their processing to fulfill real-time constraints, fixed by the image capture rate of the hyperspectral sensor. Real-time is a challenging objective for hyperspectral image processing, as hyperspectral images consist of extremely large volumes of data and this problem is often solved by reducing image size before starting the processing itself. To tackle the challenge, this paper proposes an analysis of the intrinsic parallelism of the different stages of the PCA algorithm with the objective of exploiting the parallelization possibilities offered by an MPPA manycore architecture. 
Furthermore, the impact on internal communication when increasing the level of parallelism is also analyzed. Experimenting with medical images obtained from two different surgical use cases, an average speedup of 20 is achieved. Internal communications are shown to rapidly become the bottleneck that reduces the achievable speedup offered by the PCA parallelization. As a result of this study, PCA processing time is reduced to less than 6s, a time compatible with the targeted brain surgery application requiring 1frame-per-minute.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Abstract This paper presents a study of the parallelism of a Principal Component Analysis (PCA) algorithm and its adaptation to a manycore MPPA (Massively Parallel Processor Array) architecture, which gathers 256 cores distributed among 16 clusters. This study focuses on porting hyperspectral image processing into manycore platforms by optimizing their processing to fulfill real-time constraints, fixed by the image capture rate of the hyperspectral sensor. Real-time is a challenging objective for hyperspectral image processing, as hyperspectral images consist of extremely large volumes of data and this problem is often solved by reducing image size before starting the processing itself. To tackle the challenge, this paper proposes an analysis of the intrinsic parallelism of the different stages of the PCA algorithm with the objective of exploiting the parallelization possibilities offered by an MPPA manycore architecture. Furthermore, the impact on internal communication when increasing the level of parallelism is also analyzed. Experimenting with medical images obtained from two different surgical use cases, an average speedup of 20 is achieved. Internal communications are shown to rapidly become the bottleneck that reduces the achievable speedup offered by the PCA parallelization. 
As a result of this study, PCA processing time is reduced to less than 6s, a time compatible with the targeted brain surgery application requiring 1frame-per-minute. |
J5 | Ammar, Manel; Baklouti, Mouna; Pelcat, Maxime; Desnos, Karol; Abid, Mohamed Comparing Three Clustering-based Scheduling Methods for Energy-Aware Rapid Design of MP2SoCs Journal Article Journal of Signal Processing Systems (JSPS), @article{Ammar_Comparing_2017, title = {Comparing Three Clustering-based Scheduling Methods for Energy-Aware Rapid Design of MP2SoCs}, author = {Manel Ammar and Mouna Baklouti and Maxime Pelcat and Karol Desnos and Mohamed Abid}, editor = {Springer}, doi = {10.1007/s11265-017-1261-7}, issn = {1939-8115}, year = {2017}, date = {2017-08-14}, journal = {Journal of Signal Processing Systems (JSPS)}, abstract = {In recent years, the Electronic Design Automation (EDA) community shifted spotlights from performance to energy efficiency. Consequently, energy consumption becomes a key criterion to take into consideration during Design Space Exploration (DSE). Finding a trade-off between energy consumption and performance early in the design flow in order to satisfy time-to-market is a design challenge of EDA tools. In this paper, we propose the Energy-aWAre Rapid Design of MP2SoC (EWARDS) framework. The EWARDS framework aims at exploring, at design time, the performance and energy capabilities of modern Massively Parallel Multi-Processors System-on-Chip (MP2SoC). The key contribution of the proposed framework is the implementation of an energy-aware scheduling process, named PreesmPE , that combines state-of-the-art power management techniques together with Clustering-based Scheduling. The scheduling process is integrated into a Model-Driven Engineering (MDE)-based DSE approach to optimize both performance and energy efficiency in MP2SoC. Moreover, EWARDS extends the Modeling and Analysis of Real-Time and Embedded systems (MARTE) profile with power aspects of MP2SoC systems providing a high-level design entry. To demonstrate the efficiency of the proposed approach, we conducted experiments using the H.263 codec and the FFT algorithm. 
The obtained results demonstrate that the energy-aware scheduling process can effectively save energy in MP2SoC systems. They also confirmed that our MDE-based approach accelerates the DSE process while generating energy-efficient design decisions.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In recent years, the Electronic Design Automation (EDA) community shifted spotlights from performance to energy efficiency. Consequently, energy consumption becomes a key criterion to take into consideration during Design Space Exploration (DSE). Finding a trade-off between energy consumption and performance early in the design flow in order to satisfy time-to-market is a design challenge of EDA tools. In this paper, we propose the Energy-aWAre Rapid Design of MP2SoC (EWARDS) framework. The EWARDS framework aims at exploring, at design time, the performance and energy capabilities of modern Massively Parallel Multi-Processors System-on-Chip (MP2SoC). The key contribution of the proposed framework is the implementation of an energy-aware scheduling process, named PreesmPE , that combines state-of-the-art power management techniques together with Clustering-based Scheduling. The scheduling process is integrated into a Model-Driven Engineering (MDE)-based DSE approach to optimize both performance and energy efficiency in MP2SoC. Moreover, EWARDS extends the Modeling and Analysis of Real-Time and Embedded systems (MARTE) profile with power aspects of MP2SoC systems providing a high-level design entry. To demonstrate the efficiency of the proposed approach, we conducted experiments using the H.263 codec and the FFT algorithm. The obtained results demonstrate that the energy-aware scheduling process can effectively save energy in MP2SoC systems. They also confirmed that our MDE-based approach accelerates the DSE process while generating energy-efficient design decisions. |
2016 |
|
J4 | Ammar, Manel; Baklouti, Mouna; Pelcat, Maxime; Desnos, Karol; Abid, Mohamed MDE-based Rapid DSE of multi-core embedded systems: The H.264 Decoder Case Study Journal Article Informacije Midem-journal of Microelectronics Electronic Components and Materials, @article{Ammar_MDE_2016, title = {MDE-based Rapid DSE of multi-core embedded systems: The H.264 Decoder Case Study}, author = {Manel Ammar and Mouna Baklouti and Maxime Pelcat and Karol Desnos and Mohamed Abid}, url = {https://hal-univ-rennes1.archives-ouvertes.fr/hal-01502348}, year = {2016}, date = {2016-01-01}, journal = {Informacije Midem-journal of Microelectronics Electronic Components and Materials}, volume = {46}, number = {4}, pages = {219--228}, abstract = {The recent advances in Unified Modeling Language (UML) give a valuable milestone for its application to modern embedded systems design space exploration. However, it is essential to remember that UML is unable to solve the difficulty associated with embedded systems analysis, but it only provides standard modeling means. A reliable Design Space Exploration (DSE) process which suits the peculiarities of complex embedded systems design is necessary to complement the use of UML for design space exploration. In this article, we propose a Model Driven Engineering-based (MDE) co-design flow that combines high-level data-intensive application analysis with rapid prototyping. In order to specify the embedded system, our methodology relies on the Modeling and Analysis of Real-Time and Embedded Systems (MARTE) UML profile. Moreover, the present contribution uses the Parameterized and Interfaced Synchronous Dataflow (pSDF) Model-of-Computation (MoC) and a model based on the IP-XACT standard as intermediate levels of abstraction to facilitate the analysis step in the co-design flow. The rapid prototyping process relies on the pSDF graph of the application and a system-level description of the architecture. 
This paper presents our Hw/Sw cospecification methodology, including its support for gradual refinement of the high-level models towards lower levels of abstraction for design space exploration purposes.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The recent advances in Unified Modeling Language (UML) give a valuable milestone for its application to modern embedded systems design space exploration. However, it is essential to remember that UML is unable to solve the difficulty associated with embedded systems analysis, but it only provides standard modeling means. A reliable Design Space Exploration (DSE) process which suits the peculiarities of complex embedded systems design is necessary to complement the use of UML for design space exploration. In this article, we propose a Model Driven Engineering-based (MDE) co-design flow that combines high-level data-intensive application analysis with rapid prototyping. In order to specify the embedded system, our methodology relies on the Modeling and Analysis of Real-Time and Embedded Systems (MARTE) UML profile. Moreover, the present contribution uses the Parameterized and Interfaced Synchronous Dataflow (pSDF) Model-of-Computation (MoC) and a model based on the IP-XACT standard as intermediate levels of abstraction to facilitate the analysis step in the co-design flow. The rapid prototyping process relies on the pSDF graph of the application and a system-level description of the architecture. This paper presents our Hw/Sw cospecification methodology, including its support for gradual refinement of the high-level models towards lower levels of abstraction for design space exploration purposes. |
J3 | Desnos, Karol; Pelcat, Maxime; Nezan, Jean-François; Aridhi, Slaheddine On Memory Reuse Between Inputs and Outputs of Dataflow Actors Journal Article Transactions on Embedded Computing Systems (TECS), ACM @article{Desnos_Memory_2016, title = {On Memory Reuse Between Inputs and Outputs of Dataflow Actors}, author = {Karol Desnos and Maxime Pelcat and Jean-Fran\c{c}ois Nezan and Slaheddine Aridhi}, editor = {ACM}, url = {http://doi.acm.org/10.1145/2871744}, doi = {10.1145/2871744}, issn = {1539-9087}, year = {2016}, date = {2016-01-01}, journal = {Transactions on Embedded Computing Systems (TECS)}, volume = {15}, number = {2}, pages = {30:1--30:25}, publisher = {ACM}, address = {New York, NY, USA}, abstract = {This article introduces a new technique to minimize the memory footprints of Digital Signal Processing (DSP) applications specified with Synchronous Dataflow (SDF) graphs and implemented on shared-memory Multiprocessor System-on-Chip (MPSoCs). In addition to the SDF specification, which captures data dependencies between coarse-grained tasks called actors, the proposed technique relies on two optional inputs abstracting the internal data dependencies of actors: annotations of the ports of actors, and script-based specifications of merging opportunities between input and output buffers of actors. Experimental results on a set of applications show a reduction of the memory footprint by 48% compared to state-of-the-art minimization techniques.}, keywords = {}, pubstate = {published}, tppubtype = {article} } This article introduces a new technique to minimize the memory footprints of Digital Signal Processing (DSP) applications specified with Synchronous Dataflow (SDF) graphs and implemented on shared-memory Multiprocessor System-on-Chip (MPSoCs). 
In addition to the SDF specification, which captures data dependencies between coarse-grained tasks called actors, the proposed technique relies on two optional inputs abstracting the internal data dependencies of actors: annotations of the ports of actors, and script-based specifications of merging opportunities between input and output buffers of actors. Experimental results on a set of applications show a reduction of the memory footprint by 48% compared to state-of-the-art minimization techniques. |
2014 |
|
J2 | Desnos, Karol; Pelcat, Maxime; Nezan, Jean-François; Aridhi, Slaheddine Memory Analysis and Optimized Allocation of Dataflow Applications on Shared-Memory MPSoCs Journal Article Journal of Signal Processing Systems, @article{Desnos_Memory_2015, title = {Memory Analysis and Optimized Allocation of Dataflow Applications on Shared-Memory MPSoCs}, author = {Karol Desnos and Maxime Pelcat and Jean-Fran\c{c}ois Nezan and Slaheddine Aridhi}, editor = {Springer}, url = {https://doi.org/10.1007/s11265-014-0952-6}, doi = {10.1007/s11265-014-0952-6}, issn = {1939-8115}, year = {2014}, date = {2014-11-01}, journal = {Journal of Signal Processing Systems}, volume = {80}, number = {1}, pages = {19--37}, abstract = {The majority of applications, ranging from the low complexity to very multifaceted entities requiring dedicated hardware accelerators, are very well suited for Multiprocessor Systems-on-Chips (MPSoCs). It is critical to understand the general characteristics of a given embedded application: its behavior and its requirements in terms of MPSoC resources. This paper presents a complete method to study the important aspect of memory characteristic of an application. This method spans the theoretical, architecture-independent memory characterization to the quasi optimal static memory allocation of an application on a real shared-memory MPSoCs. The application is modeled as an Synchronous Dataflow (SDF) graph which is used to derive a Memory Exclusion Graph (MEG) essential for the analysis and allocation techniques. Practical considerations, such as cache coherence and memory broadcasting, are extensively treated. Memory footprint optimization is demonstrated using the example of a stereo matching algorithm from the computer vision domain. 
Experimental results show a reduction of the memory footprint by up to 43 % compared to a state-of-the-art minimization technique, a throughput improvement of 33 % over dynamic allocation, and the introduction of a tradeoff between multicore scheduling flexibility and memory footprint.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The majority of applications, ranging from the low complexity to very multifaceted entities requiring dedicated hardware accelerators, are very well suited for Multiprocessor Systems-on-Chips (MPSoCs). It is critical to understand the general characteristics of a given embedded application: its behavior and its requirements in terms of MPSoC resources. This paper presents a complete method to study the important aspect of memory characteristic of an application. This method spans the theoretical, architecture-independent memory characterization to the quasi optimal static memory allocation of an application on a real shared-memory MPSoCs. The application is modeled as an Synchronous Dataflow (SDF) graph which is used to derive a Memory Exclusion Graph (MEG) essential for the analysis and allocation techniques. Practical considerations, such as cache coherence and memory broadcasting, are extensively treated. Memory footprint optimization is demonstrated using the example of a stereo matching algorithm from the computer vision domain. Experimental results show a reduction of the memory footprint by up to 43 % compared to a state-of-the-art minimization technique, a throughput improvement of 33 % over dynamic allocation, and the introduction of a tradeoff between multicore scheduling flexibility and memory footprint. |
J1 | Zhou, Zheng; Plishker, William; Bhattacharyya, Shuvra S; Desnos, Karol; Pelcat, Maxime; Nezan, Jean-François Scheduling of Parallelized Synchronous Dataflow Actors for Multicore Signal Processing Journal Article Journal of Signal Processing Systems (JSPS), @article{Zhou_Scheduling_2014, title = {Scheduling of Parallelized Synchronous Dataflow Actors for Multicore Signal Processing}, author = {Zheng Zhou and William Plishker and Shuvra S. Bhattacharyya and Karol Desnos and Maxime Pelcat and Jean-Fran\c{c}ois Nezan}, editor = {Springer}, url = {https://doi.org/10.1007/s11265-014-0956-2}, doi = {10.1007/s11265-014-0956-2}, issn = {1939-8115}, year = {2014}, date = {2014-10-07}, journal = {Journal of Signal Processing Systems (JSPS)}, volume = {83}, number = {3}, pages = {309--328}, abstract = {Parallelization of Digital Signal Processing (DSP) software is an important trend in Multiprocessor System-on-Chip (MPSoC) implementation. The performance of DSP systems composed of parallelized computations depends on the scheduling technique, which must in general allocate computation and communication resources for competing tasks, and ensure that data dependencies are satisfied. In this paper, we formulate a new type of parallel task scheduling problem called Parallel Actor Scheduling (PAS) for MPSoC mapping of DSP systems that are represented as Synchronous Dataflow (SDF) graphs. In contrast to traditional SDF-based scheduling techniques, which focus on exploiting graph level (inter-actor) parallelism, the PAS problem targets the integrated exploitation of both intra- and inter-actor parallelism for platforms in which individual actors can be parallelized across multiple processing units. We first address a special case of the PAS problem in which all of the actors in the DSP application or subsystem being optimized are parallel actors (i.e., they can be parallelized to exploit multiple cores). 
For this special case, we develop and experimentally evaluate a two-phase scheduling framework with three work flows that involve particle swarm optimization (PSO) --- PSO with a mixed integer programming formulation, PSO with simulated annealing, and PSO with a fast heuristic based on list scheduling. Then, we extend our scheduling framework to support the general PAS problem, which considers both parallel actors and sequential actors (actors that cannot be parallelized) in an integrated manner. We demonstrate that our PAS-targeted scheduling framework provides a useful range of trade-offs between synthesis time requirements and the quality of the derived solutions. We also demonstrate the performance of our scheduling framework from two aspects: simulations on a diverse set of randomly generated SDF graphs, and implementations of an image processing application and a software defined radio benchmark on a state-of-the-art multicore DSP platform.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Parallelization of Digital Signal Processing (DSP) software is an important trend in Multiprocessor System-on-Chip (MPSoC) implementation. The performance of DSP systems composed of parallelized computations depends on the scheduling technique, which must in general allocate computation and communication resources for competing tasks, and ensure that data dependencies are satisfied. In this paper, we formulate a new type of parallel task scheduling problem called Parallel Actor Scheduling (PAS) for MPSoC mapping of DSP systems that are represented as Synchronous Dataflow (SDF) graphs. In contrast to traditional SDF-based scheduling techniques, which focus on exploiting graph level (inter-actor) parallelism, the PAS problem targets the integrated exploitation of both intra- and inter-actor parallelism for platforms in which individual actors can be parallelized across multiple processing units. 
We first address a special case of the PAS problem in which all of the actors in the DSP application or subsystem being optimized are parallel actors (i.e., they can be parallelized to exploit multiple cores). For this special case, we develop and experimentally evaluate a two-phase scheduling framework with three work flows that involve particle swarm optimization (PSO) --- PSO with a mixed integer programming formulation, PSO with simulated annealing, and PSO with a fast heuristic based on list scheduling. Then, we extend our scheduling framework to support the general PAS problem, which considers both parallel actors and sequential actors (actors that cannot be parallelized) in an integrated manner. We demonstrate that our PAS-targeted scheduling framework provides a useful range of trade-offs between synthesis time requirements and the quality of the derived solutions. We also demonstrate the performance of our scheduling framework from two aspects: simulations on a diverse set of randomly generated SDF graphs, and implementations of an image processing application and a software defined radio benchmark on a state-of-the-art multicore DSP platform. |
Conferences
2024 |
|
C52 | Vacher, Quentin; Beuve, Nicolas; Allaire, Paul; Marty, Thibaut; Dardaillon, Mickaël; Desnos, Karol Hybrid Genetic Programming and Deep Reinforcement Learning for Low-complexity Robot Arm Trajectory Planning Inproceedings Conference on Evolutionary Computation Theory and Applications (ECTA), Porto, Portugal @inproceedings{Vacher_Hybrid_2024, title = {Hybrid Genetic Programming and Deep Reinforcement Learning for Low-complexity Robot Arm Trajectory Planning}, author = {Quentin Vacher and Nicolas Beuve and Paul Allaire and Thibaut Marty and Micka\"{e}l Dardaillon and Karol Desnos }, year = {2024}, date = {2024-11-20}, booktitle = {Conference on Evolutionary Computation Theory and Applications (ECTA)}, publisher = {Scitepress}, address = {Porto, Portugal}, organization = {International Joint Conference on Computational Intelligence}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C51 | Szolc, Hubert; Desnos, Karol; Kryjak, Tomasz Tangled Program Graphs as an Alternative to DRL-Based Control Algorithms for UAVs Inproceedings Signal Processing: Algorithms, Architectures, Arrangements, and Applications (SPA2024), Poznan, Poland @inproceedings{Szolc_Tangled_2024, title = {Tangled Program Graphs as an Alternative to DRL-Based Control Algorithms for UAVs}, author = {Hubert Szolc and Karol Desnos and Tomasz Kryjak }, year = {2024}, date = {2024-09-25}, booktitle = {Signal Processing: Algorithms, Architectures, Arrangements, and Applications (SPA2024)}, publisher = {IEEE}, address = {Poznan, Poland}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C50 | Renaud, Ophélie; Desnos, Karol; Raffin, Erwan; Nezan, Jean-François Multicore and Network Topology Codesign for Pareto-Optimal Multinode Architecture Inproceedings European Signal Processing Conference (EUSIPCO), Lyon, France @inproceedings{Renaud__2024, title = {Multicore and Network Topology Codesign for Pareto-Optimal Multinode Architecture}, author = {Oph\'{e}lie Renaud and Karol Desnos and Erwan Raffin and Jean-Fran\c{c}ois Nezan}, year = {2024}, date = {2024-08-26}, booktitle = {European Signal Processing Conference (EUSIPCO)}, publisher = {IEEE}, address = {Lyon, France}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C49 | Chillet, Alice; Gerzaguet, Robin; Desnos, Karol; Gautier, Matthieu; Lohan, Elena Simona; Nogues, Erwan; Valkama, Mikko How to Design a Channel-Resilient Database for Radio Frequency Fingerprint Identification? Inproceedings International Conference on Communications (ICC), Denver, CO, USA @inproceedings{Chillet_How_2024, title = {How to Design a Channel-Resilient Database for Radio Frequency Fingerprint Identification?}, author = {Alice Chillet and Robin Gerzaguet and Karol Desnos and Matthieu Gautier and Elena Simona Lohan and Erwan Nogues and Mikko Valkama}, year = {2024}, date = {2024-06-09}, booktitle = {International Conference on Communications (ICC)}, publisher = {IEEE}, address = {Denver, CO, USA}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C48 | Wang, Sunrise; Gac, Nicolas; Miomandre, Hugo; Nezan, Jean-François; Desnos, Karol; Orieux, François Algorithmic and Hardware Design Space Exploration for Radio-Interferometric Algorithms Inproceedings Workshop on Design and Architectures for Signal and Image Processing (DASIP), @inproceedings{Wang_Algorithmic_2024, title = {Algorithmic and Hardware Design Space Exploration for Radio-Interferometric Algorithms}, author = {Sunrise Wang and Nicolas Gac and Hugo Miomandre and Jean-Fran\c{c}ois Nezan and Karol Desnos and Fran\c{c}ois Orieux}, year = {2024}, date = {2024-01-17}, booktitle = {Workshop on Design and Architectures for Signal and Image Processing (DASIP)}, publisher = {Springer}, series = {LNCS}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
2023 |
|
C47 | Marie, Alban; Chen, Fu; Desnos, Karol; Zhou, Jinjia; Morin, Luce; Zhang, Lu Towards Machine Perception Aware Image Quality Assessment Inproceedings 25th International Workshop on Multimedia Signal Processing, Poitiers, France @inproceedings{Marie_Towards_2023, title = {Towards Machine Perception Aware Image Quality Assessment}, author = {Alban Marie and Fu Chen and Karol Desnos and Jinjia Zhou and Luce Morin and Lu Zhang }, year = {2023}, date = {2023-09-27}, booktitle = {25th International Workshop on Multimedia Signal Processing}, publisher = {IEEE}, address = {Poitiers, France}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C46 | Chillet, Alice; Boyer, Baptiste; Gerzaguet, Robin; Desnos, Karol; Gautier, Matthieu Tangled Program Graph for Radio-Frequency Fingerprint Identification Inproceedings International Symposium on Personal, Indoor and Mobile Radio Communications (PIMRC), Toronto, Canada @inproceedings{Chillet_Tangled_2023, title = {Tangled Program Graph for Radio-Frequency Fingerprint Identification}, author = {Alice Chillet and Baptiste Boyer and Robin Gerzaguet and Karol Desnos and Matthieu Gautier}, year = {2023}, date = {2023-09-05}, booktitle = {International Symposium on Personal, Indoor and Mobile Radio Communications (PIMRC)}, publisher = {IEEE}, address = {Toronto, Canada}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C45 | Renaud, Ophélie; Haggui, Naouel; Desnos, Karol; Nezan, Jean-François Automated Clustering and Pipelining of Dataflow Actors for Controlled Scheduling Complexity Inproceedings European Signal Processing Conference (EUSIPCO), Helsinki, Finland @inproceedings{Renaud_Automated_2023, title = {Automated Clustering and Pipelining of Dataflow Actors for Controlled Scheduling Complexity}, author = {Oph\'{e}lie Renaud and Naouel Haggui and Karol Desnos and Jean-Fran\c{c}ois Nezan}, year = {2023}, date = {2023-09-04}, booktitle = {European Signal Processing Conference (EUSIPCO)}, publisher = {IEEE}, address = {Helsinki, Finland}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C44 | Marie, Alban; Desnos, Karol; Morin, Luce; Zhang, Lu Evaluation of Image Quality Assessment Metrics for Semantic Segmentation in a Machine-to-Machine Communication Scenario Inproceedings International Conference on Quality of Multimedia Experience (QoMEX), Ghent, Belgium @inproceedings{Marie_Evaluation_2023, title = {Evaluation of Image Quality Assessment Metrics for Semantic Segmentation in a Machine-to-Machine Communication Scenario}, author = {Alban Marie and Karol Desnos and Luce Morin and Lu Zhang }, year = {2023}, date = {2023-06-20}, booktitle = {International Conference on Quality of Multimedia Experience (QoMEX)}, publisher = {IEEE}, address = {Ghent, Belgium}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C43 | Renaud, Ophélie; Gageot, Dylan; Desnos, Karol; Nezan, Jean-François SCAPE: HW-Aware Clustering of Dataflow Actors for Tunable Scheduling Complexity Inproceedings Workshop on Design and Architectures for Signal and Image Processing (DASIP), Toulouse, France @inproceedings{Renaud_SCAPE_2023, title = {SCAPE: HW-Aware Clustering of Dataflow Actors for Tunable Scheduling Complexity}, author = {Oph\'{e}lie Renaud and Dylan Gageot and Karol Desnos and Jean-Fran\c{c}ois Nezan}, year = {2023}, date = {2023-01-16}, booktitle = {Workshop on Design and Architectures for Signal and Image Processing (DASIP)}, publisher = {Springer}, address = {Toulouse, France }, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
2022 |
|
C42 | Desnos, Karol; Bourgoin, Thomas; Sourbier, Nicolas; Dardaillon, Mickaël; Gesny, Olivier; Pelcat, Maxime Ultra-Fast Machine Learning Inference through C Code Generation for Tangled Program Graphs Inproceedings International Workshop on Signal Processing Systems (SiPS), Rennes, France @inproceedings{Desnos_Ultra_2022, title = {Ultra-Fast Machine Learning Inference through C Code Generation for Tangled Program Graphs}, author = {Karol Desnos and Thomas Bourgoin and Nicolas Sourbier and Micka\"{e}l Dardaillon and Olivier Gesny and Maxime Pelcat}, year = {2022}, date = {2022-11-02}, booktitle = {International Workshop on Signal Processing Systems (SiPS)}, publisher = {IEEE}, address = {Rennes, France}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C41 | Marie, Alban; Desnos, Karol; Morin, Luce; Zhang, Lu Video Coding for Machines: Large-Scale Evaluation of DNNs Robustness to Compression Artifacts for Semantic Segmentation Inproceedings International Workshop on Multimedia Signal Processing (MMSP), Shanghai, China @inproceedings{Marie_Video_2022, title = {Video Coding for Machines: Large-Scale Evaluation of DNNs Robustness to Compression Artifacts for Semantic Segmentation}, author = {Alban Marie and Karol Desnos and Luce Morin and Lu Zhang}, year = {2022}, date = {2022-09-26}, booktitle = {International Workshop on Multimedia Signal Processing (MMSP)}, publisher = {IEEE}, address = {Shanghai, China}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C40 | Sourbier, Nicolas; Bonnot, Justine; Gesny, Olivier; Majorczyk, Frédéric; Desnos, Karol; Guyet, Thomas; Pelcat, Maxime Imbalanced Classification with TPG Genetic Programming: Impact of Problem Imbalance and Selection Mechanisms Inproceedings Proceedings of the Genetic and Evolutionary Computation Conference Companion (GECCO), Boston, USA @inproceedings{Sourbier_Imbalanced_2022, title = {Imbalanced Classification with TPG Genetic Programming: Impact of Problem Imbalance and Selection Mechanisms}, author = {Nicolas Sourbier and Justine Bonnot and Olivier Gesny and Fr\'{e}d\'{e}ric Majorczyk and Karol Desnos and Thomas Guyet and Maxime Pelcat}, year = {2022}, date = {2022-07-09}, booktitle = {Proceedings of the Genetic and Evolutionary Computation Conference Companion (GECCO)}, publisher = {ACM}, address = {Boston, USA}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C39 | Honorat, Alexandre; Bourgoin, Thomas; Miomandre, Hugo; Desnos, Karol; Menard, Daniel; Nezan, Jean-François Influence of Dataflow Graph Moldable Parameters on Optimization Criteria Inproceedings Workshop on Design and Architectures for Signal and Image Processing (DASIP), Budapest, Hungary @inproceedings{Honorat_Influence_2022, title = {Influence of Dataflow Graph Moldable Parameters on Optimization Criteria}, author = {Alexandre Honorat and Thomas Bourgoin and Hugo Miomandre and Karol Desnos and Daniel Menard and Jean-Fran\c{c}ois Nezan }, year = {2022}, date = {2022-06-20}, booktitle = {Workshop on Design and Architectures for Signal and Image Processing (DASIP)}, address = {Budapest, Hungary}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C38 | Marie, Alban; Desnos, Karol; Morin, Luce; Zhang, Lu Expert Training: Enhancing AI Resilience to Image Coding Artifacts Inproceedings Forthcoming Electronic Imaging, Image Processing: Algorithms and Systems XX, @inproceedings{Marie_Expert_2022, title = {Expert Training: Enhancing AI Resilience to Image Coding Artifacts}, author = {Alban Marie and Karol Desnos and Luce Morin and Lu Zhang}, year = {2022}, date = {2022-01-17}, booktitle = {Electronic Imaging, Image Processing: Algorithms and Systems XX}, publisher = {Society for Imaging Science and Technology}, keywords = {}, pubstate = {forthcoming}, tppubtype = {inproceedings} } |
2021 |
|
C37 | Desnos, Karol; Sourbier, Nicolas; Raumer, Pierre-Yves; Gesny, Olivier; Pelcat, Maxime GEGELATI: Lightweight Artificial Intelligence through Generic and Evolvable Tangled Program Graphs Inproceedings Workshop on Design and Architectures for Signal and Image Processing (DASIP), Budapest, Hungary @inproceedings{Desnos_GEGELATI_2021, title = {GEGELATI: Lightweight Artificial Intelligence through Generic and Evolvable Tangled Program Graphs}, author = {Karol Desnos and Nicolas Sourbier and Pierre-Yves Raumer and Olivier Gesny and Maxime Pelcat }, year = {2021}, date = {2021-01-18}, booktitle = {Workshop on Design and Architectures for Signal and Image Processing (DASIP)}, publisher = {ACM}, address = {Budapest, Hungary}, series = {International Conference Proceedings Series (ICPS)}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
2020 |
|
C36 | Honorat, Alexandre; Desnos, Karol; Dardaillon, Mickaël; Nezan, Jean-François A Fast Heuristic to Pipeline SDF Graphs Inproceedings International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS), Pythagorion, Greece @inproceedings{Honorat_Fast_2020, title = {A Fast Heuristic to Pipeline SDF Graphs}, author = {Alexandre Honorat and Karol Desnos and Micka\"{e}l Dardaillon and Jean-Fran\c{c}ois Nezan}, doi = {10.1007/978-3-030-60939-9_10}, year = {2020}, date = {2020-07-05}, booktitle = {International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS)}, volume = {12471}, publisher = {Springer}, address = {Pythagorion, Greece}, series = {LNCS}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C35 | Honorat, Alexandre; Desnos, Karol; Bhattacharyya, Shuvra S; Nezan, Jean-François Scheduling of Synchronous Dataflow Graphs with Partially Periodic Real-Time Constraints Inproceedings Conference on Real-Time Networks and Systems (RTNS), Paris, France @inproceedings{Honorat_Scheduling_2020, title = {Scheduling of Synchronous Dataflow Graphs with Partially Periodic Real-Time Constraints}, author = {Alexandre Honorat and Karol Desnos and Shuvra S. Bhattacharyya and Jean-Fran\c{c}ois Nezan}, doi = {10.1145/3394810.3394820}, year = {2020}, date = {2020-06-09}, booktitle = {Conference on Real-Time Networks and Systems (RTNS)}, pages = {22--33}, publisher = {ACM}, address = {Paris, France}, series = {ICPS}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C34 | Bonnot, Justine; Desnos, Karol; Menard, Daniel Fast Kriging-based Error Evaluation for Approximate Computing Systems Inproceedings Design, Automation and Test in Europe Conference (DATE), @inproceedings{Bonnot_Fast_2020, title = {Fast Kriging-based Error Evaluation for Approximate Computing Systems}, author = {Justine Bonnot and Karol Desnos and Daniel Menard}, doi = {10.23919/DATE48585.2020.9116320}, year = {2020}, date = {2020-03-09}, booktitle = {Design, Automation and Test in Europe Conference (DATE)}, publisher = {ACM}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
2019 |
|
C33 | Palumbo, Francesca; Fanni, Tiziana; Sau, Carlo; Rodriguez, Alfonso; Madroñal, Daniel; Desnos, Karol; Morvan, Antoine; Pelcat, Maxime; Rubattu, Claudio; Lazcano, Raquel; Raffo, Luigi; de la Torre, Eduardo; Juarez, Eduardo; Sanz, Cesar; de la Roja, Pablo Sanchez Hardware/Software Self-Adaptation in CPS: the CERBERO Project Approach Inproceedings International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS), Pythagorion, Greece @inproceedings{Palumbo_Hardware_2019, title = {Hardware/Software Self-Adaptation in CPS: the CERBERO Project Approach}, author = {Francesca Palumbo and Tiziana Fanni and Carlo Sau and Alfonso Rodriguez and Daniel Madro\~{n}al and Karol Desnos and Antoine Morvan and Maxime Pelcat and Claudio Rubattu and Raquel Lazcano and Luigi Raffo and Eduardo de la Torre and Eduardo Juarez and Cesar Sanz and Pablo Sanchez de la Roja}, doi = {10.1007/978-3-030-27562-4_30}, year = {2019}, date = {2019-07-07}, booktitle = {International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS)}, volume = {11733}, pages = {416--428}, publisher = {Springer}, address = {Pythagorion, Greece}, series = {LNCS}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C32 | Honorat, Alexandre; Desnos, Karol; Pelcat, Maxime; Nezan, Jean-François Modeling Nested For Loops with Explicit Parallelism in Synchronous Dataflow Graphs Inproceedings International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS), Pythagorion, Greece @inproceedings{Honorat_Modeling_2019, title = {Modeling Nested For Loops with Explicit Parallelism in Synchronous Dataflow Graphs}, author = {Alexandre Honorat and Karol Desnos and Maxime Pelcat and Jean-Fran\c{c}ois Nezan}, doi = {10.1007/978-3-030-27562-4_19}, year = {2019}, date = {2019-07-07}, booktitle = {International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS)}, volume = {11733}, pages = {269--280}, publisher = {Springer}, address = {Pythagorion, Greece}, series = {LNCS}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C31 | Bonnot, Justine; Desnos, Karol; Menard, Daniel Accuracy Evaluation Based on Simulation for Finite Precision Systems Using Inferential Statistics Inproceedings International Conference on Acoustics, Speech and Signal Processing (ICASSP), Brighton, UK @inproceedings{Bonnot_Accuracy_2019, title = {Accuracy Evaluation Based on Simulation for Finite Precision Systems Using Inferential Statistics}, author = {Justine Bonnot and Karol Desnos and Daniel Menard}, doi = {10.1109/ICASSP.2019.8682631}, year = {2019}, date = {2019-05-12}, booktitle = {International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, address = {Brighton, UK}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
2018 |
|
C30 | Hascoet, Julien; de Dinechin, Benoit Dupont; Desnos, Karol; Nezan, Jean-François A Distributed Framework for Low-Latency OpenVX over the RDMA NoC of a Clustered Manycore Inproceedings High Performance Extreme Computing Conference (HPEC), Waltham, MA, USA @inproceedings{Hascoet_Distributed_2018, title = {A Distributed Framework for Low-Latency OpenVX over the RDMA NoC of a Clustered Manycore}, author = {Julien Hascoet and Benoit Dupont de Dinechin and Karol Desnos and Jean-Fran\c{c}ois Nezan}, doi = {10.1109/HPEC.2018.8547736}, year = {2018}, date = {2018-09-25}, booktitle = {High Performance Extreme Computing Conference (HPEC)}, pages = {1--7}, publisher = {IEEE}, address = {Waltham, MA, USA}, abstract = {OpenVX is a standard proposed by the Khronos group for cross-platform acceleration of computer vision and deep learning applications. OpenVX abstracts the target processor architecture complexity and automates the implementation of processing pipelines through high-level optimizations. While highly efficient OpenVX implementations exist for shared memory multi-core processors, targeting OpenVX to clustered manycore processors appears challenging. Indeed, such processors comprise multiple compute units or clusters, each fitted with an on-chip local memory shared by several cores. This paper describes an efficient implementation of OpenVX that targets clustered manycore processors. We propose a framework that includes computation graph analysis, kernel fusion techniques, RDMA-based tiling into local memories, optimization passes, and a distributed execution runtime. This framework is implemented and evaluated on the 2nd-generation Kalray MPPA ® clustered manycore processor. 
Experimental results show that super-linear speed-ups are obtained for multi-cluster execution by leveraging the bandwidth of on-chip memories and the capabilities of asynchronous RDMA engines.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } OpenVX is a standard proposed by the Khronos group for cross-platform acceleration of computer vision and deep learning applications. OpenVX abstracts the target processor architecture complexity and automates the implementation of processing pipelines through high-level optimizations. While highly efficient OpenVX implementations exist for shared memory multi-core processors, targeting OpenVX to clustered manycore processors appears challenging. Indeed, such processors comprise multiple compute units or clusters, each fitted with an on-chip local memory shared by several cores. This paper describes an efficient implementation of OpenVX that targets clustered manycore processors. We propose a framework that includes computation graph analysis, kernel fusion techniques, RDMA-based tiling into local memories, optimization passes, and a distributed execution runtime. This framework is implemented and evaluated on the 2nd-generation Kalray MPPA ® clustered manycore processor. Experimental results show that super-linear speed-ups are obtained for multi-cluster execution by leveraging the bandwidth of on-chip memories and the capabilities of asynchronous RDMA engines. |
C29 | Bonnot, Justine; Camus, Vincent; Desnos, Karol; Menard, Daniel CASSIS: Characterization with Adaptive Sample-Size Inferential Statistics Applied to Inexact Circuits Inproceedings European Signal Processing Conference (EUSIPCO), Roma, Italy @inproceedings{Bonnot_Cassis_2018, title = {CASSIS: Characterization with Adaptive Sample-Size Inferential Statistics Applied to Inexact Circuits}, author = {Justine Bonnot and Vincent Camus and Karol Desnos and Daniel Menard}, doi = {10.23919/EUSIPCO.2018.8553451}, year = {2018}, date = {2018-09-03}, booktitle = {European Signal Processing Conference (EUSIPCO)}, pages = {677-681}, publisher = {IEEE}, address = {Roma, Italy}, abstract = {To design faster and more energy-efficient systems, numerous inexact arithmetic operators have been proposed, generally obtained by modifying the logic structure of conventional circuits. However, as the quality of service of an application has to be ensured, these operators need to be precisely characterized to be usable in commercial or real-life applications. The characterization of inexact operators is commonly achieved with exhaustive or random bit-accurate gate-level simulations. However, for high word lengths, the time and memory required for such simulations become prohibitive. Besides, when simulating a random sample, no confidence information is given on the precision of the characterization. To overcome these limitations, CASSIS, a new characterization method for inexact operators is proposed. By exploiting statistical properties of the approximation error, the number of simulations needed for precise characterization can be drastically reduced. From user-defined confidence requirements, the proposed method computes the minimal number of samples to simulate to obtain the desired accuracy on the characterization. 
For 32-bit inexact adders, the proposed method reduces the number of simulated samples needed up to 50176.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } To design faster and more energy-efficient systems, numerous inexact arithmetic operators have been proposed, generally obtained by modifying the logic structure of conventional circuits. However, as the quality of service of an application has to be ensured, these operators need to be precisely characterized to be usable in commercial or real-life applications. The characterization of inexact operators is commonly achieved with exhaustive or random bit-accurate gate-level simulations. However, for high word lengths, the time and memory required for such simulations become prohibitive. Besides, when simulating a random sample, no confidence information is given on the precision of the characterization. To overcome these limitations, CASSIS, a new characterization method for inexact operators is proposed. By exploiting statistical properties of the approximation error, the number of simulations needed for precise characterization can be drastically reduced. From user-defined confidence requirements, the proposed method computes the minimal number of samples to simulate to obtain the desired accuracy on the characterization. For 32-bit inexact adders, the proposed method reduces the number of simulated samples needed up to 50176. |
C28 | Bonnot, Justine; Desnos, Karol; Menard, Daniel Algorithm-Level Approximation for Fast (or not) Embedded Stereovision Algorithm Inproceedings International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS), Pythagorion, Greece @inproceedings{Bonnot_Algorithm_2018, title = {Algorithm-Level Approximation for Fast (or not) Embedded Stereovision Algorithm}, author = {Justine Bonnot and Karol Desnos and Daniel Menard}, url = {https://hal.archives-ouvertes.fr/hal-01879595}, doi = {10.1145/3229631.3229638}, year = {2018}, date = {2018-07-16}, booktitle = {International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS)}, publisher = {ACM}, address = {Pythagorion, Greece}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C27 | Arrestier, Florian; Desnos, Karol; Pelcat, Maxime; Heulot, Julien; Juarez, Eduardo; Menard, Daniel Delays and States in Dataflow Models of Computation Inproceedings International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS), Pythagorion, Greece @inproceedings{Arrestier_SAD_2018, title = {Delays and States in Dataflow Models of Computation}, author = {Florian Arrestier and Karol Desnos and Maxime Pelcat and Julien Heulot and Eduardo Juarez and Daniel Menard}, doi = {10.1145/3229631.3229645}, year = {2018}, date = {2018-07-15}, booktitle = {International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS)}, pages = {47-54}, publisher = {ACM}, address = {Pythagorion, Greece}, abstract = {Dataflow Models of Computation (MoCs) have proven efficient means for modeling computational aspects of Cyber-Physical Systems (CPS). Over the years, diverse MoCs have been proposed that offer trade-offs between expressivity, conciseness, predictability, and reconfigurability. While being efficient for modeling coarse grain data and task parallelism, state-of-the-art dataflow MoCs suffer from a lack of semantics to benefit from the lower grained parallelism offered by hierarchically modeled nested loops. In this paper, a meta-model called State-Aware Dataflow (SAD) is proposed that enhances a dataflow MoC, introducing new semantics to take advantage of such nested loop parallelism. SAD extends the semantics of the targeted MoC with unambiguous data persistence scope. The extended expressiveness and conciseness brought by the SAD meta-model are demonstrated with a reinforcement learning use-case.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Dataflow Models of Computation (MoCs) have proven efficient means for modeling computational aspects of Cyber-Physical Systems (CPS). 
Over the years, diverse MoCs have been proposed that offer trade-offs between expressivity, conciseness, predictability, and reconfigurability. While being efficient for modeling coarse grain data and task parallelism, state-of-the-art dataflow MoCs suffer from a lack of semantics to benefit from the lower grained parallelism offered by hierarchically modeled nested loops. In this paper, a meta-model called State-Aware Dataflow (SAD) is proposed that enhances a dataflow MoC, introducing new semantics to take advantage of such nested loop parallelism. SAD extends the semantics of the targeted MoC with unambiguous data persistence scope. The extended expressiveness and conciseness brought by the SAD meta-model are demonstrated with a reinforcement learning use-case. |
C26 | Bonnot, Justine; Desnos, Karol; Menard, Daniel Stochastic Modeling to Accelerate Approximate Operators Simulation Inproceedings International Symposium on Circuits and Systems (ISCAS), @inproceedings{Bonnot_Stochastic_2018, title = {Stochastic Modeling to Accelerate Approximate Operators Simulation}, author = {Justine Bonnot and Karol Desnos and Daniel Menard}, url = {https://hal.archives-ouvertes.fr/hal-01812706/document}, year = {2018}, date = {2018-05-27}, booktitle = {International Symposium on Circuits and Systems (ISCAS)}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C25 | Bonnot, Justine; Desnos, Karol; Pelcat, Maxime; Menard, Daniel A Fast and Fuzzy Functional Simulator of Inexact Arithmetic Operators for Approximate Computing Systems Inproceedings Great Lakes Symposium on VLSI (GLSVLSI), Chicago, USA @inproceedings{Bonnot_Fast_2018, title = {A Fast and Fuzzy Functional Simulator of Inexact Arithmetic Operators for Approximate Computing Systems}, author = {Justine Bonnot and Karol Desnos and Maxime Pelcat and Daniel Menard}, url = {https://hal.archives-ouvertes.fr/hal-01812719/document}, year = {2018}, date = {2018-05-23}, booktitle = {Great Lakes Symposium on VLSI (GLSVLSI)}, publisher = {ACM}, address = {Chicago, USA}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C24 | Madroñal, Daniel; Lazcano, Raquel; Morvan, Antoine; Salvador, Ruben; Desnos, Karol; Juarez, Eduardo; Sanz, Cesar Automatic Instrumentation of Dataflow Application using PAPI Inproceedings International Conference on Computing Frontiers, Ischia, Italy @inproceedings{Madronal_Automatic_2018, title = {Automatic Instrumentation of Dataflow Application using PAPI}, author = {Daniel Madro\~{n}al and Raquel Lazcano and Antoine Morvan and Ruben Salvador and Karol Desnos and Eduardo Juarez and Cesar Sanz }, doi = {10.1145/3203217.3209886}, year = {2018}, date = {2018-05-08}, booktitle = {International Conference on Computing Frontiers}, pages = {232--235}, publisher = {ACM}, address = {Ischia, Italy}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C23 | Rexha, Hergys; Lafond, Sebastien; Desnos, Karol Energy-Efficient Actor Execution for SDF Application on Heterogeneous Architectures Inproceedings International Conference on Parallel, Distributed, and Network-Based Processing (PDP), @inproceedings{Rexha_Energy_2018, title = {Energy-Efficient Actor Execution for SDF Application on Heterogeneous Architectures}, author = {Hergys Rexha and Sebastien Lafond and Karol Desnos}, doi = {10.1109/PDP2018.2018.00083}, year = {2018}, date = {2018-03-21}, booktitle = {International Conference on Parallel, Distributed, and Network-Based Processing (PDP)}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C22 | Miomandre, Hugo; Hascoët, Julien; Desnos, Karol; Martin, Kevin; de Dinechin, Benoît Dupont; Nezan, Jean-François Embedded Runtime for Reconfigurable Dataflow Graphs on Manycore Architectures Inproceedings PARMA-DITAM, Manchester, United Kingdom @inproceedings{Miomandre_Embedded_2018, title = {Embedded Runtime for Reconfigurable Dataflow Graphs on Manycore Architectures}, author = {Hugo Miomandre and Julien Hasco\"{e}t and Karol Desnos and Kevin Martin and Beno\^{i}t Dupont de Dinechin and Jean-Fran\c{c}ois Nezan}, url = {https://hal.archives-ouvertes.fr/hal-01704702}, year = {2018}, date = {2018-01-23}, booktitle = {PARMA-DITAM}, address = {Manchester, United Kingdom}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
2017 |
|
C21 | Hascoët, Julien; Desnos, Karol; Nezan, Jean-François; de Dinechin, Benoit Dupont Hierarchical Dataflow Model for efficient programming of clustered manycore processors Inproceedings International Conference on Application-specific Systems, Architectures and Processors (ASAP), Seattle, WA, USA @inproceedings{Hascoet_Hierarchical_2017, title = {Hierarchical Dataflow Model for efficient programming of clustered manycore processors}, author = {Julien Hasco\"{e}t and Karol Desnos and Jean-Fran\c{c}ois Nezan and Benoit Dupont de Dinechin}, editor = {IEEE}, url = {https://hal.archives-ouvertes.fr/hal-01564019/file/ASAP-2017-Hierarchical-Dataflow-Model.pdf}, doi = {10.1109/ASAP.2017.7995270}, year = {2017}, date = {2017-07-01}, booktitle = {International Conference on Application-specific Systems, Architectures and Processors (ASAP)}, pages = {137-142}, address = {Seattle, WA, USA}, abstract = {Programming Multiprocessor Systems-on-Chips (MPSoCs) with hundreds of heterogeneous Processing Elements (PEs), complex memory architectures, and Networks-on-Chips (NoCs) remains a challenge for embedded system designers. Dataflow Models of Computation (MoCs) are increasingly used for developing parallel applications as their high-level of abstraction eases the automation of mapping, task scheduling and memory allocation onto MPSoCs. This paper introduces a technique for deploying hierarchical dataflow graphs efficiently onto MPSoC. The proposed technique exploits different granularity of dataflow parallelism to generate both NoC-based communications and nested OpenMP loops. 
Deployment of an image processing application on a many-core MPSoC results in speedups of up to 58.7 compared to the sequential execution.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Programming Multiprocessor Systems-on-Chips (MPSoCs) with hundreds of heterogeneous Processing Elements (PEs), complex memory architectures, and Networks-on-Chips (NoCs) remains a challenge for embedded system designers. Dataflow Models of Computation (MoCs) are increasingly used for developing parallel applications as their high-level of abstraction eases the automation of mapping, task scheduling and memory allocation onto MPSoCs. This paper introduces a technique for deploying hierarchical dataflow graphs efficiently onto MPSoC. The proposed technique exploits different granularity of dataflow parallelism to generate both NoC-based communications and nested OpenMP loops. Deployment of an image processing application on a many-core MPSoC results in speedups of up to 58.7 compared to the sequential execution. |
C20 | Suriano, Leonardo; Rodriguez, Alfonso; Desnos, Karol; Pelcat, Maxime; de la Torre, Eduardo Analysis of a Heterogeneous Multi-Core, Multi-HW-Accelerator-Based System Designed Using PREESM and SDSoC Inproceedings International Symposium on Reconfigurable Communication-centric Systems-on-Chip (ReCoSoC), Madrid, Spain @inproceedings{Suriano_Analysis_2017, title = {Analysis of a Heterogeneous Multi-Core, Multi-HW-Accelerator-Based System Designed Using PREESM and SDSoC}, author = {Leonardo Suriano and Alfonso Rodriguez and Karol Desnos and Maxime Pelcat and Eduardo de la Torre}, doi = {10.1109/ReCoSoC.2017.8016151}, year = {2017}, date = {2017-07-01}, booktitle = {International Symposium on Reconfigurable Communication-centric Systems-on-Chip (ReCoSoC)}, pages = {1-7}, address = {Madrid, Spain}, abstract = {Nowadays, new heterogeneous system technologies are flooding the market: through the past years, it is possible to observe the move from single CPUs to multi-core devices featuring CPUs, GPUs and large FPGAs, such as Xilinx Zynq-7000 or Zynq UltraScale+ MPSoC architectures. In this context, providing developers with transparent deployment capabilities to efficiently execute different applications on such complex devices is important. In this paper, a design flow that combines, on one side, PREESM, a dataflow-based prototyping framework and, on the other side, Xilinx SDSoC, an HLS-based framework to automatically generate and manage hardware accelerators, is presented. This integration leverages the automatic, static task scheduling obtained from PREESM with asynchronous invocations that trigger the parallel execution of multiple hardware accelerators from some of their associated sequential software threads. 
An image processing application is used as a proof of concept, showing the interoperability possibilities of both tools, the level of design automation achieved and, for the resulting computing architecture, the good performance scalability according to the number of accelerators and sw threads.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Nowadays, new heterogeneous system technologies are flooding the market: through the past years, it is possible to observe the move from single CPUs to multi-core devices featuring CPUs, GPUs and large FPGAs, such as Xilinx Zynq-7000 or Zynq UltraScale+ MPSoC architectures. In this context, providing developers with transparent deployment capabilities to efficiently execute different applications on such complex devices is important. In this paper, a design flow that combines, on one side, PREESM, a dataflow-based prototyping framework and, on the other side, Xilinx SDSoC, an HLS-based framework to automatically generate and manage hardware accelerators, is presented. This integration leverages the automatic, static task scheduling obtained from PREESM with asynchronous invocations that trigger the parallel execution of multiple hardware accelerators from some of their associated sequential software threads. An image processing application is used as a proof of concept, showing the interoperability possibilities of both tools, the level of design automation achieved and, for the resulting computing architecture, the good performance scalability according to the number of accelerators and sw threads. |
C19 | Deroui, Hamza; Desnos, Karol; Nezan, Jean-François; Munier-Kordon, Alix Throughput Evaluation of DSP Applications based on Hierarchical Dataflow Models Inproceedings International Symposium on Circuits and Systems (ISCAS), Baltimore, MD, USA @inproceedings{Deroui_Throughput_2017, title = {Throughput Evaluation of DSP Applications based on Hierarchical Dataflow Models}, author = {Hamza Deroui and Karol Desnos and Jean-Fran\c{c}ois Nezan and Alix Munier-Kordon}, url = {https://hal.archives-ouvertes.fr/hal-01514641/file/ISCAS2017%20_published.pdf}, doi = {10.1109/ISCAS.2017.8050774}, year = {2017}, date = {2017-01-01}, booktitle = {International Symposium on Circuits and Systems (ISCAS)}, address = {Baltimore, MD, USA}, abstract = {Synchronous Dataflow (SDF) is the most commonly used dataflow Model of Computation (MoC) for the specification of Digital Signal Processing (DSP) systems. The Interface-Based SDF (IBSDF) model extends the semantics of the SDF model by introducing a graph composition mechanism based on hierarchical interfaces. Computing the throughput of an application is essential when designing DSP systems. This article introduces and assesses new methods to compute the throughput of DSP applications specified with IBSDF graphs. First, a basic method inspired from the state-of-the-art techniques that relies on a transformation of the IBSDF graph to an equivalent non-hierarchical graph of potentially exponential size. Second, a new technique that takes advantage of the hierarchy semantics of the IBSDF MoC to speed-up the throughput evaluation without any conversion. The proposed technique makes it possible to compute the throughput of large IBSDF graphs in a few milliseconds, where the basic method fails to produce a result.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Synchronous Dataflow (SDF) is the most commonly used dataflow Model of Computation (MoC) for the specification of Digital Signal Processing (DSP) systems. 
The Interface-Based SDF (IBSDF) model extends the semantics of the SDF model by introducing a graph composition mechanism based on hierarchical interfaces. Computing the throughput of an application is essential when designing DSP systems. This article introduces and assesses new methods to compute the throughput of DSP applications specified with IBSDF graphs. First, a basic method inspired from the state-of-the-art techniques that relies on a transformation of the IBSDF graph to an equivalent non-hierarchical graph of potentially exponential size. Second, a new technique that takes advantage of the hierarchy semantics of the IBSDF MoC to speed-up the throughput evaluation without any conversion. The proposed technique makes it possible to compute the throughput of large IBSDF graphs in a few milliseconds, where the basic method fails to produce a result. |
C18 | Deroui, Hamza; Desnos, Karol; Nezan, Jean-François; Munier-Kordon, Alix Relaxed Subgraph Execution Model for the Throughput Evaluation of IBSDF Graphs Inproceedings International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS), Samos, Greece @inproceedings{Deroui_Relaxed_2017, title = {Relaxed Subgraph Execution Model for the Throughput Evaluation of IBSDF Graphs}, author = {Hamza Deroui and Karol Desnos and Jean-Fran\c{c}ois Nezan and Alix Munier-Kordon}, url = {https://hal.archives-ouvertes.fr/hal-01569593/file/32_Final_Paper.pdf}, year = {2017}, date = {2017-01-01}, booktitle = {International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS)}, address = {Samos, Greece}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
C17 | Georgakarakos, Georgios; Kanur, Sudeep; Lilius, Johan; Desnos, Karol Task-based Execution of Synchronous Dataflow Graphs for Scalable Multicore Computing Inproceedings International Workshop on Signal Processing Systems (SiPS), Lorient, France @inproceedings{Georgakarakos_Task_2017, title = {Task-based Execution of Synchronous Dataflow Graphs for Scalable Multicore Computing}, author = {Georgios Georgakarakos and Sudeep Kanur and Johan Lilius and Karol Desnos}, editor = {IEEE}, year = {2017}, date = {2017-01-01}, booktitle = {International Workshop on Signal Processing Systems (SiPS)}, address = {Lorient, France}, abstract = {Dataflow models of computation have early on been acknowledged as an attractive methodology to describe parallel algorithms, hence they have become highly relevant for programming in the current multicore processor era. While several frameworks provide tools to create dataflow descriptions of algorithms, generating parallel code for programmable processors is still sub-optimal due to the scheduling overheads and the semantics gap when expressing parallelism with conventional programming languages featuring threads. In this paper we propose an optimization of the parallel code generation process by combining dataflow and task programming models. We develop a task-based code generator for PREESM, a dataflow-based prototyping framework, in order to deploy algorithms described as synchronous dataflow graphs on multicore platforms. 
Experimental performance comparison of our task generated code against typical thread-based code shows that our approach removes significant scheduling and synchronization overheads while maintaining similar (and occasionally improving) application throughput.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Dataflow models of computation have early on been acknowledged as an attractive methodology to describe parallel algorithms, hence they have become highly relevant for programming in the current multicore processor era. While several frameworks provide tools to create dataflow descriptions of algorithms, generating parallel code for programmable processors is still sub-optimal due to the scheduling overheads and the semantics gap when expressing parallelism with conventional programming languages featuring threads. In this paper we propose an optimization of the parallel code generation process by combining dataflow and task programming models. We develop a task-based code generator for PREESM, a dataflow-based prototyping framework, in order to deploy algorithms described as synchronous dataflow graphs on multicore platforms. Experimental performance comparison of our task generated code against typical thread-based code shows that our approach removes significant scheduling and synchronization overheads while maintaining similar (and occasionally improving) application throughput. |
2016 |
|
C16 | Pelcat, Maxime; Desnos, Karol; Maggiani, Luca; Liu, Yanzhou; Heulot, Julien; Nezan, Jean-François; Bhattacharyya, Shuvra S Models of Architecture: Reproducible Efficiency Evaluation for Signal Processing Systems Inproceedings International Workshop on Signal Processing Systems (SiPS), Dallas, TX, USA @inproceedings{Pelcat_Models_2016, title = {Models of Architecture: Reproducible Efficiency Evaluation for Signal Processing Systems}, author = {Maxime Pelcat and Karol Desnos and Luca Maggiani and Yanzhou Liu and Julien Heulot and Jean-Fran\c{c}ois Nezan and Shuvra S Bhattacharyya}, editor = {IEEE}, url = {https://hal.archives-ouvertes.fr/hal-01390508/file/sips_moa.pdf}, doi = {10.1109/SiPS.2016.29}, year = {2016}, date = {2016-10-01}, booktitle = {International Workshop on Signal Processing Systems (SiPS)}, pages = {121-126}, address = {Dallas, TX, USA}, abstract = {The current trend in high performance and embedded signal processing consists of designing increasingly complex heterogeneous hardware architectures with non-uniform communication resources. In order to take hardware and software design decisions, early evaluations of the system non-functional properties are needed. These evaluations of system efficiency require high-level information on both the algorithms and the architecture. In this paper, we define the notion of Model of Architecture (MoA) and study the combination of a Model of Computation (MoC) and an MoA to provide a design space exploration environment for the study of the algorithmic and architectural choices. A cost is computed from the mapping of an application, represented by a model conforming a MoC onto an architecture represented by a model conforming an MoA. The cost is composed of a processing-related part and a communication-related part. 
It is an abstract scalar value to be minimized and can represent any non-functional requirement of a system such as memory, energy, throughput or latency.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } The current trend in high performance and embedded signal processing consists of designing increasingly complex heterogeneous hardware architectures with non-uniform communication resources. In order to take hardware and software design decisions, early evaluations of the system non-functional properties are needed. These evaluations of system efficiency require high-level information on both the algorithms and the architecture. In this paper, we define the notion of Model of Architecture (MoA) and study the combination of a Model of Computation (MoC) and an MoA to provide a design space exploration environment for the study of the algorithmic and architectural choices. A cost is computed from the mapping of an application, represented by a model conforming a MoC onto an architecture represented by a model conforming an MoA. The cost is composed of a processing-related part and a communication-related part. It is an abstract scalar value to be minimized and can represent any non-functional requirement of a system such as memory, energy, throughput or latency. |
C15 | Desnos, Karol; Pelcat, Maxime; Nezan, Jean-François; Aridhi, Slaheddine Distributed Memory Allocation Technique for Synchronous Dataflow Graphs Inproceedings International Workshop on Signal Processing Systems (SiPS), Dallas, TX, USA @inproceedings{Desnos_Distributed_2016, title = {Distributed Memory Allocation Technique for Synchronous Dataflow Graphs}, author = {Karol Desnos and Maxime Pelcat and Jean-Fran\c{c}ois Nezan and Slaheddine Aridhi}, url = {https://hal.archives-ouvertes.fr/hal-01390486/document}, doi = {10.1109/SiPS.2016.16}, year = {2016}, date = {2016-10-01}, booktitle = {International Workshop on Signal Processing Systems (SiPS)}, pages = {45-50}, address = {Dallas, TX, USA}, abstract = {This paper introduces a new distributed memory allocation technique for applications modeled with Synchronous Dataflow (SDF) graphs. This technique builds on a State-of-the-Art shared memory allocation technique based on a weighted graph, called Memory Exclusion Graph (MEG). A MEG captures the memory reuse opportunities between memory objects that must be allocated before the execution of an SDF graph. The algorithms detailed in this paper enable a single MEG to be split into separate MEGs, each of which is associated with a memory bank accessible only by one core of the architecture. The proposed technique is implemented within a rapid prototyping framework and is evaluated by deploying real computer vision applications on a Multiprocessor System-on-Chip (MPSoC). Results show a systematic performance improvement due to better memory usage, with application speedups ranging from 2% up to 380%.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } This paper introduces a new distributed memory allocation technique for applications modeled with Synchronous Dataflow (SDF) graphs. This technique builds on a State-of-the-Art shared memory allocation technique based on a weighted graph, called Memory Exclusion Graph (MEG). 
A MEG captures the memory reuse opportunities between memory objects that must be allocated before the execution of an SDF graph. The algorithms detailed in this paper enable a single MEG to be split into separate MEGs, each of which is associated with a memory bank accessible only by one core of the architecture. The proposed technique is implemented within a rapid prototyping framework and is evaluated by deploying real computer vision applications on a Multiprocessor System-on-Chip (MPSoC). Results show a systematic performance improvement due to better memory usage, with application speedups ranging from 2% up to 380%. |
C14 | Ammar, Manel; Baklouti, Mouna; Pelcat, Maxime; Desnos, Karol; Mohamed, Abid Off-Line DVFS Integration in MDE-Based Design Space Exploration Framework for MP2SoC Systems Inproceedings International Conference on Enabling Technologies: Infrastructure for Collaborative Enterprises (WETICE), Paris, France @inproceedings{Ammar_Line_2016, title = {Off-Line DVFS Integration in MDE-Based Design Space Exploration Framework for MP2SoC Systems}, author = {Manel Ammar and Mouna Baklouti and Maxime Pelcat and Karol Desnos and Abid Mohamed}, editor = {IEEE}, doi = {10.1109/WETICE.2016.43}, year = {2016}, date = {2016-06-01}, booktitle = {International Conference on Enabling Technologies: Infrastructure for Collaborative Enterprises (WETICE)}, pages = {160-165}, address = {Paris, France}, abstract = {As the speed metric of Massively Parallel Multi-Processors System-on-Chip (MP2SoC) systems has increased over time, another metric has become more important: power consumption. Finding a tradeoff between power consumption and performance early in the design flow of MP2SoC systems in order to satisfy time-to-market is the design challenge of Electronic Design Automation (EDA) tools. This paper presents a Design Space Exploration (DSE) framework, named Energy-Aware Rapid Design of MP2SoC (EWARDS), aiming at exploring the performance and power capabilities of modern homogenous MP2SoC systems at design time using Model-Driven Engineering (MDE) techniques. The proposed framework extends the Modeling and Analysis of Real-Time and Embedded systems (MARTE) profile with power aspects of MP2SoC systems providing a high-level design entry. 
In addition, EWARDS integrates an energy-aware scheduler that strives to balance performance and energy savings by combining clustering scheduling algorithm with off-line Dynamic Voltage and Frequency Scaling (DVFS) power management techniques.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } As the speed metric of Massively Parallel Multi-Processors System-on-Chip (MP2SoC) systems has increased over time, another metric has become more important: power consumption. Finding a tradeoff between power consumption and performance early in the design flow of MP2SoC systems in order to satisfy time-to-market is the design challenge of Electronic Design Automation (EDA) tools. This paper presents a Design Space Exploration (DSE) framework, named Energy-Aware Rapid Design of MP2SoC (EWARDS), aiming at exploring the performance and power capabilities of modern homogenous MP2SoC systems at design time using Model-Driven Engineering (MDE) techniques. The proposed framework extends the Modeling and Analysis of Real-Time and Embedded systems (MARTE) profile with power aspects of MP2SoC systems providing a high-level design entry. In addition, EWARDS integrates an energy-aware scheduler that strives to balance performance and energy savings by combining clustering scheduling algorithm with off-line Dynamic Voltage and Frequency Scaling (DVFS) power management techniques. |
C13 | Ammar, Manel; Baklouti, Mouna; Pelcat, Maxime; Desnos, Karol; Mohamed, Abid On Exploiting Energy-Aware Scheduling Algorithms for MDE-Based Design Space Exploration of MP2SoC Inproceedings International Conference on Parallel, Distributed, and Network-Based Processing (PDP), Heraklion, Greece @inproceedings{Ammar_Exploiting_2016, title = {On Exploiting Energy-Aware Scheduling Algorithms for MDE-Based Design Space Exploration of MP2SoC}, author = {Manel Ammar and Mouna Baklouti and Maxime Pelcat and Karol Desnos and Abid Mohamed}, url = {https://hal.archives-ouvertes.fr/hal-01305971/file/ammar_exploiting_2016.pdf}, doi = {10.1109/PDP.2016.110}, year = {2016}, date = {2016-02-01}, booktitle = {International Conference on Parallel, Distributed, and Network-Based Processing (PDP)}, pages = {643-650}, address = {Heraklion, Greece}, abstract = {Massively Parallel Multi-Processors System-on-Chip (MP2SoC) architectures have been widely deployed to run challenging high-performance computations. However, the ever greater demand for energy efficiency fosters energy budgeting in MP2SoC systems. Nowadays, having the appropriate Electronic Design Automation (EDA) tools for power estimation is mandatory. The major challenge for the design of such tools is to reach a better tradeoff between accuracy and time-to-market. This paper presents a Model Driven Engineering (MDE)-based energy-aware Design Space Exploration (DSE) approach allowing the designer to take the power consumption criterion into account early in the design flow. The originality of this approach is that it integrates the Energy-Aware Duplication (EAD) algorithm that strives to balance schedule lengths and energy savings by considering the most important sources of energy consumption in MP2SoC: the massive number of processing elements (PE) and the high-speed Network-on-Chip (NoC). To demonstrate the effectiveness of the proposed approach, we conducted experiments using the H.263 encoder application. 
The obtained results demonstrated that EAD can effectively save energy in MP2SoC systems. They also showed that our MDE approach is capable of accelerating the DSE process to make early energy-efficient design decisions.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Massively Parallel Multi-Processors System-on-Chip (MP2SoC) architectures have been widely deployed to run challenging high-performance computations. However, the ever greater demand for energy efficiency fosters energy budgeting in MP2SoC systems. Nowadays, having the appropriate Electronic Design Automation (EDA) tools for power estimation is mandatory. The major challenge for the design of such tools is to reach a better tradeoff between accuracy and time-to-market. This paper presents a Model Driven Engineering (MDE)-based energy-aware Design Space Exploration (DSE) approach allowing the designer to take the power consumption criterion into account early in the design flow. The originality of this approach is that it integrates the Energy-Aware Duplication (EAD) algorithm that strives to balance schedule lengths and energy savings by considering the most important sources of energy consumption in MP2SoC: the massive number of processing elements (PE) and the high-speed Network-on-Chip (NoC). To demonstrate the effectiveness of the proposed approach, we conducted experiments using the H.263 encoder application. The obtained results demonstrated that EAD can effectively save energy in MP2SoC systems. They also showed that our MDE approach is capable of accelerating the DSE process to make early energy-efficient design decisions. |
C12 | Ammar, Manel; Baklouti, Mouna; Pelcat, Maxime; Desnos, Karol; Mohamed, Abid Automatic Generation of S-LAM Descriptions from UML/MARTE for the DSE of Massively Parallel Embedded Systems Inproceedings Software Engineering, Artificial Intelligence, Networking and Parallel/Distributed Computing (SNPD), Takamatsu, Japan @inproceedings{Ammar_Automatic_2016, title = {Automatic Generation of S-LAM Descriptions from UML/MARTE for the DSE of Massively Parallel Embedded Systems}, author = {Manel Ammar and Mouna Baklouti and Maxime Pelcat and Karol Desnos and Abid Mohamed}, url = {https://hal.archives-ouvertes.fr/hal-01252511/file/ammar_automatic_2015.pdf}, doi = {10.1007/978-3-319-23509-7_14}, year = {2016}, date = {2016-01-01}, booktitle = {Software Engineering, Artificial Intelligence, Networking and Parallel/Distributed Computing (SNPD)}, pages = {195--211}, publisher = {Springer International Publishing}, address = {Takamatsu, Japan}, abstract = {Massively Parallel Multi-Processors System-on-Chip (MP2SoC) architectures require efficient programming models and tools to deal with the massive parallelism present within the architecture. In this paper, we propose a tool which automates the generation of the System-Level Architecture Model (S-LAM) from a Unified Modeling Language-based (UML) model annotated with the Modeling and Analysis of Real-Time and Embedded Systems (MARTE) profile. The S-LAM-based description of the MP2SoC architecture is conformed to the IP-XACT standard. The integration of our generator within a co-design framework provides the specification of the whole MP2SoC system using UML and MARTE. Then, gradual refinements allow the execution of a rapid prototyping process.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Massively Parallel Multi-Processors System-on-Chip (MP2SoC) architectures require efficient programming models and tools to deal with the massive parallelism present within the architecture.
In this paper, we propose a tool which automates the generation of the System-Level Architecture Model (S-LAM) from a Unified Modeling Language-based (UML) model annotated with the Modeling and Analysis of Real-Time and Embedded Systems (MARTE) profile. The S-LAM-based description of the MP2SoC architecture is conformed to the IP-XACT standard. The integration of our generator within a co-design framework provides the specification of the whole MP2SoC system using UML and MARTE. Then, gradual refinements allow the execution of a rapid prototyping process. |
C11 | Lazcano, Raquel; Madroñal, Daniel; Desnos, Karol; Pelcat, Maxime; Guerra, Raul; Lopez, Sebastian; Juarez, Eduardo; Sanz, Cesar Parallelism Exploitation of a Dimensionality Reduction Algorithm Applied to Hyperspectral Images Inproceedings Conference on Design and Architectures for Signal and Image Processing (DASIP), Rennes, France @inproceedings{Lazcano_Parallelism_2016a, title = {Parallelism Exploitation of a Dimensionality Reduction Algorithm Applied to Hyperspectral Images}, author = {Raquel Lazcano and Daniel Madro\~{n}al and Karol Desnos and Maxime Pelcat and Raul Guerra and Sebastian Lopez and Eduardo Juarez and Cesar Sanz}, url = {https://hal.archives-ouvertes.fr/hal-01415948}, year = {2016}, date = {2016-01-01}, booktitle = {Conference on Design and Architectures for Signal and Image Processing (DASIP)}, address = {Rennes, France}, abstract = {This paper presents a study of the parallelism of a dimensionality reduction algorithm based on the PCA (Principal Component Analysis) algorithm. The study aims at porting hyperspectral image processing on innovative parallel platforms and focuses on optimizing the algorithm execution to achieve real-time processing. Real-time processing constitutes a difficult requirement for processing hyperspectral images due to the high amount of data that they incorporate. An analysis of the levels of parallelism of the different stages of the PCA algorithm is performed to exploit the parallelism offered by a manycore MPPA (Massively Parallel Processor Array) platform, a system that assembles 256 cores in 16 different clusters. A detailed analysis of the Jacobi stage is presented. The Jacobi stage is a computation and resource intensive procedure which replaces the conventional eigenvectors calculation of the PCA algorithm.
Experiments on several datasets of spectral signatures extracted from the USGS (United States Geological Survey) database show preliminary speedups between 2.02 and 9.30 on the global algorithm running onto a 16-core cluster when compared with its counterpart sequential version. Parallelism limitations and future directions are discussed to progress towards the full use of a manycore processor for the real-time processing of hyperspectral images.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } This paper presents a study of the parallelism of a dimensionality reduction algorithm based on the PCA (Principal Component Analysis) algorithm. The study aims at porting hyperspectral image processing on innovative parallel platforms and focuses on optimizing the algorithm execution to achieve real-time processing. Real-time processing constitutes a difficult requirement for processing hyperspectral images due to the high amount of data that they incorporate. An analysis of the levels of parallelism of the different stages of the PCA algorithm is performed to exploit the parallelism offered by a manycore MPPA (Massively Parallel Processor Array) platform, a system that assembles 256 cores in 16 different clusters. A detailed analysis of the Jacobi stage is presented. The Jacobi stage is a computation and resource intensive procedure which replaces the conventional eigenvectors calculation of the PCA algorithm. Experiments on several datasets of spectral signatures extracted from the USGS (United States Geological Survey) database show preliminary speedups between 2.02 and 9.30 on the global algorithm running onto a 16-core cluster when compared with its counterpart sequential version. Parallelism limitations and future directions are discussed to progress towards the full use of a manycore processor for the real-time processing of hyperspectral images. |
C10 | Lazcano, Raquel; Sidrach-Cardona, Ignacio; Madroñal, Daniel; Desnos, Karol; Pelcat, Maxime; Juarez, Eduardo; Sanz, Cesar Parallelism exploitation of a PCA algorithm for hyperspectral images using RVC-CAL Inproceedings SPIE Remote Sensing, Edinburgh, UK @inproceedings{Lazcano_Parallelism_2016, title = {Parallelism exploitation of a PCA algorithm for hyperspectral images using RVC-CAL}, author = {Raquel Lazcano and Ignacio Sidrach-Cardona and Daniel Madro\~{n}al and Karol Desnos and Maxime Pelcat and Eduardo Juarez and Cesar Sanz}, doi = {10.1117/12.2241643}, year = {2016}, date = {2016-01-01}, booktitle = {SPIE Remote Sensing}, address = {Edinburgh, UK}, abstract = {Hyperspectral imaging (HI) collects information from across the electromagnetic spectrum, covering a wide range of wavelengths. The tremendous development of this technology within the field of remote sensing has led to new research fields, such as cancer automatic detection or precision agriculture, but has also increased the performance requirements of the applications. For instance, strong time constraints need to be respected, since many applications imply real-time responses. Achieving real-time is a challenge, as hyperspectral sensors generate high volumes of data to process. Thus, so as to achieve this requisite, first the initial image data needs to be reduced by discarding redundancies and keeping only useful information. Then, the intrinsic parallelism in a system specification must be explicitly highlighted. In this paper, the PCA (Principal Component Analysis) algorithm is implemented using the RVC-CAL dataflow language, which specifies a system as a set of blocks or actors and allows its parallelization by scheduling the blocks over different processing units.
Two implementations of PCA for hyperspectral images have been compared when aiming at obtaining the first few principal components: first, the algorithm has been implemented using the Jacobi approach for obtaining the eigenvectors; thereafter, the NIPALS-PCA algorithm, which approximates the principal components iteratively, has also been studied. Both implementations have been compared in terms of accuracy and computation time; then, the parallelization of both models has also been analyzed. These comparisons show promising results in terms of computation time and parallelization: the performance of the NIPALS-PCA algorithm is clearly better when only the first principal component is achieved, while the partitioning of the algorithm execution over several cores shows an important speedup for the PCA-Jacobi. Thus, experimental results show the potential of RVC\textendashCAL to automatically generate implementations which process in real-time the large volumes of information of hyperspectral sensors, as it provides advanced semantics for exploiting system parallelization.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Hyperspectral imaging (HI) collects information from across the electromagnetic spectrum, covering a wide range of wavelengths. The tremendous development of this technology within the field of remote sensing has led to new research fields, such as cancer automatic detection or precision agriculture, but has also increased the performance requirements of the applications. For instance, strong time constraints need to be respected, since many applications imply real-time responses. Achieving real-time is a challenge, as hyperspectral sensors generate high volumes of data to process. Thus, so as to achieve this requisite, first the initial image data needs to be reduced by discarding redundancies and keeping only useful information. 
Then, the intrinsic parallelism in a system specification must be explicitly highlighted. In this paper, the PCA (Principal Component Analysis) algorithm is implemented using the RVC-CAL dataflow language, which specifies a system as a set of blocks or actors and allows its parallelization by scheduling the blocks over different processing units. Two implementations of PCA for hyperspectral images have been compared when aiming at obtaining the first few principal components: first, the algorithm has been implemented using the Jacobi approach for obtaining the eigenvectors; thereafter, the NIPALS-PCA algorithm, which approximates the principal components iteratively, has also been studied. Both implementations have been compared in terms of accuracy and computation time; then, the parallelization of both models has also been analyzed. These comparisons show promising results in terms of computation time and parallelization: the performance of the NIPALS-PCA algorithm is clearly better when only the first principal component is achieved, while the partitioning of the algorithm execution over several cores shows an important speedup for the PCA-Jacobi. Thus, experimental results show the potential of RVC–CAL to automatically generate implementations which process in real-time the large volumes of information of hyperspectral sensors, as it provides advanced semantics for exploiting system parallelization. |
2015 |
|
C9 | Desnos, Karol; Pelcat, Maxime; Nezan, Jean-François; Aridhi, Slaheddine Buffer merging technique for minimizing memory footprints of Synchronous Dataflow specifications Inproceedings International Conference on Acoustics, Speech and Signal Processing (ICASSP), Brisbane, Australia @inproceedings{Desnos_Buffer_2015, title = {Buffer merging technique for minimizing memory footprints of Synchronous Dataflow specifications}, author = {Karol Desnos and Maxime Pelcat and Jean-Fran\c{c}ois Nezan and Slaheddine Aridhi}, url = {https://hal.archives-ouvertes.fr/hal-01146340/file/20150423-ICASSP15_Desnos_perso%20%281%29.pdf}, doi = {10.1109/ICASSP.2015.7178142}, year = {2015}, date = {2015-04-01}, booktitle = {International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, pages = {1111-1115}, address = {Brisbane, Australia}, abstract = {This paper introduces and assesses a new technique to minimize the memory footprints of Digital Signal Processing (DSP) applications specified with Synchronous Dataflow (SDF) graphs and implemented on shared-memory Multiprocessor Systems-on-Chips (MPSoCs). In addition to the SDF specification, which captures data dependencies between coarse-grained tasks called actors, the proposed technique relies on two optional inputs abstracting the internal data dependencies of actors: annotations of the ports of SDF actors, and script-based specifications of merging opportunities between input and output buffers of actors. An automated optimization process is used to exploit these buffer merging opportunities and to minimize the memory footprints of applications. 
Experimental results on a computer vision application show a reduction of the memory footprint by 34% compared to state-of-the-art minimization techniques.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } This paper introduces and assesses a new technique to minimize the memory footprints of Digital Signal Processing (DSP) applications specified with Synchronous Dataflow (SDF) graphs and implemented on shared-memory Multiprocessor Systems-on-Chips (MPSoCs). In addition to the SDF specification, which captures data dependencies between coarse-grained tasks called actors, the proposed technique relies on two optional inputs abstracting the internal data dependencies of actors: annotations of the ports of SDF actors, and script-based specifications of merging opportunities between input and output buffers of actors. An automated optimization process is used to exploit these buffer merging opportunities and to minimize the memory footprints of applications. Experimental results on a computer vision application show a reduction of the memory footprint by 34% compared to state-of-the-art minimization techniques. |
2014 |
|
C8 | Desnos, Karol; Assad, Safwan El; Arlicot, Aurore; Pelcat, Maxime; Menard, Daniel Efficient multicore implementation of an advanced generator of discrete chaotic sequences Inproceedings International Workshop on Chaos-Information Hiding and Security (C-IHS), London, UK @inproceedings{Desnos_Efficient_2014, title = {Efficient multicore implementation of an advanced generator of discrete chaotic sequences}, author = {Karol Desnos and Safwan El Assad and Aurore Arlicot and Maxime Pelcat and Daniel Menard}, url = {https://hal.archives-ouvertes.fr/hal-01094677/file/ICITST2014-ID-103_v35.pdf}, doi = {10.1109/ICITST.2014.7038770}, year = {2014}, date = {2014-12-01}, booktitle = {International Workshop on Chaos-Information Hiding and Security (C-IHS)}, pages = {31-36}, address = {London, UK}, abstract = {This paper details the design and implementation performances of an efficient generator of chaotic discrete integer valued sequences. The generator exhibits orbits having very large lengths compared to those given in the literature. It is implemented in C language and parallelized using the Parameterized and Interfaced Synchronous Dataflow Model of Computation (PiSDF MoC). The proposed structure is shown to be scalable, parallel and time efficient. The resulting implementation combines a very long minimal chaotic sequence $o_{min} > 7 \times 2^{128}$ 32-bit samples and a very high throughput of 173Mbps on 4 cores of a General Purpose Processor.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } This paper details the design and implementation performances of an efficient generator of chaotic discrete integer valued sequences. The generator exhibits orbits having very large lengths compared to those given in the literature. It is implemented in C language and parallelized using the Parameterized and Interfaced Synchronous Dataflow Model of Computation (PiSDF MoC). The proposed structure is shown to be scalable, parallel and time efficient.
The resulting implementation combines a very long minimal chaotic sequence $o_{min} > 7 \times 2^{128}$ 32-bit samples and a very high throughput of 173Mbps on 4 cores of a General Purpose Processor. |
C7 | Ammar, Manel; Baklouti, Mouna; Pelcat, Maxime; Desnos, Karol; Mohamed, Abid MARTE to PiSDF transformation for data-intensive applications analysis Inproceedings Conference on Design and Architectures for Signal and Image Processing (DASIP), Madrid, Spain @inproceedings{Ammar_MARTE_2014, title = {MARTE to PiSDF transformation for data-intensive applications analysis}, author = {Manel Ammar and Mouna Baklouti and Maxime Pelcat and Karol Desnos and Abid Mohamed}, url = {https://hal.archives-ouvertes.fr/hal-01122725/file/DASIP_2014_submission_27.pdf}, year = {2014}, date = {2014-10-01}, booktitle = {Conference on Design and Architectures for Signal and Image Processing (DASIP)}, address = {Madrid, Spain}, abstract = {This paper proposes and validates a new methodology to facilitate the analysis of modern data-intensive applications. A major part of handling the processing needs of these applications consists in using the appropriate Model-of-Computation (MoC) which guarantees accurate performance estimations. Our methodology introduces one major contribution that facilitates the analysis step in the co-design flow. It is based on an intermediate level of abstraction implementing the Parameterized and Interfaced Synchronous Dataflow (ΠSDF) semantics. In the proposed methodology, a system designer models the embedded system using the standardised Modeling and Analysis of Real-Time and Embedded Systems (MARTE) profile. High-level models are then refined towards intermediate level models by following the Model-Driven Engineering (MDE) transformation paradigm. The model-to-model transformation permitting to reach the ΠSDF level starting from a MARTE-compliant model is detailed and validated in this paper. It is shown to facilitate system analysis.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } This paper proposes and validates a new methodology to facilitate the analysis of modern data-intensive applications. 
A major part of handling the processing needs of these applications consists in using the appropriate Model-of-Computation (MoC) which guarantees accurate performance estimations. Our methodology introduces one major contribution that facilitates the analysis step in the co-design flow. It is based on an intermediate level of abstraction implementing the Parameterized and Interfaced Synchronous Dataflow (ΠSDF) semantics. In the proposed methodology, a system designer models the embedded system using the standardised Modeling and Analysis of Real-Time and Embedded Systems (MARTE) profile. High-level models are then refined towards intermediate level models by following the Model-Driven Engineering (MDE) transformation paradigm. The model-to-model transformation permitting to reach the ΠSDF level starting from a MARTE-compliant model is detailed and validated in this paper. It is shown to facilitate system analysis. |
C6 | Heulot, Julien; Pelcat, Maxime; Desnos, Karol; Nezan, Jean-François; Aridhi, Slaheddine Spider: A Synchronous Parameterized and Interfaced Dataflow-based RTOS for multicore DSPS Inproceedings Embedded Design in Education and Research Conference (EDERC), Milan, Italy @inproceedings{Heulot_Spider_2014, title = {Spider: A Synchronous Parameterized and Interfaced Dataflow-based RTOS for multicore DSPS}, author = {Julien Heulot and Maxime Pelcat and Karol Desnos and Jean-Fran\c{c}ois Nezan and Slaheddine Aridhi}, url = {https://hal.archives-ouvertes.fr/hal-01067052/file/ederc2014.pdf}, doi = {10.1109/EDERC.2014.6924381}, year = {2014}, date = {2014-09-01}, booktitle = {Embedded Design in Education and Research Conference (EDERC)}, pages = {167-171}, address = {Milan, Italy}, abstract = {This paper introduces a novel Real-Time Operating System (RTOS) based on a parameterized dataflow Model of Computation (MoC). This RTOS, called Synchronous Parameterized and Interfaced Dataflow Embedded Runtime (SPiDER), aims at efficiently scheduling Parameterized and Interfaced Synchronous Dataflow (PiSDF) graphs on multicore architectures. It exploits features of PiSDF to locate locally static regions that exhibit predictable application behavior. This paper uses a multicore signal processing benchmark to demonstrate that the SPiDER runtime can exploit more parallelism than a conventional multicore task scheduler. By comparing experimental results of the SPiDER runtime on an 8-core Texas Instruments Keystone I Digital Signal Processor (DSP) with those obtained from the OpenMP framework, latency improvements of up to 26% are demonstrated.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } This paper introduces a novel Real-Time Operating System (RTOS) based on a parameterized dataflow Model of Computation (MoC). 
This RTOS, called Synchronous Parameterized and Interfaced Dataflow Embedded Runtime (SPiDER), aims at efficiently scheduling Parameterized and Interfaced Synchronous Dataflow (PiSDF) graphs on multicore architectures. It exploits features of PiSDF to locate locally static regions that exhibit predictable application behavior. This paper uses a multicore signal processing benchmark to demonstrate that the SPiDER runtime can exploit more parallelism than a conventional multicore task scheduler. By comparing experimental results of the SPiDER runtime on an 8-core Texas Instruments Keystone I Digital Signal Processor (DSP) with those obtained from the OpenMP framework, latency improvements of up to 26% are demonstrated. |
C5 | Pelcat, Maxime; Desnos, Karol; Heulot, Julien; Guy, Clément; Nezan, Jean-François; Aridhi, Slaheddine Preesm: A dataflow-based rapid prototyping framework for simplifying multicore DSP programming Inproceedings Embedded Design in Education and Research Conference (EDERC), Milan, Italy @inproceedings{Pelcat_Preesm_2014, title = {Preesm: A dataflow-based rapid prototyping framework for simplifying multicore DSP programming}, author = {Maxime Pelcat and Karol Desnos and Julien Heulot and Cl\'{e}ment Guy and Jean-Fran\c{c}ois Nezan and Slaheddine Aridhi}, url = {https://hal.archives-ouvertes.fr/hal-01059313/file/ederc2014.pdf}, doi = {10.1109/EDERC.2014.6924354}, year = {2014}, date = {2014-09-01}, booktitle = {Embedded Design in Education and Research Conference (EDERC)}, pages = {36-40}, address = {Milan, Italy}, abstract = {The high performance Digital Signal Processors (DSPs) currently manufactured by Texas Instruments are heterogeneous multiprocessor architectures. Programming these architectures is a complex task often reserved to specialized engineers because the bottlenecks of both the algorithm and the architecture need to be deeply understood in order to obtain a fairly parallel execution. The PREESM framework objective is to simplify the programming of multicore DSP systems by building on dataflow programming methods. The current functionalities of this scalable framework cover memory and time analysis, as well as automatic deadlock-free code generation. Several tutorials are provided with the tool for fast initiation of C programmers to multicore DSP programming. This paper demonstrates PREESM capabilities by comparing simulation and execution performances on a stereo matching algorithm prototyped on the TMS320C6678 8-core DSP device.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } The high performance Digital Signal Processors (DSPs) currently manufactured by Texas Instruments are heterogeneous multiprocessor architectures. 
Programming these architectures is a complex task often reserved to specialized engineers because the bottlenecks of both the algorithm and the architecture need to be deeply understood in order to obtain a fairly parallel execution. The PREESM framework objective is to simplify the programming of multicore DSP systems by building on dataflow programming methods. The current functionalities of this scalable framework cover memory and time analysis, as well as automatic deadlock-free code generation. Several tutorials are provided with the tool for fast initiation of C programmers to multicore DSP programming. This paper demonstrates PREESM capabilities by comparing simulation and execution performances on a stereo matching algorithm prototyped on the TMS320C6678 8-core DSP device. |
2013 |
|
C4 | Zhou, Zheng; Desnos, Karol; Pelcat, Maxime; Nezan, Jean-François; Plishker, William; Bhattacharyya, Shuvra S Scheduling of parallelized synchronous dataflow actors Inproceedings International Symposium on System on Chip (SoC), Tampere, Finland @inproceedings{Zhou_Scheduling_2013, title = {Scheduling of parallelized synchronous dataflow actors}, author = {Zheng Zhou and Karol Desnos and Maxime Pelcat and Jean-Fran\c{c}ois Nezan and William Plishker and Shuvra S Bhattacharyya}, doi = {10.1109/ISSoC.2013.6675271}, year = {2013}, date = {2013-10-01}, booktitle = {International Symposium on System on Chip (SoC)}, pages = {1-10}, address = {Tampere, Finland}, abstract = {Parallelization of Digital Signal Processing (DSP) software is an important trend for MultiProcessor System-on-Chip (MPSoC) implementation. The performance of DSP systems composed of parallelized computations depends on the scheduling technique, which must in general allocate computation and communication resources for competing tasks, and ensure that data dependencies are satisfied. In this paper, we formulate a new type of parallel task scheduling problem called Parallel Actor Scheduling (PAS) for MPSoC mapping of DSP systems that are represented as Synchronous DataFlow (SDF) graphs. In contrast to traditional SDF-based scheduling techniques, which focus on exploiting graph level (inter-actor) parallelism, the PAS problem targets the integrated exploitation of both intra- and inter-actor parallelism for platforms in which individual actors can be parallelized across multiple processing units. We address a special case of the PAS problem in which all of the actors in the DSP application or subsystem being optimized can be parallelized. 
For this special case, we develop and experimentally evaluate a two-phase scheduling framework with two work flows - particle swarm optimization with a mixed integer programming formulation, and particle swarm optimization with a fast heuristic based on list scheduling. We demonstrate that our PAS-targeted scheduling framework provides a useful range of trade-offs between synthesis time requirements and the quality of the derived solutions.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Parallelization of Digital Signal Processing (DSP) software is an important trend for MultiProcessor System-on-Chip (MPSoC) implementation. The performance of DSP systems composed of parallelized computations depends on the scheduling technique, which must in general allocate computation and communication resources for competing tasks, and ensure that data dependencies are satisfied. In this paper, we formulate a new type of parallel task scheduling problem called Parallel Actor Scheduling (PAS) for MPSoC mapping of DSP systems that are represented as Synchronous DataFlow (SDF) graphs. In contrast to traditional SDF-based scheduling techniques, which focus on exploiting graph level (inter-actor) parallelism, the PAS problem targets the integrated exploitation of both intra- and inter-actor parallelism for platforms in which individual actors can be parallelized across multiple processing units. We address a special case of the PAS problem in which all of the actors in the DSP application or subsystem being optimized can be parallelized. For this special case, we develop and experimentally evaluate a two-phase scheduling framework with two work flows - particle swarm optimization with a mixed integer programming formulation, and particle swarm optimization with a fast heuristic based on list scheduling. 
We demonstrate that our PAS-targeted scheduling framework provides a useful range of trade-offs between synthesis time requirements and the quality of the derived solutions. |
C3 | Desnos, Karol; Pelcat, Maxime; Nezan, Jean-François; Bhattacharyya, Shuvra S; Aridhi, Slaheddine PiMM: Parameterized and Interfaced dataflow Meta-Model for MPSoCs runtime reconfiguration Inproceedings International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS), Samos, Greece @inproceedings{Desnos_PiMM_2013, title = {PiMM: Parameterized and Interfaced dataflow Meta-Model for MPSoCs runtime reconfiguration}, author = {Karol Desnos and Maxime Pelcat and Jean-Fran\c{c}ois Nezan and Shuvra S Bhattacharyya and Slaheddine Aridhi}, url = {http://kdesnos.fr/wp-content/uploads/publis/Desnos_Pimm_2013.pdf}, doi = {10.1109/SAMOS.2013.6621104}, year = {2013}, date = {2013-07-01}, booktitle = {International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS)}, pages = {41-48}, address = {Samos, Greece}, abstract = {Dataflow models of computation are widely used for the specification, analysis, and optimization of Digital Signal Processing (DSP) applications. In this paper a new meta-model called PiMM is introduced to address the important challenge of managing dynamics in DSP-oriented representations. PiMM extends a dataflow model by introducing an explicit parameter dependency tree and an interface-based hierarchical compositionality mechanism. PiMM favors the design of highly-efficient heterogeneous multicore systems, specifying algorithms with customizable trade-offs among predictability and exploitation of both static and adaptive task, data and pipeline parallelism. PiMM fosters design space exploration and reconfigurable resource allocation in a flexible dynamic dataflow context.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Dataflow models of computation are widely used for the specification, analysis, and optimization of Digital Signal Processing (DSP) applications. 
In this paper a new meta-model called PiMM is introduced to address the important challenge of managing dynamics in DSP-oriented representations. PiMM extends a dataflow model by introducing an explicit parameter dependency tree and an interface-based hierarchical compositionality mechanism. PiMM favors the design of highly-efficient heterogeneous multicore systems, specifying algorithms with customizable trade-offs among predictability and exploitation of both static and adaptive task, data and pipeline parallelism. PiMM fosters design space exploration and reconfigurable resource allocation in a flexible dynamic dataflow context. |
C2 | Desnos, Karol; Pelcat, Maxime; Nezan, Jean-François; Aridhi, Slaheddine Pre- and post-scheduling memory allocation strategies on MPSoCs Inproceedings Electronic System Level Synthesis Conference (ESLsyn), Austin, TX, USA @inproceedings{Desnos_Pre_2013, title = {Pre- and post-scheduling memory allocation strategies on MPSoCs}, author = {Karol Desnos and Maxime Pelcat and Jean-Fran\c{c}ois Nezan and Slaheddine Aridhi}, url = {https://hal.archives-ouvertes.fr/hal-00868945/file/ESLSyn13.pdf}, year = {2013}, date = {2013-05-01}, booktitle = {Electronic System Level Synthesis Conference (ESLsyn)}, pages = {1--6}, publisher = {IEEE}, address = {Austin, TX, USA}, abstract = {This paper introduces and assesses a new method to allocate memory for applications implemented on a shared memory Multiprocessor System-on-Chip (MPSoC). This method first consists of deriving, from a Synchronous Dataflow (SDF) algorithm description, a Memory Exclusion Graph (MEG) that models all the memory objects of the application and their allocation constraints. Based on the MEG, memory allocation can be performed at three different stages of the implementation process: prior to the scheduling process, after an untimed multicore schedule is decided, or after a timed multicore schedule is decided. Each of these three alternatives offers a distinct trade-off between the amount of allocated memory and the flexibility of the application multicore execution. Tested use cases are based on descriptions of real applications and a set of random SDF graphs generated with the SDF For Free (SDF3) tool. Experimental results compare several allocation heuristics at the three implementation stages. 
They show that allocating memory after an untimed schedule of the application has been decided offers a reduced memory footprint as well as a flexible multicore execution.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } This paper introduces and assesses a new method to allocate memory for applications implemented on a shared memory Multiprocessor System-on-Chip (MPSoC). This method first consists of deriving, from a Synchronous Dataflow (SDF) algorithm description, a Memory Exclusion Graph (MEG) that models all the memory objects of the application and their allocation constraints. Based on the MEG, memory allocation can be performed at three different stages of the implementation process: prior to the scheduling process, after an untimed multicore schedule is decided, or after a timed multicore schedule is decided. Each of these three alternatives offers a distinct trade-off between the amount of allocated memory and the flexibility of the application multicore execution. Tested use cases are based on descriptions of real applications and a set of random SDF graphs generated with the SDF For Free (SDF3) tool. Experimental results compare several allocation heuristics at the three implementation stages. They show that allocating memory after an untimed schedule of the application has been decided offers a reduced memory footprint as well as a flexible multicore execution. |
2012 |
|
C1 | Desnos, Karol; Pelcat, Maxime; Nezan, Jean-François; Aridhi, Slaheddine Memory bounds for the distributed execution of a hierarchical Synchronous Data-Flow graph Inproceedings International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS), Samos, Greece @inproceedings{Desnos_Memory_2012, title = {Memory bounds for the distributed execution of a hierarchical Synchronous Data-Flow graph}, author = {Karol Desnos and Maxime Pelcat and Jean-Fran\c{c}ois Nezan and Slaheddine Aridhi}, url = {https://hal.archives-ouvertes.fr/hal-00721335/file/Samos12.pdf}, doi = {10.1109/SAMOS.2012.6404170}, year = {2012}, date = {2012-07-01}, booktitle = {International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS)}, pages = {160--167}, address = {Samos, Greece}, abstract = {This paper presents an application analysis technique to define the boundary of shared memory requirements of Multiprocessor System-on-Chip (MPSoC) in early stages of development. This technique is part of a rapid prototyping process and is based on the analysis of a hierarchical Synchronous Data-Flow (SDF) graph description of the system application. The analysis does not require any knowledge of the system architecture, the mapping or the scheduling of the system application tasks. The initial step of the method consists of applying a set of transformations to the SDF graph so as to reveal its memory characteristics. These transformations produce a weighted graph that represents the different memory objects of the application as well as the memory allocation constraints due to their relationships. The memory boundaries are then derived from this weighted graph using analogous graph theory problems, in particular the Maximum-Weight Clique (MWC) problem. State-of-the-art algorithms to solve these problems are presented and a heuristic approach is proposed to provide a near-optimal solution of the MWC problem. 
A performance evaluation of the heuristic approach is presented, and is based on hierarchical SDF graphs of realistic applications. This evaluation shows the efficiency of proposed heuristic approach in finding near optimal solutions.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } This paper presents an application analysis technique to define the boundary of shared memory requirements of Multiprocessor System-on-Chip (MPSoC) in early stages of development. This technique is part of a rapid prototyping process and is based on the analysis of a hierarchical Synchronous Data-Flow (SDF) graph description of the system application. The analysis does not require any knowledge of the system architecture, the mapping or the scheduling of the system application tasks. The initial step of the method consists of applying a set of transformations to the SDF graph so as to reveal its memory characteristics. These transformations produce a weighted graph that represents the different memory objects of the application as well as the memory allocation constraints due to their relationships. The memory boundaries are then derived from this weighted graph using analogous graph theory problems, in particular the Maximum-Weight Clique (MWC) problem. State-of-the-art algorithms to solve these problems are presented and a heuristic approach is proposed to provide a near-optimal solution of the MWC problem. A performance evaluation of the heuristic approach is presented, and is based on hierarchical SDF graphs of realistic applications. This evaluation shows the efficiency of proposed heuristic approach in finding near optimal solutions. |
Demonstrations
2019 |
|
D4 | Pelcat, Maxime; Desnos, Karol; Menard, Daniel; Arrestier, Florian; Honorat, Alexandre; Rubattu, Claudio; Morvan, Antoine; Heulot, Julien; Nezan, Jean-François PREESM: Generating Energy-Optimized Adaptive Software on a Heterogeneous Platform with Preesm Miscellaneous @misc{Pelcat_Preesm_2019, title = {PREESM: Generating Energy-Optimized Adaptive Software on a Heterogeneous Platform with Preesm}, author = {Maxime Pelcat and Karol Desnos and Daniel Menard and Florian Arrestier and Alexandre Honorat and Claudio Rubattu and Antoine Morvan and Julien Heulot and Jean-Fran\c{c}ois Nezan }, year = {2019}, date = {2019-03-26}, abstract = {This Booth demonstrates how PREESM and SPIDER tools generate energy-optimized sensor-based adaptive software on a heterogeneous platform. PREESM is a rapid system prototyping tool provided with a runtime manager named SPIDER. PREESM simulates stream processing applications and generates code for multi/many-cores. Processing can either be statically mapped or adaptively managed by SPIDER. Steps when using PREESM are: 1- Model your Application: PREESM provides you with a dataflow language, designed to express parallelism. 2- Model your Architecture: PREESM simulates and generates code for a wide range of systems (e.g., ARM, DSP, FPGA). 3- Prototype and Run your Design: PREESM takes mapping decisions and provides early design space information such as scheduling, memory use, and core loads. PREESM and SPIDER are available on GitHub, and supported by tutorials and a reactive community. PREESM and SPIDER are part of the H2020 CERBERO toolchain. http://preesm.org}, howpublished = {DATE Conference 2019, University Booth.}, keywords = {}, pubstate = {published}, tppubtype = {misc} } This Booth demonstrates how PREESM and SPIDER tools generate energy-optimized sensor-based adaptive software on a heterogeneous platform. PREESM is a rapid system prototyping tool provided with a runtime manager named SPIDER. 
PREESM simulates stream processing applications and generates code for multi/many-cores. Processing can either be statically mapped or adaptively managed by SPIDER. Steps when using PREESM are: 1- Model your Application: PREESM provides you with a dataflow language, designed to express parallelism. 2- Model your Architecture: PREESM simulates and generates code for a wide range of systems (e.g., ARM, DSP, FPGA). 3- Prototype and Run your Design: PREESM takes mapping decisions and provides early design space information such as scheduling, memory use, and core loads. PREESM and SPIDER are available on GitHub, and supported by tutorials and a reactive community. PREESM and SPIDER are part of the H2020 CERBERO toolchain. http://preesm.org |
2017 |
|
D3 | Miomandre, Hugo; Hascoët, Julien; Desnos, Karol; Martin, Kevin; Dupont de Dinechin, Benoît; Nezan, Jean-François Demonstrating the SPIDER Runtime for Reconfigurable Dataflow Graphs Execution onto a DMA-based Manycore Processor Miscellaneous @misc{Miomandre_Demonstrating_2017, title = {Demonstrating the SPIDER Runtime for Reconfigurable Dataflow Graphs Execution onto a DMA-based Manycore Processor}, author = {Hugo Miomandre and Julien Hasco\"{e}t and Karol Desnos and Kevin Martin and {Dupont de Dinechin}, Beno\^{i}t and Jean-Fran\c{c}ois Nezan}, url = {https://hal.archives-ouvertes.fr/hal-01637300/document}, year = {2017}, date = {2017-10-01}, howpublished = {IEEE International Workshop on Signal Processing Systems (SiPS)}, keywords = {}, pubstate = {published}, tppubtype = {misc} } |
2015 |
|
D2 | Raffin, Erwan; Nogues, Erwan; Lacour, Morgan; Pelcat, Maxime; Menard, Daniel; Desnos, Karol; Nezan, Jean-François Real-Time Low Power Software HEVC Decoder on Embedded GPP: A Side-by-Side Comparison Miscellaneous @misc{Raffin_Real_2015, title = {Real-Time Low Power Software HEVC Decoder on Embedded GPP: A Side-by-Side Comparison}, author = {Erwan Raffin and Erwan Nogues and Morgan Lacour and Maxime Pelcat and Daniel Menard and Karol Desnos and Jean-Fran\c{c}ois Nezan}, url = {https://hal.archives-ouvertes.fr/hal-01205912/document}, year = {2015}, date = {2015-09-15}, booktitle = {Design and Architectures for Signal and Image Processing (DASIP)}, address = {Cracow, Poland}, keywords = {}, pubstate = {published}, tppubtype = {misc} } |
2012 |
|
D1 | Heulot, Julien; Desnos, Karol; Nezan, Jean-François; Pelcat, Maxime; Raulet, Mickaël; Yviquel, Hervé; Lagalaye, Pierre-Laurent; Le Lann, Jean-Christophe An Experimental Toolchain Based on High-Level Dataflow Models of Computation for Heterogeneous MPSoC Miscellaneous @misc{Heulot_Experimental_2012, title = {An Experimental Toolchain Based on High-Level Dataflow Models of Computation for Heterogeneous MPSoC}, author = {Julien Heulot and Karol Desnos and Jean-Fran\c{c}ois Nezan and Maxime Pelcat and Micka\"{e}l Raulet and Herv\'{e} Yviquel and Pierre-Laurent Lagalaye and Le Lann, Jean-Christophe}, url = {https://hal-ensta-bretagne.archives-ouvertes.fr/hal-00749175/document}, year = {2012}, date = {2012-10-01}, booktitle = {Proceedings of the 2012 Conference on Design and Architectures for Signal and Image Processing}, pages = {1--2}, keywords = {}, pubstate = {published}, tppubtype = {misc} } |
Technical Reports
2021 |
|
T5 | Bourgoin, Thomas; Sourbier, Nicolas; Dardaillon, Mickaël; Desnos, Karol Génération de code pour une bibliothèque d'apprentissage par renforcement Technical Report @techreport{Bourgoin_Generation_2021, title = {G\'{e}n\'{e}ration de code pour une biblioth\`{e}que d'apprentissage par renforcement}, author = {Thomas Bourgoin and Nicolas Sourbier and Micka\"{e}l Dardaillon and Karol Desnos}, url = {http://kdesnos.fr/wp-content/uploads/publis/20211004-Internship_Report_Bourgoin_final.pdf}, year = {2021}, date = {2021-10-04}, institution = {Vaader Team, IETR - UMR CNRS 6164}, keywords = {}, pubstate = {published}, tppubtype = {techreport} } |
2020 |
|
T4 | Raumer, Pierre-Yves; Sourbier, Nicolas; Dardaillon, Mickaël; Pelcat, Maxime; Desnos, Karol Reinforcement Learning Library based on Tangled Program Graphs: Development of New Learning Environments and Library Features Technical Report @techreport{Raumer_Reinforcement_2020, title = {Reinforcement Learning Library based on Tangled Program Graphs: Development of New Learning Environments and Library Features}, author = {Pierre-Yves Raumer and Nicolas Sourbier and Micka\"{e}l Dardaillon and Maxime Pelcat and Karol Desnos}, url = {http://kdesnos.fr/wp-content/uploads/publis/20200831_tech_report.pdf}, year = {2020}, date = {2020-08-28}, institution = {Vaader Team, IETR - UMR CNRS 6164}, keywords = {}, pubstate = {published}, tppubtype = {techreport} } |
2017 |
|
T3 | Dejean-Servières, Mathieu; Desnos, Karol; Abdelouahab, Kamel; Hamidouche, Wassim; Morin, Luce; Pelcat, Maxime Study of the Impact of Standard Image Compression Techniques on Performance of Image Classification with a Convolutional Neural Network Technical Report @techreport{Dejean_Study_2017, title = {Study of the Impact of Standard Image Compression Techniques on Performance of Image Classification with a Convolutional Neural Network}, author = {Mathieu Dejean-Servi\`{e}res and Karol Desnos and Kamel Abdelouahab and Wassim Hamidouche and Luce Morin and Maxime Pelcat}, url = {https://hal.archives-ouvertes.fr/hal-01725126/document}, year = {2017}, date = {2017-12-20}, institution = {INSA Rennes ; IETR}, keywords = {}, pubstate = {published}, tppubtype = {techreport} } |
2015 |
|
T2 | Pelcat, Maxime; Desnos, Karol; Maggiani, Luca; Liu, Yanzhou; Heulot, Julien; Nezan, Jean-François; Bhattacharyya, Shuvra S Models of Architecture Technical Report @techreport{Pelcat_MoA_2015, title = {Models of Architecture}, author = {Maxime Pelcat and Karol Desnos and Luca Maggiani and Yanzhou Liu and Julien Heulot and Jean-Fran\c{c}ois Nezan and Shuvra S Bhattacharyya}, url = {https://hal.archives-ouvertes.fr/hal-01244470/file/MoA.pdf}, year = {2015}, date = {2015-12-15}, number = {PREESM/2015-12TR01}, institution = {IETR/INSA Rennes ; Scuola Superiore Sant'Anna, Pisa ; Institut Pascal, Clermont Ferrand ; University of Maryland, College Park ; Tampere University of Technology, Tampere}, keywords = {}, pubstate = {published}, tppubtype = {techreport} } |
2014 |
|
T1 | Pelcat, Maxime; Desnos, Karol; Heulot, Julien; Guy, Clément; Nezan, Jean-François; Aridhi, Slaheddine Dataflow-based rapid prototyping for multicore DSP systems Technical Report @techreport{Pelcat_2014_dataflow, title = {Dataflow-based rapid prototyping for multicore DSP systems}, author = {Maxime Pelcat and Karol Desnos and Julien Heulot and Cl\'{e}ment Guy and Jean-Fran\c{c}ois Nezan and Slaheddine Aridhi}, url = {http://preesm.sourceforge.net/website/data/uploads/technical_reports/Technical_Report_PREESM_2014-05TR01.pdf}, year = {2014}, date = {2014-04-16}, number = {PREESM/2014-05TR01}, institution = {Institut National des Sciences Appliqu\'{e}es de Rennes (INSA Rennes)}, keywords = {}, pubstate = {published}, tppubtype = {techreport} } |