@comment{
  Conventions used by the entries below:
  - one field per line; citation keys are kept exactly as exported, since documents cite them;
  - month uses the standard three-letter BibTeX macros, with days appended via the # operator;
  - page ranges use a double hyphen; doi holds the bare DOI without a resolver prefix;
  - acronyms and proper nouns in titles are brace-protected against style recasing.
}

@article{18553,
  author  = {Fezzardi, Pietro and Pilato, Christian and Ferrandi, Fabrizio},
  title   = {Enabling Automated Bug Detection for {IP}-based Designs using High-Level Synthesis},
  journal = {IEEE Design \& Test},
  year    = {2018},
  note    = {In press},
  doi     = {10.1109/MDAT.2018.2824121}
}

@inproceedings{18577,
  author    = {Banik, Subhadeep and Bogdanov, Andrey and Regazzoni, Francesco},
  title     = {Efficient Configurations for Block Ciphers with Unified {ENC/DEC} Paths},
  booktitle = {Proceedings of the IEEE International Symposium on Hardware Oriented Security and Trust (HOST) 2017},
  year      = {2017}
}

@inproceedings{18559,
  author    = {Valencia, Felipe and Oder, Tobias and G{\"u}neysu, Tim and Regazzoni, Francesco},
  title     = {Exploring the Vulnerability of {R-LWE} Encryption to Fault Attacks},
  booktitle = {Workshop on Cryptography and Security in Computing Systems of the HiPEAC2018 Conference, CS2 '18},
  year      = {2018},
  publisher = {ACM},
  address   = {New York, NY, USA}
}

@incollection{18529,
  author    = {Ferrante, Alberto and Malek, Miroslaw and Martinelli, Fabio and Mercaldo, Francesco and Milosevic, Jelena},
  title     = {Extinguishing Ransomware -- A Hybrid Approach to {Android} Ransomware Detection},
  booktitle = {Foundations and Practice of Security},
  editor    = {Imine, Abdessamad and Fernandez, Jos{\'e} M. and Marion, Jean-Yves and Logrippo, Luigi and Garcia-Alfaro, Joaquin},
  volume    = {10723},
  pages     = {242--258},
  year      = {2018},
  month     = feb,
  publisher = {Springer International Publishing},
  address   = {Cham},
  isbn      = {978-3-319-75650-9},
  doi       = {10.1007/978-3-319-75650-9_16},
  abstract  = {Mobile ransomware is on the rise and effective defense from it is of utmost importance to guarantee security of mobile users' data. Current solutions provided by antimalware vendors are signature-based and thus ineffective in removing ransomware and restoring the infected devices and files. Also, current state-of-the art literature offers very few solutions to effectively detecting and blocking mobile ransomware. Starting from these considerations, we propose a hybrid method able to effectively counter ransomware. The proposed method first examines applications to be used on a device prior to their installation (static approach) and then observes their behavior at runtime and identifies if the system is under attack (dynamic approach). To detect ransomware, the static detection method uses the frequency of opcodes while the dynamic detection method considers CPU usage, memory usage, network usage and system call statistics. We evaluate the performance of our hybrid detection method on a dataset that contains both ransomware and legitimate applications. Additionally, we evaluate the performance of the static and dynamic stand-alone methods for comparison. Our results show that although both static and dynamic detection methods perform well in detecting ransomware, their combination in a form of a hybrid method performs best, being able to detect ransomware with 100\% precision and having a false positive rate of less than 4\%.}
}

@inproceedings{18449,
  author    = {Bu, Li and Alippi, Cesare and Zhao, Dongbin},
  title     = {Ensemble {LSDD}-based Change Detection Tests},
  booktitle = {IEEE-INNS International Joint Conference on Neural Networks (IJCNN16)},
  year      = {2016},
  month     = jul,
  address   = {Vancouver, Canada},
  abstract  = {The least squares density difference change detection test (LSDD-CDT) has proven to be an effective method in detecting concept drift by inspecting features derived from the discrepancy between two probability density functions (pdfs). The first pdf is associated with the concept drift free case, the second to the possible post change one. Interestingly, the method permits to control the ratio of false positives. This paper introduces and investigates the performance of a family of LSDD methods constructed by exploring different ensemble options applied to the basic CDT procedure. Experiments show that most of proposed methods are characterized by improved performance in change detection once compared with the direct ensemble-free counterpart.}
}

@inproceedings{18485,
  author    = {Bellon, Sebastien and Favi, Claudio and Malek, Miroslaw and Macchetti, Marco and Regazzoni, Francesco},
  title     = {Evaluating the Impact of Environmental Factors on Physically Unclonable Functions},
  booktitle = {Proceedings of the 2016 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays (FPGA 2016)},
  pages     = {279},
  year      = {2016},
  month     = feb,
  publisher = {ACM New York, NY, USA},
  address   = {Monterey, CA, USA},
  isbn      = {978-1-4503-3856-1},
  doi       = {10.1145/2847263.2847308},
  url       = {http://doi.acm.org/10.1145/2847263.2847308},
  abstract  = {Fabrication process introduces some inherent variability to the attributes of transistors (in particular length, widths, oxide thickness). As a result, every chip is physically unique. Physical uniqueness of microelectronics components can be used for multiple security applications. Physically Unclonable Functions (PUFs) are built to extract the physical uniqueness of microelectronics components and make it usable for secure applications. However, the microelectronics components used by PUFs designs suffer from external, environmental variations that impact the PUF behavior. Variations of temperature gradients during manufacturing can bias the PUF responses. Variations of temperature or thermal noise during PUF operation change the behavior of the circuit, and can introduce errors in PUF responses. Detailed knowledge of the behavior of PUFs operating over various environmental factors is needed to reliably extract and demonstrate uniqueness of the chips. In this work, we present a detailed and exhaustive analysis of the behavior of two PUF designs, a ring oscillator PUF and a timing path violation PUF. We have implemented both PUFs using FPGA fabricated by Xilinx, and analyzed their behavior while varying temperature and supply voltage. Our experiments quantify the robustness of each design, demonstrate their sensitivity to temperature and show the impact which supply voltage has on the uniqueness of the analyzed PUFs.}
}

@inproceedings{18474,
  author    = {Banik, Subhadeep and Bogdanov, Andrey and Regazzoni, Francesco},
  title     = {Exploring Energy Efficiency of Lightweight Block Ciphers},
  booktitle = {Selected Areas in Cryptography: 22nd International Conference (SAC) 2015},
  series    = {Lecture Notes in Computer Science},
  volume    = {9566},
  pages     = {178--194},
  year      = {2015},
  month     = aug,
  publisher = {Springer},
  address   = {Sackville, NB, Canada},
  isbn      = {978-3-319-31300-9},
  issn      = {0302-9743},
  doi       = {10.1007/978-3-319-31301-6},
  url       = {http://dx.doi.org/10.1007/978-3-319-31301-6},
  keywords  = {AES, lightweight block cipher, Low Power Energy Circuits},
  abstract  = {In the last few years, the field of lightweight cryptography has seen an influx in the number of block ciphers and hash functions being proposed. One of the metrics that define a good lightweight design is the energy consumed per unit operation of the algorithm. For block ciphers, this operation is the encryption of one plaintext. By studying the energy consumption model of a CMOS gate, we arrive at the conclusion that the energy consumed per cycle during the encryption operation of an r-round unrolled architecture of any block cipher is a quadratic function in r. We then apply our model to 9 well known lightweight block ciphers, and thereby try to predict the optimal value of r at which an r-round unrolled architecture for a cipher is likely to be most energy efficient. We also try to relate our results to some physical design parameters like the signal delay across a round and algorithmic parameters like the number of rounds taken to achieve full diffusion of a difference in the plaintext/key.}
}

@article{18473,
  author   = {Banik, Subhadeep and Bogdanov, Andrey and Regazzoni, Francesco},
  title    = {Exploring Energy Efficiency of Lightweight Block Ciphers},
  journal  = {(IACR) Cryptology ePrint Archive},
  volume   = {2015},
  number   = {847},
  year     = {2015},
  month    = sep,
  url      = {http://eprint.iacr.org/2015/847},
  keywords = {implementation AES, lightweight block cipher, Low Power Energy Circuits},
  abstract = {In the last few years, the field of lightweight cryptography has seen an influx in the number of block ciphers and hash functions being proposed. One of the metrics that define a good lightweight design is the energy consumed per unit operation of the algorithm. For block ciphers, this operation is the encryption of one plaintext. By studying the energy consumption model of a CMOS gate, we arrive at the conclusion that the total energy consumed during the encryption operation of an r-round unrolled architecture of any block cipher is a quadratic function in r. We then apply our model to 9 well known lightweight block ciphers, and thereby try to predict the optimal value of r at which an r-round unrolled architecture for a cipher is likely to be most energy efficient. We also try to relate our results to some physical design parameters like the signal delay across a round and algorithmic parameters like the number of rounds taken to achieve full diffusion of a difference in the plaintext/key.}
}

@inproceedings{18475,
  author    = {Banik, Subhadeep and Bogdanov, Andrey and Regazzoni, Francesco},
  title     = {Exploring the Energy Consumption of Lightweight Blockciphers in {FPGA}},
  booktitle = {International Conference on ReConFigurable Computing and FPGAs, ReConFig 2015},
  pages     = {1--6},
  year      = {2015},
  publisher = {IEEE},
  address   = {Riviera Maya, Mexico},
  note      = {Proceedings published February 2016},
  isbn      = {978-1-4673-9406-2},
  doi       = {10.1109/ReConFig.2015.7393308},
  url       = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=7390332},
  keywords  = {cryptography, cyber-physical systems, encryption, lightweight block cipher},
  abstract  = {Internet of things and cyber-physical systems requiring security functionality has pushed for the design of a number of block ciphers and hash functions specifically developed for being implemented in resource constrained devices. Initially the optimization was mainly on area and power consumption, but, nowadays the attention is more on the energy consumption. In this paper, for the first time, we look at energy consumption of lightweight block ciphers implemented in reconfigurable devices, and we analyze the effects that round unrolling might have on the energy consumed during the encryption. Concentrating on applications that require a number of parallel encryptions, we instantiate several designs on the target FPGA and we analyze how the energy consumption varies in each algorithm when changing the amount of unrolled rounds. Our results, obtained on the Xc6slx45t device of the Spartan6 family, demonstrate that Present is the most energy efficient algorithm and that the relation between the energy consumption and the number of unrolled rounds measured on FPGA is similar to the one measured on dedicated hardware.}
}

@inproceedings{18464,
  author    = {Sami, Mariagiovanna and Malek, Miroslaw and Bondi, Umberto and Regazzoni, Francesco},
  title     = {Embedded Systems Education: Job Market Expectations},
  booktitle = {Workshop on Embedded and Cyber-Physical Systems Education (WESE)},
  year      = {2014},
  month     = oct,
  publisher = {ACM},
  address   = {New Delhi, India},
  isbn      = {978-1-4503-3090-9},
  doi       = {10.1145/2829957.2829961},
  url       = {http://doi.acm.org/10.1145/2829957.2829961},
  keywords  = {Cyber-Physical Systems Education, embedded systems, Nano Tera program},
  abstract  = {In the fifteen years since the first Embedded Systems Design Master studies were proposed the embedded systems world has radically changed. The spectrum of application areas has increased beyond any expectation, and the increasing presence of embedded systems in the physical world has led to ``cyber-physical systems.'' Devices tend to become a commodity in many cases, while sensors and IPs acquire a larger share of the market. The whole industrial ecosystem is changing as well, with ``application'' companies becoming increasingly present and SMEs emerging as major players. It becomes mandatory to reconsider the competences and capacities that should be provided in a Master of Science course oriented to Embedded Systems Design, so as to meet new and diverse requests that come from job market and prospective employers. Within the frame of the Nano-Tera Swiss Federal program (www.nano-tera.ch), the educational project Future Embedded Systems Education (FESTE) aimed at identifying requests coming from the job market, so as to outline the renewed professional profile for young Embedded Systems Designers. The results indicate that programming, networking, real time and system architecture know-how combined with soft skills such as teamwork and communication are in demand and frequently come under disguised names such as automation or control engineering.}
}

@inproceedings{18165,
  author    = {Dittrich, Andreas and Wanja, Stefan and Malek, Miroslaw},
  title     = {{ExCovery} -- A Framework for Distributed System Experiments and a Case Study of Service Discovery},
  booktitle = {28th International Parallel \& Distributed Processing Symposium, Workshops and PhD Forum (IPDPSW)},
  year      = {2014},
  month     = may,
  publisher = {IEEE Computer Society},
  address   = {Phoenix, AZ, USA},
  isbn      = {978-1-4799-4116-2},
  doi       = {10.1109/IPDPSW.2014.147},
  url       = {http://andreas-dittrich.eu/2014/03/excovery},
  keywords  = {Distributed Systems, Experiment Framework, Experiments, Tool Description},
  abstract  = {Experiments are a fundamental part of science. They are needed when the system under evaluation is too complex to be analytically described and they serve to empirically validate hypotheses. This work presents the experimentation framework ExCovery for dependability analysis of distributed processes. It provides concepts that cover the description, execution, measurement and storage of experiments. These concepts foster transparency and repeatability of experiments for further sharing and comparison. ExCovery has been tried and refined in a manifold of dependability related experiments during the last two years. A case study is provided to describe service discovery as experiment process. A working prototype for IP networks runs on the Distributed Embedded System (DES) wireless testbed at the Freie Universit{\"a}t Berlin.}
}

@inproceedings{18072,
  author    = {Bayrak, Ali Galip and Velickovic, Nikola and Regazzoni, Francesco and Novo Bruna, David and Brisk, Philip and Ienne, Paolo},
  title     = {An {EDA}-Friendly Protection Scheme against Side-Channel Attacks},
  booktitle = {Design, Automation and Test in Europe (DATE)},
  year      = {2013},
  month     = mar,
  address   = {Grenoble, France}
}

@inproceedings{157.mariani2012parma,
  author    = {Mariani, Giovanni and Palermo, Gianluca and Zaccaria, Vittorio and Silvano, Cristina},
  title     = {Evaluating Run-time Resource Management Policies for Multi-core Embedded Platforms with the {EMME} Evaluation Framework},
  booktitle = {Workshop on Parallel Programming and Run-time Management Techniques for Many-core Architectures},
  year      = {2012},
  month     = feb,
  keywords  = {EMME, multi-core, run-time resource management, simulation}
}

@inproceedings{154.BaChDe11.ISWCS,
  author    = {Baddour, Rami and Chiumento, Alessandro and Desset, Claude},
  title     = {Energy-Throughput Simulation Approach for Heterogeneous {LTE} Scenarios},
  booktitle = {ISWCS'11: Proceedings of The Eighth International Symposium on Wireless Communication Systems},
  pages     = {1--5},
  year      = {2011},
  month     = nov # "~6--9",
  address   = {Aachen, Germany},
  doi       = {10.1109/ISWCS.2011.6125377},
  keywords  = {long term evolution (LTE)},
  abstract  = {In order to increase overall LTE system performance femtocells have been proposed as a user-based solution promising to give much better service to the user specially indoor. Their deployment should improve the total system capacity noticeably and decrease drastically the power consumption. On the other hand these small indoor cells make the network planning strategies much more complex given the uncertainty of their position and their load; femtocells are after all managed by the users. Goal of this work is to provide a simulation approach to determine the effects of heterogeneous cell deployment on the performance of an LTE network. The simulation framework allows to realistically compare the power consumption and throughput of the overall system. The key components are the combination of indoor and outdoor propagation modeling, and the diversification of femto, micro and macro-cell energy consumption models. The model further contains complex city-like building structures, multiple communication layers (eNodeBs and femtocells) distributed over a three-dimensional map and numerous users moving across different areas while adapting their service requirements. The simulation approach results in relatively computationally inexpensive simulations and allows to model the expected throughput and energy consumption for various heterogeneous LTE scenarios.}
}

@inproceedings{18079,
  author    = {Barenghi, Alessandro and Hocquet, C{\'e}dric and Bol, David and Standaert, Fran{\c{c}}ois-Xavier and Regazzoni, Francesco and Koren, Israel},
  title     = {Exploring the Feasibility of Low Cost Fault Injection Attacks on Sub-Threshold Devices through an Example of a 65nm {AES} Implementation},
  booktitle = {7th Workshop on RFID Security and Privacy (RFIDSec)},
  year      = {2011},
  month     = jun,
  address   = {Amherst, Massachusetts, USA}
}

@inproceedings{114.MaYkZhZhLa10,
  author    = {Mariani, Giovanni and Ykman-Couvreur, Chantal and Zhang, Ke and Zhang, Lu and Lafruit, Gauthier},
  title     = {An Efficient Run-Time Management Methodology for Stereo Matching Application},
  booktitle = {2PARMA: Proceedings of the Workshop on Parallel Programming and Run-time Management Techniques for Many-core Architectures},
  year      = {2010},
  month     = feb,
  address   = {Hannover, Germany},
  abstract  = {This paper presents a methodology for Run-Time Management (RTM) of algorithmic parameters. The RTM is able to trade-off the algorithm output quality and the execution time. Thus, once a requirement in terms of maximum execution time is set, the RTM dynamically tunes the parameters in order to maximize the output quality while respecting the given requirement. The run-time decision making relies on design-time modeling techniques able to characterize key relations between algorithm parameters, execution time and output quality. Models generated during the design-time analysis are accurate enough to drive the RTM in its decision making while enough generic to model application behaviors over datasets which were not included at design-time. In this paper the methodology is applied on the Stereo Matching application, a computational intensive artificial vision application aimed at inferring object depths using two or more cameras. Experimental results prove the effectiveness of the methodology which is able to identify high quality solutions respecting required deadline while introducing negligible overhead.}
}

@inproceedings{135.VuLuErKu10.2,
  author    = {Vukmirovi{\'c}, Srdjan and Lukovi{\'c}, Slobodan and Erdeljan, Aleksandar and Kuli{\'c}, Filip},
  title     = {An Enhanced Workflow Management for Utility Management System},
  booktitle = {Proceedings of the International Congress on Ultra Modern Telecommunications and Control Systems (ICUMT 2010)},
  year      = {2010},
  month     = oct # "~18--20",
  address   = {Moscow, Russia},
  doi       = {10.1109/ICUMT.2010.5676601},
  abstract  = {The emerging computational grid infrastructure consists of widely distributed heterogeneous resources, which makes mapping of increasingly complex applications a very challenging task. Utility Management Systems (UMS) manage very large number of workflows with very high resource requirements and thereby optimization of resource utilization has to be adapted. In this work we propose architecture that implements a novel concept for dynamical execution of a scheduling algorithm using near real-time feedback from the execution monitoring process. An Artificial Neural Network (ANN) was trained for workflow scheduling. In the case study, we first perform experiments with same number of workflows and then introduce two additional in the system observing its behavior with and without proposed improvements. Performance tests show that significant improvements of overall execution time can be achieved by introducing adaptive Artificial Neural Network.}
}

@inproceedings{133.LuNi10.2,
  author    = {Lukovi{\'c}, Slobodan and Christianos, Nikolaos},
  title     = {Enhancing Network-on-Chip Components to Support Security of Processing Elements},
  booktitle = {Proceedings of the 5th Workshop on Embedded Systems Security (WESS'2010) A Workshop of the Embedded Systems Week (ESWEEK '10)},
  year      = {2010},
  month     = oct # "~24",
  address   = {Scottsdale, AZ, USA},
  doi       = {10.1145/1873548.1873560},
  abstract  = {Network-on-Chip (NoC) has emerged as a promising solution for scalable communication among steadily growing number of cores integrated in MultiProcessor System-on-Chips (MPSoCs). The increasing system heterogeneity together with the possibility of reconfiguration makes the overall system security one of the major concerns in MPSoC design. On the other hand, modular and scalable design of NoCs enables their enhancements in various directions for supporting services other than simple data routing. In this work we propose and implement a solution to secure attached processing units from a buffer overflow type of the attacks that comes in a form of a protection module that is embedded into the Network Interface of the NoC. At the same time, our solution prevents potential propagation of the attack through the NoC towards other processors. We prove feasibility via prototype realization in FPGA technology for a MicroBlaze processor on Xilinx Virtex-II Pro board.}
}

@article{108.DeFe09.3,
  author    = {Derin, Onur and Ferrante, Alberto},
  title     = {Enabling Self-adaptivity in Component-based Streaming Applications},
  journal   = {ACM SIGBED Review},
  volume    = {6},
  number    = {3},
  pages     = {14:1--14:4},
  year      = {2009},
  month     = oct,
  publisher = {ACM SIGBED},
  note      = {Special Issue on the 2nd International Workshop on Adaptive and Reconfigurable Embedded Systems (APRES'09)},
  issn      = {1551-3688},
  doi       = {10.1145/1851340.1851356},
  keywords  = {component-based design, self-adaptive systems},
  abstract  = {Self-adaptivity is the capability of a system to adapt itself dynamically to achieve its goals. By means of this mechanism the system is able to autonomously modify its behavior or the way in which applications are run and implemented to achieve the goals set. In this paper we propose a framework that uses a component-based approach to implement self-adaptivity at application level. By using this mechanism, the framework provides the ability to perform both adaptation on the structure of the application (i.e., how the components are connected together) and on internal parameters of each component. At application level, there is a mechanism to monitor different parameters and to check whether the system is meeting the assigned goals or not. A controller drives adaptations when goals are not met.}
}

@article{18064,
  author  = {Regazzoni, Francesco and Eisenbarth, Thomas and Poschmann, Axel and Gro{\ss}sch{\"a}dl, Johann and Gurkaynak, Frank and Macchetti, Marco and Toprak, Zeynep and Pozzi, Laura and Paar, Christof and Leblebici, Yusuf and Ienne, Paolo},
  title   = {Evaluating Resistance of {MCML} Technology to Power Analysis Attacks Using a Simulation-Based Methodology},
  journal = {Springer Transactions on Computational Science},
  volume  = {5430},
  pages   = {230--243},
  year    = {2009},
  month   = feb
}

@inproceedings{90.MaPaZaSi08,
  author    = {Mariani, Giovanni and Palermo, Gianluca and Zaccaria, Vittorio and Silvano, Cristina},
  title     = {An Efficient Design Space Exploration Methodology for Multi-Cluster {VLIW} Architectures based on Artificial Neural Networks},
  booktitle = {Proc. IFIP International Conference on Very Large Scale Integration VLSI - SoC 2008},
  year      = {2008},
  month     = oct # "~13--15",
  address   = {Rhodes Island, Greece},
  keywords  = {design space exploration, multi-objective optimization, neural networks, response surface, system-on-chip (SoC), very long instruction words (VLIW)},
  abstract  = {Multi-Cluster Very Long Instruction Word (VLIW) architectures are currently designed by using platform-based synthesis techniques. In these approaches, a wide range of platform parameters is tuned to find the best trade-offs in terms of the selected figures of merit (such as energy, delay and area). This optimization phase is called Design Space Exploration (DSE) and it generally consists of a Multi-Objective Optimization (MOO) problem. The design space for a Multi-Cluster architecture is too large to be evaluated comprehensively. So far, several heuristic techniques have been proposed to address the MOO problem, but they are characterized by low efficiency to identify the Pareto front. In this paper, we propose an efficient DSE methodology leveraging neural networks. In particular, an initial design-of-experiments phase is used for generating a coarse view of the target design space; neural networks are then trained and used to refine the exploration, by identifying efficiently the Pareto points of the design space. This process is iteratively repeated until the target criterion (convergence of the Pareto coverage) is satisfied. A set of experimental results are reported to trade-off accuracy and efficiency of the proposed techniques with actual workloads.}
}

@inproceedings{81.sliceb08,
  author    = {Lukovi{\'c}, Slobodan and Puzovi{\'c}, Nikola and Stanisavljevi{\'c}, Milo{\v{s}}},
  title     = {An Enhanced Service Provider Communication Interface with Client Priorization},
  booktitle = {Proceedings of IEEE/WFMC International Conference on e-Business},
  year      = {2008},
  month     = jul # "~26--29",
  abstract  = {With the increased dynamics of modern life, the efficiency and reliability of everyday services is emerging to be a fundamental concern. On the other hand, modern telecommunication technologies, like wireless Internet access, are penetrating all segments of our life. However, many every day activities and services still do not fully exploit new technologies. We propose an approach that enables increased deployment of E-commerce concepts in the fields where their usage was either small or negligible. Moreover, in the scope of the same concept, we introduce prioritization of clients in services where it was not commonly present to date. A solution for enhanced communication interface between service provider and customers is developed. As a case study, the system is designed and optimized for an implementation in a fast-food chain. The proposed solution is aiming at increasing of quality of service for customers, and at the same time increasing the operational efficiency of the provider. The main idea behind this approach is to enable customers to use their mobile devices, such as cell phones or PDAs, for browsing offered services or goods, viewing current service conditions and placing orders. We will detail mathematical model underneath and describe the implementation on both server and client side.}
}

@inproceedings{74.Munich,
  author    = {Mura, Marcello and Panda, Amrit and Prevostini, Mauro},
  title     = {Executable Models and Verification from {MARTE} and {SysML}: a Comparative Study of Code Generation Capabilities},
  booktitle = {Proceedings of MARTE Workshop (DATE08)},
  year      = {2008},
  month     = mar,
  address   = {Munich, Germany},
  keywords  = {automatic generation of code, high level design, profiling, unified modeling language (UML)},
  abstract  = {In this paper two well known UML profiles, namely SysML and MARTE are closely examined and compared. Both profiles are well suited for the description of embedded systems, although focusing on different aspects and can therefore be considered as complementary. While SysML targets system engineering descriptions in a high level of abstraction and provide diagrams for requirements specification, MARTE is tailored for systems in which Real Time constraints play a major role. Expressiveness of such profiles and their matching with languages that represent the next step in the development of Hardware/Software systems will be the main subject of this work. A Wireless Sensor Network scenario is taken as a reference case study and used to illustrate a practical application of MDA.}
}

@inproceedings{21.968073,
  author    = {Atasu, Kubilay and Breveglieri, Luca and Macchetti, Marco},
  title     = {Efficient {AES} Implementations for {ARM} Based Platforms},
  booktitle = {SAC '04: Proceedings of the 2004 ACM symposium on Applied computing},
  pages     = {841--845},
  year      = {2004},
  publisher = {ACM Press, New York, USA},
  address   = {Nicosia, Cyprus},
  isbn      = {1-58113-812-1},
  doi       = {10.1145/967900.968073},
  keywords  = {advanced encryption standard (AES), ARM microprocessor, cache memories, code optimisation},
  abstract  = {The Advanced Encryption Standard (AES) contest, started by the U.S. National Institute of Standards and Technology (NIST), saw the Rijndael [13] algorithm as its winner [11]. Although the AES is fully defined in terms of functionality, it requires best exploitation of architectural parameters in order to reach the optimum performance on specific architectures. Our work concentrates on ARM cores [1] widely used in the embedded industry. Most promising implementation choices for the common ARM Instruction Set Architecture (ISA) are identified, and a new implementation for the linear mixing layer is proposed. The performance improvement over current implementations is demonstrated by a case study on the Intel StrongARM SA-1110 Microprocessor [2]. Further improvements based on exploitation of memory hierarchies are also described, and the corresponding performance figures are presented.}
}

@inproceedings{8.752733,
  author    = {Bertoni, Guido Marco and Breveglieri, Luca and Fragneto, Pasqualina and Macchetti, Marco and Marchesin, Stefano},
  title     = {Efficient Software Implementation of {AES} on 32-Bit Platforms},
  booktitle = {CHES '02: Revised Papers from the 4th International Workshop on Cryptographic Hardware and Embedded Systems},
  pages     = {159--171},
  year      = {2003},
  publisher = {Springer-Verlag},
  address   = {London, UK},
  isbn      = {3-540-00409-2},
  abstract  = {Rijndael is the winner algorithm of the AES contest; therefore it should become the most used symmetric-key cryptographic algorithm. One important application of this new standard is cryptography on smart cards. In this paper we present an optimisation of the Rijndael algorithm to speed up execution on 32-bits processors with memory constraints, such as those used in smart cards. First a theoretical analysis of the Rijndael algorithm and of the proposed optimisation is discussed, and then simulation results of the optimised algorithm on different processors are presented and compared with other reference implementations, as known from the technical literature.}
}

@inproceedings{6.BoSaSciSiZaZa2002,
  author    = {Bona, Andrea and Sami, Mariagiovanna and Sciuto, Donatella and Silvano, Cristina and Zaccaria, Vittorio and Zafalon, Roberto},
  title     = {Energy Estimation and Optimization of Embedded {VLIW} Processors based on Instruction Clustering},
  booktitle = {39th Design Automation Conference},
  pages     = {886--891},
  year      = {2002},
  month     = jun # "~10--14",
  address   = {New Orleans},
  doi       = {10.1109/DAC.2002.1012747},
  keywords  = {power estimation, VLIW architectures},
  abstract  = {Aim of this paper is to propose a methodology for the definition of an instruction-level energy estimation framework for VLIW (Very Long Instruction Word) processors. The power modeling methodology is the key issue to define an effective energy-aware software optimisation strategy for state-of-the-art ILP (Instruction Level Parallelism) processors. The methodology is based on an energy model for VLIW processors that exploits instruction clustering to achieve an efficient and fine grained energy estimation. The approach aims at reducing the complexity of the characterization problem for VLIW processors from exponential, with respect to the number of parallel operations in the same very long instruction, to quadratic, with respect to the number of instruction clusters. Furthermore, the paper proposes a spatial scheduling algorithm based on a low-power reordering of the parallel operations within the same long instruction. Experimental results have been carried out on the Lx processor, a 4-issue VLIW core jointly designed by HPLabs and STMicroelectronics. The results have shown an average error of 1.9\% between the cluster-based estimation model and the reference design, with a standard deviation of 5.8\%. For the Lx architecture, the spatial instruction scheduling algorithm provides an average energy saving of 12\%.}
}

@inproceedings{3.CaPoMaMaBeBreFra2001,
  author    = {Cassoli, Federico and Polloni, Flavio and Marchesin, Stefano and Macchetti, Marco and Bertoni, Guido Marco and Breveglieri, Luca and Fragneto, Pasqualina},
  title     = {Efficient {C} Implementation of the {ECC} and {AES} Cryptographic Systems},
  booktitle = {Technology Leadership Day - organized by the MicroSwiss Network},
  year      = {2001},
  month     = oct # "~10",
  address   = {Fribourg}
}