% Entry conventions (apply when adding or editing records):
%   - a month without days uses the predefined month macros (jan ... dec), unquoted;
%     a month that also carries conference days stays literal text
%   - page ranges and day ranges are written with a double hyphen
%   - doi holds the bare identifier, without any resolver prefix
%   - acronyms and proper names inside title are brace-protected so that
%     bibliography styles cannot lowercase them
%   - citation keys are never renamed, existing documents cite them

@inproceedings{18547,
  title        = {Anomaly and Change Detection in Graph Streams through Constant-Curvature Manifold Embeddings},
  booktitle    = {IJCNN 2018: International Joint Conference on Neural Networks},
  year         = {2018},
  month        = jul,
  author       = {Zambon, Daniele and Livi, Lorenzo and Alippi, Cesare}
}

@inproceedings{18592,
  title        = {Arbon Demonstrator {Eye-on-the-Grid}, from Concept to Results},
  booktitle    = {SCCER-FURIES Annual Conference},
  year         = {2018},
  month        = dec,
  url          = {https://sccer-furies.epfl.ch/wp-content/uploads/2019/01/Arbon-demo-FURIES-Annual-meeting.pdf},
  author       = {Lukovi{\'c}, Slobodan and Gasparin, Alberto and Witzig, Jens and Herbst, Ingo}
}

@book{18544,
  title        = {Artificial Intelligence in the Age of Neural Networks and Brain Computing},
  publisher    = {Academic Press},
  year         = {2018},
  pages        = {420},
  edition      = {1},
  abstract     = {Artificial Intelligence in the Age of Neural Networks and Brain Computing demonstrates that existing disruptive implications and applications of AI is a development of the unique attributes of neural networks, mainly machine learning, distributed architectures, massive parallel processing, black-box inference, intrinsic nonlinearity and smart autonomous search engines. The book covers the major basic ideas of brain-like computing behind AI, provides a framework to deep learning, and launches novel and intriguing paradigms as future alternatives. The success of AI-based commercial products proposed by top industry leaders, such as Google, IBM, Microsoft, Intel and Amazon can be interpreted using this book.},
  author       = {Kozma, Robert and Alippi, Cesare and Choe, Yoonsuck and Morabito, Francesco}
}

@inproceedings{18488,
  title        = {Adaptable {AES} implementation with power-gating support},
  booktitle    = {International Conference on Computing Frontiers CF{\textquoteright}16},
  series       = {Proceedings of the ACM International Conference on Computing Frontiers},
  year         = {2016},
  month        = may,
  pages        = {331--334},
  publisher    = {ACM New York, NY, USA},
  organization = {ACM New York, NY, USA},
  address      = {Como, Italy},
  abstract     = {In this paper, we propose a reconfigurable design of the Advanced Encryption Standard capable of adapting at runtime to the requirements of the target application. Reconfiguration is achieved by activating only a specific subset of all the instantiated processing elements. Further, we explore the effectiveness of power gating and clock gating methodologies to minimize the energy consumption of the processing elements not involved in computation.},
  keywords     = {AES implementation, power analysis attacks, power modeling},
  isbn         = {978-1-4503-4128-8},
  doi          = {10.1145/2903150.2903488},
  url          = {http://doi.acm.org/10.1145/2903150.2903488},
  author       = {Banik, Subhadeep and Bogdanov, Andrey and Fanni, Tiziana and Sau, Carlo and Raffo, Luigi and Palumbo, Francesca and Regazzoni, Francesco}
}

@inproceedings{18578,
  title        = {{Atomic-AES}: A Compact Implementation of the {AES} Encryption/Decryption Core},
  booktitle    = {Proceedings of 17th International Conference on Cryptology in India (INDOCRYPT) 2016},
  year         = {2016},
  author       = {Banik, Subhadeep and Bogdanov, Andrey and Regazzoni, Francesco}
}

@article{18482,
  title        = {Automatic Application of Power Analysis Countermeasures},
  journal      = {IEEE Transactions on Computers},
  volume       = {64},
  number       = {2},
  year         = {2015},
  month        = feb,
  pages        = {329--341},
  type         = {journal},
  chapter      = {329},
  abstract     = {We introduce a compiler that automatically inserts software countermeasures to protect cryptographic algorithms against power-based side-channel attacks. The compiler first estimates which instruction instances leak the most information through side-channels. This information is obtained either by dynamic analysis, evaluating an information theoretic metric over the power traces acquired during the execution of the input program, or by static analysis. As information leakage implies a loss of security, the compiler then identifies (groups of) instruction instances to protect with a software countermeasure such as random precharging or Boolean masking. As software protection incurs significant overhead in terms of cryptosystem runtime and memory usage, the compiler protects the minimum number of instruction instances to achieve a desired level of security. The compiler is evaluated on two block ciphers, AES and Clefia; our experiments demonstrate that the compiler can automatically identify and protect the most important instruction instances. To date, these software countermeasures have been inserted manually by security experts, who are not necessarily the main cryptosystem developers. Our compiler offers significant productivity gains for cryptosystem developers who wish to protect their implementations from side-channel attacks.},
  keywords     = {cryptographic algorithms protection, cryptography, data protection, power analysis attacks, program compilers, side-channel attacks},
  issn         = {0018-9340},
  doi          = {10.1109/TC.2013.219},
  url          = {http://dx.doi.org/10.1109/TC.2013.219},
  author       = {Bayrak, Ali Galip and Regazzoni, Francesco and Novo, David and Brisk, Philip and Standaert, Fran{\c c}ois-Xavier and Ienne, Paolo}
}

@inproceedings{18469,
  title        = {Accelerating differential power analysis on heterogeneous systems},
  booktitle    = {The 9th Workshop on Embedded Systems Security (WESS) 2014},
  year         = {2014},
  month        = oct,
  publisher    = {ACM},
  organization = {ACM},
  address      = {New Delhi, India},
  abstract     = {Differential Power Analysis (DPA) attacks allows discovering the secret key stored into secure embedded systems by exploiting the correlation between the power consumed by a device and the data being processed. The computation involved is generally relatively simple, however, if the used power traces are composed by a large number of points, the processing time can be long. In this paper we aim at speeding up the so called correlation power analysis (CPA). To do so, we used the OpenCL framework to distribute the workload of the attack over an heterogeneous platform composed by a CPU and multiple accelerators. We concentrate in the computation of the Pearson{\textquoteright}s correlation coefficients, as they cover approximately 80\% of the overall execution time, and we further optimize the attack by minimizing the data transfers between the host processor and the GPUs. Our results show performance improvements of up to 9x when compared with the reference parallel implementation},
  keywords     = {heterogeneous systems, power analysis},
  isbn         = {978-1-4503-2932-3},
  doi          = {10.1145/2668322.2668326},
  url          = {http://doi.acm.org/10.1145/2668322.2668326},
  author       = {Amaral, Joao and Regazzoni, Francesco and Tomas, Pedro and Chaves, Ricardo}
}

@inproceedings{17770,
  title        = {Adapting Multi-Agent Systems Approach for Integration of Prosumers in Smart Grids},
  booktitle    = {Proceedings of the IEEE Eurocon 2013},
  year         = {2013},
  month        = jul,
  abstract     = {Massive deployment of distributed energy resources, predominately renewable, is expected to be a general trend in the near future. Integration of such elements in distribution grid represents one of key challenges that Smart Grids will have to face with. In this work we aim at bridging industrial and academic views on future trends in this field. We list main issues to be tackled and we propose a conceptual system-level solution based on Multi-Agent Systems. We show basic functionalities and structure for supporting ICT architecture.},
  author       = {Lukovi{\'c}, Slobodan},
  editor       = {Kovac, Boksov}
}

@inproceedings{17578,
  title        = {An Algorithm for Extended Dynamic Range Video in Embedded Systems},
  booktitle    = {SENSORNETS 2013 - 2nd International Conference on Sensor Networks},
  year         = {2013},
  month        = feb,
  publisher    = {INSTICC},
  organization = {INSTICC},
  address      = {Barcelona, Spain},
  keywords     = {embedded systems, extended dynamic range, HDR, image processing, sensor, videocamera},
  author       = {Ferrante, Alberto and Chelodi, Massimo and Bruschi, Francesco and Mozzetti, Valeria}
}

@article{17735,
  title        = {{ARTE}: an Application-specific Run-Time Management Framework for Multi-cores based on Queuing Models},
  journal      = {Parallel Computing},
  year         = {2013},
  author       = {Mariani, Giovanni and Palermo, Gianluca and Zaccaria, Vittorio and Silvano, Cristina}
}

% NOTE(review): key 18060 appears to be the early-access record (volume PP)
% of the article that is also listed under key 18482; both carry the same DOI.
% Both records are kept so that documents citing either key still resolve.
@article{18060,
  title        = {Automatic Application of Power Analysis Countermeasures},
  journal      = {IEEE Transactions on Computers},
  volume       = {PP},
  number       = {99},
  year         = {2013},
  month        = dec,
  abstract     = {We introduce a compiler that automatically inserts software countermeasures to protect cryptographic algorithms against power-based side-channel attacks. The compiler first estimates which instruction instances leak the most information through side-channels. This information is obtained either by dynamic analysis, evaluating an information theoretic metric over the power traces acquired during the execution of the input program, or by static analysis. As information leakage implies a loss of security, the compiler then identifies (groups of) instruction instances to protect with a software countermeasure such as random precharging or Boolean masking. As software protection incurs significant overhead in terms of cryptosystem runtime and memory usage, the compiler protects the minimum number of instruction instances to achieve a desired level of security. The compiler is evaluated on two block ciphers, AES and Clefia; our experiments demonstrate that the compiler can automatically identify and protect the most important instruction instances. To date, these software countermeasures have been inserted manually by security experts, who are not necessarily the main cryptosystem developers. Our compiler offers significant productivity gains for cryptosystem developers who wish to protect their implementations from side-channel attacks.},
  issn         = {0018-9340},
  doi          = {10.1109/TC.2013.219},
  author       = {Bayrak, Ali Galip and Regazzoni, Francesco and Novo Bruna, David and Brisk, Philip and Standaert, Fran{\c c}ois-Xavier and Ienne, Paolo}
}

@article{155.CaDeMeTuSt12.VLSI,
  title        = {Adaptivity Support for {MPSoCs} based on Process Migration in Polyhedral Process Networks},
  journal      = {VLSI Design},
  volume       = {2012},
  number       = {Article ID 987209},
  year         = {2012},
  note         = {Special issue on Application-Driven Design of Processor, Memory, and Communication Architectures for MPSoCs},
  month        = feb,
  pages        = {15 pages},
  publisher    = {Hindawi},
  abstract     = {System adaptivity is becoming an important feature of modern embedded multiprocessor systems. To achieve the goal of system adaptivity when executing Polyhedral Process Networks (PPNs) on a generic tiled Network-on-Chip (NoC) MPSoC platform, we propose an approach to enable the run-time migration of processes among the available platform resources. In our approach, process migration is allowed by a middleware layer which comprises two main components. The first component concerns the inter-tile data communication between processes. We develop and evaluate a number of different communication approaches which implement the semantics of the PPN model of computation on a generic NoC platform. The presented communication approaches do not depend on the mapping of processes, and have been implemented on a Network-on-Chip multiprocessor platform prototyped on an FPGA. Their comparison in terms of the introduced overhead is presented in two case studies with different communication characteristics. The second middleware component allows the actual run-time migration of PPN processes. To this end, we propose and evaluate a process migration mechanism which leverages the PPN model of computation to guarantee a predictable and efficient migration procedure. The efficiency and applicability of the proposed migration mechanism is shown in a real-life case study.},
  keywords     = {middleware, network-on-chip (NoC), polyhedral process networks (PPN), process migration, system adaptivity},
  author       = {Cannella, Emanuele and Derin, Onur and Meloni, Paolo and Tuveri, Giuseppe and Stefanov, Todor}
}

@inproceedings{150.KaLu11,
  title        = {Adoption of Model-Driven methodology to aggregations design in Power Grid},
  booktitle    = {INDIN {\textquoteright}11: Proceedings of the 9th IEEE International Conference on Industrial Informatics},
  year         = {2011},
  month        = {July 26--29},
  pages        = {1--6},
  address      = {Caparica, Lisbon, Portugal},
  abstract     = {Economical and environmental concerns push toward novel solutions for sustainable, renewable and intelligent energy power grid, the Smart Grid. Very often, this includes aggregation of renewable resources and intelligent loads such as electrical vehicles. Such complex system involve a number of various stakeholders coming from different areas of expertise. Even so, on-going projects do not apply unique formal language. In order to better correlate the projects, model-driven methodology and SysML are proposed for system design.},
  keywords     = {EVs aggregation, model-driven design, smart grid, SysML},
  doi          = {10.1109/INDIN.2011.6034936},
  author       = {Kaitovi{\'c}, Igor and Lukovi{\'c}, Slobodan}
}

@inproceedings{18493,
  title        = {{ADSC}: Application-Driven Storage Control for Energy Efficiency},
  booktitle    = {Information and Communication on Technology for the Fight against Global Warming - First International Conference ICT-GLOW},
  series       = {Lecture Notes in Computer Science},
  volume       = {6868},
  year         = {2011},
  month        = aug,
  pages        = {165--179},
  publisher    = {Springer},
  organization = {Springer},
  address      = {Toulouse, France},
  abstract     = {While performance and quality of service are the main criteria for application data management on storage units, energy efficiency is increasingly being stated as an additional criterion for evaluation. Due to the increasing energy consumption of storage subsystems, improving their energy efficiency is an important issue. In this paper we present a novel approach to storage management whereby both mid-level (file placement) and low level (disk mode) aspects are controlled, in a tiered storage architecture. The proposed mechanism is based on policies, and it is implemented via fuzzy logic rules, in contrast to attempting to build a model of the storage subsystem. The inputs to the storage management system are high level (application), mid level (file system) and low level (disk access patterns) information. The effectiveness of our approach has been validated by means of a case study using a TPC-C benchmark modified to access file level data. Results from this simulation are presented.},
  isbn         = {978-3-642-23446-0},
  doi          = {10.1007/978-3-642-23447-7_15},
  url          = {http://dx.doi.org/10.1007/978-3-642-23447-7_15},
  author       = {Cappiello, Cinzia and Hinostroza, Alicia and Pernici, Barbara and Sami, Mariagiovanna and Henis, Ealan and Kat, Ronen I. and Meth, Kalman Z. and Mura, Marcello}
}

@incollection{141.aetherinbook.2011,
  title        = {{AETHER}: Self-Adaptive Networked Entities: Autonomous Computing Elements for Future Pervasive Applications and Technologies},
  booktitle    = {Reconfigurable Computing: From FPGAs to Hardware/Software Codesign},
  year         = {2011},
  pages        = {149--184},
  publisher    = {Springer},
  organization = {Springer},
  address      = {New York, USA},
  abstract     = {The AETHER project has laid the foundation of a complete new framework for designing and programming computing resources that live in changing environments and need to re-configure their objectives in a dynamic way. This chapter contributes to a strategic research agenda in the field of self-adaptive computing systems. It brings inputs to the reconfigurable hardware community and proposes directions to go for reconfigurable hardware and research on self-adaptive computing; it tries to identify some of the most promising future technologies for reconfiguration, while pointing out the main foreseen Challenges for reconfigurable hardware. This chapter presents the main solutions the AETHER project proposed for some of the major concerns in trying to engineer a self-adaptive computing system. The text exposes the AETHER vision of self-adaptation and its requirements. It describes and discusses the proposed solutions for tackling self-adaptivity at the various levels of abstractions. It exposes how the developed technologies could be put together in a real methodology and how self-adaptation could then be used in potential applications. Finally and based on lessons learned from AETHER, we discuss open issues and research opportunities and put those in perspective along other investigations and roadmaps.},
  isbn         = {978-1-4614-0061-5},
  author       = {Gamrat, Christian and Philippe, Jean-Marc and Jesshope, Chris and Shafarenko, Alex and Bisdounis, Labros and Bondi, Umberto and Ferrante, Alberto and Cabestany, Joan and Huebner, Michael and Parsinnen, Juha and Kadlec, Jiri and Danek, Martin and Tain, Benoit and Eisenbach, Susan and Auguin, Michel and Diguet, Jean-Philippe and Lenormand, Eric and Roux, Jean-Luc},
  editor       = {Cardoso, Joao Manuel Pai and Huebner, Michael}
}

@inproceedings{149.MaPaSiZa.SASP11,
  title        = {{ARTE}: an Application-specific Run-Time Management Framework for Multi-core Systems},
  booktitle    = {Proceedings IEEE SASP{\textquoteright}11 - Symposium on Application Specific Processors},
  year         = {2011},
  month        = jun,
  address      = {San Diego, CA, USA},
  abstract     = {Programmable multi-core and many-core platforms increase exponentially the challenge of task mapping and scheduling, provided that enough task-parallelism does exist for each application. This problem worsens when dealing with small ecosystems such as embedded systems-on-chip. In fact, in this case, the assumption of exploiting a traditional operating system is out of context given the memory available to satisfy the run-time footprint of such a configuration. An efficient Run-time Resource Management (RRM) becomes of paramount importance to dispatch tasks to the cores by taking into account the task-parallelization options that each application provides. State-of-the-art approaches to RRM try to allocate resources to maximize the instantaneous throughput while meeting a power budget constraint. In this paper, we will show that queuing theory can be an alternative yet effective way of solving resource allocation by presenting ARTE, an Application-specific Run-Time managEment framework. The framework exploits few assumptions about the target many-core computing fabric such as the availability of performance (throughput) information about the platform applications. We will show that this information can be combined, at run-time, with queuing models to enhance the response time of the applications by pounding the actual effect on the system power consumption better than previous approaches. Experimental results show that, compared to reference state-of-the-art RRM techniques, ARTE is able to efficiently improve system performance by pro-actively reducing the response time while meeting the same power consumption requirements. Besides, we will show that the run-time overhead of ARTE does not significantly impact neither the system performance nor the on-chip-memory occupation.},
  doi          = {10.1109/SASP.2011.5941085},
  author       = {Mariani, Giovanni and Palermo, Gianluca and Silvano, Cristina and Zaccaria, Vittorio}
}

@inproceedings{124.LuKaBo10,
  title        = {Adopting system engineering methodology to Virtual Power Systems design flow},
  booktitle    = {CPSWEEK/GREEMBED 2010: Proceedings of the First Workshop on Green and Smart Embedded System Technology: Infrastructures, Methods and Tools},
  year         = {2010},
  month        = apr,
  address      = {Stockholm, Sweden},
  abstract     = {The concept of Virtual Power System (VPS) emerges as a promising response for increased concerns on secure, sustainable and at the same time {\textquoteright}clean{\textquoteright} energy supply requests. This novel concept aims at boosting operational efficiency of Distributed Energy Resources (DER) but also at establishing them as an autonomous commercial actor on the open energy market. Nevertheless, VPSs are fairly complex HW/SW systems that require holistic multidisciplinary approach and also novel specification, modeling and analysis instruments to facilitate mutual understanding among stakeholders from different fields. We introduce UML/SysML based modeling methodology to describe such power system related issues aiming at providing an unified modeling instrument applicable for VPSs design flow. In the proposed system engineering methodology, system representation starts from a very general context description and gets refined through different levels of abstraction down to concrete embedded systems employed to perform defined tasks.},
  author       = {Lukovi{\'c}, Slobodan and Kaitovi{\'c}, Igor and Bondi, Umberto}
}

@inproceedings{75.BoPaSa08,
  title        = {An adaptable {FPGA}-based System for Regular Expression Matching},
  booktitle    = {Proceedings of Design, Automation and Test in Europe (DATE) Conference},
  year         = {2008},
  month        = {March 10--14},
  address      = {Munich, Germany},
  abstract     = {In many applications string pattern matching is one of the most intensive tasks in terms of computation time and memory accesses. Network Intrusion Detection Systems and DNA Sequence Matching are two examples. Since software solutions are not able to satisfy the performance requirements, specialized hardware architectures are required. In this paper we propose a complete framework for regular expression matching, both in its architecture and compiler. This special-purpose processor is programmed using regular expressions as programming language. With the parallelism exploited in the design it is possible to achieve a throughput greater than one character per clock cycle, requiring O(n) memory space. The VHDL description of the proposed architecture is fully configurable. A design space exploration to find the optimal architecture based on area and performance cost-function is presented.},
  keywords     = {FPGA-based design, regular expression matching},
  doi          = {10.1109/DATE.2008.4484852},
  author       = {Bonesana, Ivano and Paolieri, Marco and Santambrogio, Marco Domenico}
}

@inproceedings{80.LuFi08,
  title        = {An Automated Design Flow for {NoC}-based {MPSoCs} on {FPGA}},
  booktitle    = {RSP 2008, The 19th IEEE/IFIP International Symposium on Rapid System Prototyping},
  year         = {2008},
  month        = {June 2--5},
  address      = {Monterey, USA/CA},
  abstract     = {Increased dynamics of the embedded devices market makes reduced time-to-market emerge as one of most challenging tasks in modern embedded system design. The complexity of Multiprocessor Systems-on-Chip (MPSoCs) rapidly increases and Networks-on-Chips (NoCs) have emerged as design strategy to cope with it. In order to allow fast generation of these platforms in the development phase, a full design flow is required. On the other hand, modern FPGAs provide the possibility for fast and low-cost prototyping, representing an efficient response to these needs. In this paper we present a framework, based on the Xilinx Embedded Development Kit (EDK) design flow, for the generation of MPSoCs based on NoCs. The tool provides system designers with the possibility to easily and quickly generate desired architectures that can be helpful for testing, debugging and verifying purposes. Our integrated design flow takes as input a textual description of the system and produces as final result a configuration bitstream file. The framework has been tested and verified on a Xilinx Virtex-II Pro board.},
  keywords     = {FPGA, multiprocessor system-on-chip (MPSoC), network-on-chip (NoC), reconfigurable systems, security},
  doi          = {10.1109/RSP.2008.31},
  author       = {Lukovi{\'c}, Slobodan and Fiorin, Leandro}
}

@inproceedings{61.PaMaSiLo07,
  title        = {Application-Specific Topology Design Customization for {STNoC}},
  booktitle    = {DSD07, in proceedings of 10th EUROMICRO Conference on Digital System Design Architectures, Methods and Tools (DSD 07)},
  year         = {2007},
  month        = {August 29--31},
  address      = {L{\"u}beck, Germany},
  abstract     = {Customized network-oriented communication architectures have recently become a must to support high bandwidth SoCs. To this end, a corresponding communication design flow is necessary to support the design space exploration of complex SoCs with tight design constraints. In order to exploit the benefits introduced by the NoC approach for the on-chip communication, the paper presents a Pareto Simulated Annealing (PSA) approach for the customization of the network topology. The proposed PSA approach has been applied to STNoC, the Network on Chip developed by STMicroelectronics. Starting from the ring topology, the proposed application-specific design flow tries to find a set of customized topologies (optimized in terms of performance and area/energy overhead) by adding custom links up to the spidergon topology.},
  keywords     = {application specific design, mapping, network-on-chip (NoC), STNoC, topology customization},
  doi          = {10.1109/DSD.2007.4341522},
  author       = {Palermo, Gianluca and Mariani, Giovanni and Silvano, Cristina and Locatelli, Riccardo and Coppola, Marcello}
}

@inproceedings{48.Giaconia2007,
  title        = {Area and Power Efficient Synthesis of {DPA}-Resistant Cryptographic {SBoxes}},
  booktitle    = {International Conference on VLSI Design \& Embedded Systems},
  year         = {2007},
  month        = {January 6--10},
  address      = {Bangalore, India},
  abstract     = {This paper presents a novel design methodology for the hardware implementation of non-linear bijective functions, commonly used in most symmetric-key cryptographic algorithms and known as substitution boxes (S-boxes). The proposed technique thwarts a particularly relevant class of side-channel attacks against cryptographic hardware, that of differential power analysis attacks (DPA). In the proposed approach, the cost of the countermeasure is kept low in terms of silicon process overheads (standard CMOS gates used), area requirement, power consumption and latency, when compared to existing countermeasures. Its effectiveness is proven by showing resistance to simulated DPA attacks using power curves derived with SPICE simulation.},
  keywords     = {differential power analysis (DPA), low power design, side channel attacks},
  doi          = {10.1109/VLSID.2007.44},
  author       = {Giaconia, Matteo and Macchetti, Marco and Regazzoni, Francesco and Schramm, Kai}
}

@inproceedings{42.MaChen2006,
  title        = {{ASIC} Hardware Implementation of the {IDEA} {NXT} Encryption Algorithm},
  booktitle    = {IEEE International Symposium on Circuits and Systems},
  year         = {2006},
  month        = {May 21--24},
  address      = {Kos, Greece},
  abstract     = {Symmetric-key block ciphers are often used to provide data confidentiality with low complexity, especially in the case of dedicated hardware implementations. IDEA NXT is a novel block cipher family, which has many interesting features and is targeted to multimedia streaming encryption. Different values can be assigned to the hardware architecture parameters in order to scale the security and the performance of the cipher. In this paper, we implement the IDEA NXT algorithm in custom silicon, using a commercial technology library; different optimizations are applied in order to satisfy different constraints in terms of latency and area occupation, maintaining a high level of security. After giving an overview of the IDEA NXT design, a discussion of the implementation choices and trade offs is given, highlighting the similarities and the main differences with regards to other block ciphers. To the authors{\textquoteright} knowledge this is the first paper describing such work.},
  doi          = {10.1109/ISCAS.2006.1693715},
  author       = {Macchetti, Marco and Chen, Wenyu}
}

@inproceedings{28.1049903,
  title        = {Application-Driven Optimization of {VLIW} Architectures: A Hardware-Software Approach},
  booktitle    = {RTAS {\textquoteright}05: Proceedings of the 11th IEEE Real Time on Embedded Technology and Applications Symposium},
  year         = {2005},
  month        = mar,
  pages        = {128--137},
  publisher    = {IEEE Computer Society},
  organization = {IEEE Computer Society},
  address      = {Washington, DC, USA},
  abstract     = {A large number of embedded multimedia applications are characterized by high instruction-level parallelism (ILP) especially in the most critical internal loop bodies. Very Large Instruction Word (VLIW) architectures Application Specific Instruction Set Processors (ASIP) are best suited to exploit such parallelism. Fast design space exploration and optimization of VLIW architecture to a specific application target is increasingly becoming the crucial factor to achieve higher efficiency designs in a relatively small amount of time. In this paper we propose an example of VLIW architecture application driven optimization using the VEX (VLIW Example) system. A typical image processing application, the Imaging Pipeline, has been chosen as an example.},
  keywords     = {design space exploration, embedded systems, HW/SW co-design, HW/SW partitioning, system level design, very long instruction words (VLIW)},
  isbn         = {0-7695-2302-1},
  doi          = {10.1109/RTAS.2005.9},
  author       = {Ferrante, Alberto and Piscopo, Giuseppe and Scaldaferri, Stefano}
}

@inproceedings{36.RegNacLaj2005,
  title        = {Automatic Synthesis of the Hardware/Software Interface in Multiprocessor Architectures},
  booktitle    = {FDL{\textquoteright}05 - Forum on Specification and Design Languages},
  year         = {2005},
  month        = {September 27--30},
  address      = {Lausanne, Switzerland},
  abstract     = {Although Moore{\textquoteright}s Law, in principle, enables a huge number of components to be integrated into a single chip, design methods that will allow system architects to put the components together to achieve cost, power and time-to-market targets are severely lacking. System-level design and optimization techniques can significantly reduce the design gap by providing solutions that achieve correct-by-construction rather than the correct-by-iteration approach. This paper presents a programmatic interface generation tool for automating the generation of the hardware/software interfaces in the context of multiprocessor Systems-On-Chips. The solutions that we present are of crucial importance in a platform based design environment for building a flexible system with reusable IPs and CPU cores.},
  keywords     = {HW/SW co-design, system-on-chip (SoC)},
  author       = {Regazzoni, Francesco and Nacul, Andre Costi and Lajolo, Marcello}
}

@inproceedings{17.NegBon2004,
  title        = {The {ALaRI} Intranet: a Remote Collaboration Platform for a Worldwide Learning and Research Network},
  booktitle    = {World Conference on Educational Multimedia, Hypermedia and Telecommunications 04 (ED-MEDIA 04)},
  year         = {2004},
  pages        = {5042--5047},
  publisher    = {AACE Press},
  organization = {AACE Press},
  address      = {Lugano, Switzerland},
  abstract     = {The ALaRI Intranet is a web-based remote learning, tutoring and collaboration platform that has been developed within the ANTITESYS project. ANTITESYS is a EU project involving some of the major academic and industrial institutions in Europe; its aim is to foster academic-industrial collaboration in the field of embedded systems whilst forming selected students by means of a one-year master program, held at the ALaRI institute sited in Lugano, Switzerland. What makes this scenario very unique lies in the roles played by the industrial and academic partners of ANTITESYS. The two sides contribute to the training of the master students in different ways, but both share the problem of integrating remote and face-to-face meetings with the students and with the other stakeholders. In this paper, we present the requirements gathering process and the design phase of the ALaRI Intranet, plus some details about its actual implementation and some initial usage figures.},
  keywords     = {case study, remote cooperation},
  author       = {Negri, Luca and Bondi, Umberto}
}

@inproceedings{20.989053,
  title        = {An {ASIC} design for a high speed implementation of the hash function {SHA-256} (384, 512)},
  booktitle    = {GLSVLSI {\textquoteright}04: Proceedings of the 14th ACM Great Lakes symposium on VLSI},
  year         = {2004},
  pages        = {421--425},
  publisher    = {ACM Press, New York, USA},
  organization = {ACM Press, New York, USA},
  address      = {Boston, MA, USA},
  abstract     = {An implementation of the hash functions SHA-256, 384 and 512 is presented, obtaining a high clock rate through a reduction of the critical path length, both in the Expander and in the Compressor of the hash scheme. The critical path is shown to be the smallest achievable. Synthesis results show that the new scheme can reach a clock rate well exceeding 1 GHz using a 0.13~$\mu$m technology.},
  isbn         = {1-58113-853-9},
  doi          = {10.1145/988952.989053},
  author       = {Dadda, Luigi and Macchetti, Marco and Owen, Jeff}
}

@inproceedings{11.BiMaBeBreZaFra2003,
  title        = {About the Performances of the {Advanced Encryption Standard} in Embedded Systems with Cache Memory},
  booktitle    = {ISCAS 2003},
  year         = {2003},
  month        = {May 25--28},
  pages        = {145--148},
  address      = {Bangkok},
  abstract     = {Modern networked embedded systems represent a growing market segment in which security is becoming an essential requirement. The Advanced Encryption Standard (AES) specification is becoming the default choice for such type of systems; however, a proper software implementation of AES is of fundamental importance in order to achieve significant performance. Current implementations presented in literature differ in terms of the amount of look-up tables used for pre-computing the functions of the encryption/decryption phase. This raises some questions regarding which AES implementation is optimal for a specific system configuration that, up to now, have been only empirically solved. In this work, we present an analytical model to study and evaluate the performance of the possible AES implementations in the early phases of system development. We then show that the proposed high-level timing model captures, with significant accuracy, the actual performance of current AES applications and thus it can be used for early evaluation of optimal AES implementations and to support the design space exploration phase. Validating experiments have been carried out on the Lx architecture, a scalable and customizable VLIW architecture developed by STMicroelectronics and HP Labs. Some final considerations are eventually reported about the relevant characteristics of the analyzed implementations and the role of the cache memory.},
  doi          = {10.1109/ISCAS.2003.1206212},
  author       = {Bircan, Aril and Macchetti, Marco and Bertoni, Guido Marco and Breveglieri, Luca and Zaccaria, Vittorio and Fragneto, Pasqualina}
}

@article{15.AlGaSte2003,
  title        = {An Application Level Synthesis Methodology for Multidimensional Embedded Processing Systems},
  journal      = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
  volume       = {22},
  number       = {11},
  year         = {2003},
  month        = nov,
  pages        = {1457--1470},
  abstract     = {The implementation of multidimensional systems in embedded devices is a major design challenge due to the high algorithmic complexity of the applications. The authors suggest a novel application-level synthesis methodology for those parts of the embedded application which are characterized by being Lebesgue measurable (the computation involved in signal and image processing systems is Lebesgue measurable). The synthesis methodology, based on perturbation analysis, supports the design of analog, digital, or mixed implementations at the very high level of the system design cycle. The outputs of the methodology are quantitative indications regarding the maximum performance loss tolerable by the subsystems composing the application. Such information, augmented with a stochastic description of the tolerated perturbations, can be related to lower synthesis levels and guide the designer toward the final implementation of the embedded device. The perturbation analysis is based on randomized algorithms for an effective evaluation of the performance loss of the computational flow once affected by behavioral perturbations and a Tabu-search-inspired optimizing algorithm for distributing the tolerable performance loss at the system output along the computational subsystems composing the possibly multidimensional processing.},
  keywords     = {application-level synthesis, multidimensional systems, randomized algorithms, robustness analysis, tabu search, yield maximization},
  doi          = {10.1109/TCAD.2003.818304},
  author       = {Alippi, Cesare and Galbusera, Andrea and Stellini, Marco}
}

@conference {5.AlGaSte2002, title = {An Application Level Synthesis Methodology for Embedded Systems}, booktitle = {ISCAS 2002}, year = {2002}, month = {May 26-29}, pages = {473-476}, address = {Scottsdale}, abstract = {Time-to-market, cost and power consumption requirements are pushing research in embedded systems towards the development of sophisticated CAD environments. The paper suggests a novel synthesis methodology for embedded devices based on an application level perturbation analysis. 
The methodology is based on randomised algorithms for evaluating the effective performance loss of the computational flow induced by perturbations and a Tabu-search optimising algorithm for distributing the tolerable performance loss along the computational subsystems composing the computation.}, doi = {http://dx.doi.org/10.1109/ISCAS.2002.1010743}, author = {Alippi, Cesare and Galbusera, Andrea and Stellini, Marco} }