% Bibliography database: conference papers, one journal article and two
% patents on AES / Rijndael implementations, listed in descending year order
% (2008 down to 2001).
%
% Conventions used in this file:
%   - one field per line, values brace-delimited;
%   - month uses the predefined three-letter macros (unquoted); where the
%     source recorded a day, it is appended with "#" concatenation so the
%     information is not lost;
%   - page ranges use a double hyphen;
%   - doi holds the bare DOI, without a resolver prefix;
%   - acronyms and proper nouns in titles are brace-protected so that
%     sentence-casing styles do not lowercase them.

@inproceedings{82.BeBrFaRe08,
  author    = {Bertoni, Guido Marco and Breveglieri, Luca and Farina, Roberto and Regazzoni, Francesco},
  title     = {A 640 {Mbit/s} 32-bit Pipelined Implementation of the {AES} Algorithm},
  booktitle = {SECRYPT},
  year      = {2008},
  month     = jul # "~26",
  address   = {Porto, Portugal},
  keywords  = {cryptography, security},
  abstract  = {Due to the diffusion of cryptography in real time applications, performances in cipher and decipher operations are nowadays more important than in the past. On the other side, while facing the problem for embedded systems, additional constraints of area and power consumption must be considered. Many optimized software implementations, instruction set extensions and co-processors, were studied in the past with the aim to either increase performances or to keep the cost low. This paper presents a co-processor that aims to be an intermediate solution, suitable for such applications that require a throughput in the Megabit range and where the die size is a bit relaxed as constraint. To achieve this goal, the core is designed to operate at 32 bits and the throughput is guaranteed by a 2 stage pipeline with data forwarding. The obtained results synthesizing our coprocessor by means of the CMOS $0.18$~$\mu$m standard cell library show that the throughput reaches 640 Mbit/s while the circuit size is of only 20 K equivalent gates.},
}

@inproceedings{39.1169233,
  author    = {Bertoni, Guido Marco and Breveglieri, Luca and Farina, Roberto and Regazzoni, Francesco},
  title     = {Speeding Up {AES} By Extending a 32 bit Processor Instruction Set},
  booktitle = {ASAP '06: Proceedings of the IEEE 17th International Conference on Application-specific Systems, Architectures and Processors (ASAP'06)},
  year      = {2006},
  pages     = {275--282},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  isbn      = {0-7695-2682-9},
  doi       = {10.1109/ASAP.2006.62},
  keywords  = {cryptography, HW/SW co-design, instruction set extension},
  abstract  = {Nowadays the need of speed in cipher and decipher operations is more important than in the past. This is due to the diffusion of real time applications, which fact involves the use of cryptography. Many co-processors for cryptography were studied and presented in the past, but only few works were addressed to the enhancement of the instruction set architecture (ISA) of the embedded processor. This paper presents an extension of the ISA of a 32 bit processor, that aims at speeding up the software implementations of the AES algorithm. After the identification of the most frequently executed and the most time consuming sections of the algorithm, a set of dedicated instructions is designed in order to improve the performances of the cipher operations. We validate our instruction set extension by measuring the speed up for different optimized implementations of AES using an ARM processor simulator, but the enhancements we propose are general enough to be applied to almost all 32 bit processors.},
}

% NOTE(review): "patent" is not one of the standard classic-BibTeX entry
% types (biblatex defines it); confirm the target style handles it.
% NOTE(review): the grant numbers are kept in the chapter and issn fields
% exactly as found. Neither is the proper field for a patent number; confirm
% what consumes these fields before moving them.
@patent{22.pat20040228482PATENT,
  author   = {Macchetti, Marco and Fragneto, Pasqualina and Bertoni, Guido Marco},
  title    = {Method of implementing one-to-one binary function and relative hardware device, especially for a {Rijndael} {S-box}},
  number   = {US 10/816,791 -- EP 20030425211},
  year     = {2004},
  month    = oct,
  type     = {Grant},
  chapter  = {US 7502464 B2 -- EP 1465365 A1},
  issn     = {US 7502464 B2},
  abstract = {A method for implementing one-to-one binary functions defined on the Galois field GF($2^8$) is very useful for forming fast and low power hardware devices regardless of the binary function. The method includes decoding an input byte for generating at least one bit string that contains only one active bit, and logically combining the bits of the bit string according to the binary function for generating a 256-bit string representing a corresponding output byte. The 256-bit string is then encoded in a byte for obtaining the output byte.},
}

@inproceedings{11.BiMaBeBreZaFra2003,
  author    = {Bircan, Aril and Macchetti, Marco and Bertoni, Guido Marco and Breveglieri, Luca and Zaccaria, Vittorio and Fragneto, Pasqualina},
  title     = {About the Performances of the {Advanced Encryption Standard} in Embedded Systems with Cache Memory},
  booktitle = {ISCAS 2003},
  year      = {2003},
  month     = may # "~25--28",
  pages     = {145--148},
  address   = {Bangkok},
  doi       = {10.1109/ISCAS.2003.1206212},
  abstract  = {Modern networked embedded systems represent a growing market segment in which security is becoming an essential requirement. The Advanced Encryption Standard (AES) specification is becoming the default choice for such type of systems; however, a proper software implementation of AES is of fundamental importance in order to achieve significant performance. Current implementations presented in literature differ in terms of the amount of look-up tables used for pre-computing the functions of the encryption/decryption phase. This raises some questions regarding which AES implementation is optimal for a specific system configuration that, up to now, have been only empirically solved. In this work, we present an analytical model to study and evaluate the performance of the possible AES implementations in the early phases of system development. We then show that the proposed high-level timing model captures, with significant accuracy, the actual performance of current AES applications and thus it can be used for early evaluation of optimal AES implementations and to support the design space exploration phase. Validating experiments have been carried out on the Lx architecture, a scalable and customizable VLIW architecture developed by STMicroelectronics and HP Labs. Some final considerations are eventually reported about the relevant characteristics of the analyzed implementations and the role of the cache memory.},
}

@inproceedings{8.752733,
  author    = {Bertoni, Guido Marco and Breveglieri, Luca and Fragneto, Pasqualina and Macchetti, Marco and Marchesin, Stefano},
  title     = {Efficient Software Implementation of {AES} on 32-Bit Platforms},
  booktitle = {CHES '02: Revised Papers from the 4th International Workshop on Cryptographic Hardware and Embedded Systems},
  year      = {2003},
  pages     = {159--171},
  publisher = {Springer-Verlag},
  address   = {London, UK},
  isbn      = {3-540-00409-2},
  abstract  = {Rijndael is the winner algorithm of the AES contest; therefore it should become the most used symmetric-key cryptographic algorithm. One important application of this new standard is cryptography on smart cards. In this paper we present an optimisation of the Rijndael algorithm to speed up execution on 32-bits processors with memory constraints, such as those used in smart cards. First a theoretical analysis of the Rijndael algorithm and of the proposed optimisation is discussed, and then simulation results of the optimised algorithm on different processors are presented and compared with other reference implementations, as known from the technical literature.},
}

@article{13.MaBer2003,
  author   = {Macchetti, Marco and Bertoni, Guido Marco},
  title    = {Hardware Implementation of the {Rijndael} {Sbox}: a Case Study},
  journal  = {ST Journal of System Research},
  year     = {2003},
  month    = jul,
  pages    = {84--91},
  abstract = {The Rijndael algorithm was officially selected as the Advanced Encryption Standard in 2001 and will replace the DES in all applications, including Smart Card based products. For this kind of platform, a compact, area efficient hardware implementation of the algorithm is highly desirable. This paper describes such an implementation, which we have based on GF($2^8$) finite field decomposition. We present our results from mappings on the STMicroelectronics ASIC technology library and discuss area, timing and power consumption figures.},
}

% NOTE(review): "patent" is not one of the standard classic-BibTeX entry
% types (biblatex defines it); confirm the target style handles it.
% NOTE(review): the grant number is kept in the chapter and issn fields
% exactly as found. Neither is the proper field for a patent number; confirm
% what consumes these fields before moving it.
@patent{10.pat20030068036PATENT,
  author   = {Macchetti, Marco and Marchesin, Stefano and Bondi, Umberto and Breveglieri, Luca and Bertoni, Guido Marco and Fragneto, Pasqualina},
  title    = {Method and circuit for data encryption/decryption},
  number   = {US 09/974,705},
  year     = {2003},
  month    = apr,
  type     = {Grant},
  chapter  = {US7801301 B2},
  issn     = {US7801301B2},
  abstract = {Data are converted between an unencrypted and an encrypted format according to the Rijndael algorithm, including a plurality of rounds. Each round is comprised of fixed set of transformations applied to a two-dimensional array, designated state, of rows and columns of bit words. At least a part of said transformations are applied on a transposed version of the state, wherein rows and columns are transposed for the columns and rows, respectively.},
}

@inproceedings{3.CaPoMaMaBeBreFra2001,
  author    = {Cassoli, Federico and Polloni, Flavio and Marchesin, Stefano and Macchetti, Marco and Bertoni, Guido Marco and Breveglieri, Luca and Fragneto, Pasqualina},
  title     = {Efficient {C} implementation of the {ECC} and {AES} cryptographic systems},
  booktitle = {Technology Leadership Day - organized by the MicroSwiss Network},
  year      = {2001},
  month     = oct # "~10",
  address   = {Fribourg},
}