Skinny-C
 All Data Structures Files Functions Variables Groups Pages
Skinny128.cpp
1 /*
2  * Copyright (C) 2017 Southern Storm Software, Pty Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "Skinny128.h"
24 #include "Crypto.h"
25 #include "utility/EndianUtil.h"
26 #include "utility/RotateUtil.h"
27 #include "utility/ProgMemUtil.h"
28 #include <string.h>
29 
95 #if defined(__AVR__)
96 #define USE_AVR_INLINE_ASM 1
97 #endif
98 
99 #ifndef CRYPTO_LITTLE_ENDIAN
100 #error "Arduino platforms are assumed to be little-endian"
101 #endif
102 
/**
 * \brief Constructs a Skinny-128 block cipher object.
 *
 * \param schedule Points to the caller-allocated key schedule,
 *        laid out as two 32-bit words per round.
 * \param rounds The number of rounds to perform during encryption
 *        and decryption.
 */
Skinny128::Skinny128(uint32_t *schedule, uint8_t rounds)
    : s(schedule), r(rounds)
{
}
113 
119 {
120 }
121 
/**
 * \brief Size of a Skinny-128 block.
 *
 * \return Always 16 bytes (128 bits).
 */
size_t Skinny128::blockSize() const
{
    return 16;
}
130 
131 #if USE_AVR_INLINE_ASM
132 
133 // Force the sboxes to be aligned on a 256-byte boundary.
134 // This makes sbox lookups more efficient.
135 #define ALIGN256 __attribute__((aligned(256)))
136 
137 // S-box tables for Skinny-128. We only use this for AVR platforms,
138 // as there will be issues with constant cache behaviour on ARM.
139 // It would be nice to avoid this for AVR as well, but the S-box
140 // operations are simply too slow using bit operations on AVR.
// Forward 8-bit S-box of Skinny-128 as a flat 256-entry lookup table,
// stored in program memory and 256-byte aligned (see ALIGN256 above) so
// that a lookup only needs to replace the low byte of the Z pointer.
static uint8_t const sbox[256] PROGMEM ALIGN256 = {
    0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a,
    0x53, 0x73, 0x5b, 0x7b, 0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b,
    0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b, 0xe5, 0xcc, 0xe8, 0xc1,
    0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9,
    0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8,
    0x03, 0xb0, 0x0b, 0xb9, 0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d,
    0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d, 0x62, 0x4a, 0x6c, 0x45,
    0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d,
    0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc,
    0x04, 0xb4, 0x0d, 0xbd, 0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed,
    0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd, 0x36, 0x8e, 0x38, 0x82,
    0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29,
    0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78,
    0x50, 0x70, 0x59, 0x79, 0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab,
    0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb, 0xe6, 0xce, 0xea, 0xc2,
    0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb,
    0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e,
    0x97, 0x27, 0x9f, 0x2f, 0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f,
    0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f, 0xa2, 0x18, 0xae, 0x16,
    0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf,
    0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 0xde, 0xfe,
    0xd7, 0xf7, 0xdf, 0xff,
};
// Inverse 8-bit S-box of Skinny-128 (the inverse of the sbox[] table
// above), also 256-byte aligned in program memory for fast lookups.
static uint8_t const sbox_inv[256] PROGMEM ALIGN256 = {
    0xac, 0xe8, 0x68, 0x3c, 0x6c, 0x38, 0xa8, 0xec, 0xaa, 0xae, 0x3a, 0x3e,
    0x6a, 0x6e, 0xea, 0xee, 0xa6, 0xa3, 0x33, 0x36, 0x66, 0x63, 0xe3, 0xe6,
    0xe1, 0xa4, 0x61, 0x34, 0x31, 0x64, 0xa1, 0xe4, 0x8d, 0xc9, 0x49, 0x1d,
    0x4d, 0x19, 0x89, 0xcd, 0x8b, 0x8f, 0x1b, 0x1f, 0x4b, 0x4f, 0xcb, 0xcf,
    0x85, 0xc0, 0x40, 0x15, 0x45, 0x10, 0x80, 0xc5, 0x82, 0x87, 0x12, 0x17,
    0x42, 0x47, 0xc2, 0xc7, 0x96, 0x93, 0x03, 0x06, 0x56, 0x53, 0xd3, 0xd6,
    0xd1, 0x94, 0x51, 0x04, 0x01, 0x54, 0x91, 0xd4, 0x9c, 0xd8, 0x58, 0x0c,
    0x5c, 0x08, 0x98, 0xdc, 0x9a, 0x9e, 0x0a, 0x0e, 0x5a, 0x5e, 0xda, 0xde,
    0x95, 0xd0, 0x50, 0x05, 0x55, 0x00, 0x90, 0xd5, 0x92, 0x97, 0x02, 0x07,
    0x52, 0x57, 0xd2, 0xd7, 0x9d, 0xd9, 0x59, 0x0d, 0x5d, 0x09, 0x99, 0xdd,
    0x9b, 0x9f, 0x0b, 0x0f, 0x5b, 0x5f, 0xdb, 0xdf, 0x16, 0x13, 0x83, 0x86,
    0x46, 0x43, 0xc3, 0xc6, 0x41, 0x14, 0xc1, 0x84, 0x11, 0x44, 0x81, 0xc4,
    0x1c, 0x48, 0xc8, 0x8c, 0x4c, 0x18, 0x88, 0xcc, 0x1a, 0x1e, 0x8a, 0x8e,
    0x4a, 0x4e, 0xca, 0xce, 0x35, 0x60, 0xe0, 0xa5, 0x65, 0x30, 0xa0, 0xe5,
    0x32, 0x37, 0xa2, 0xa7, 0x62, 0x67, 0xe2, 0xe7, 0x3d, 0x69, 0xe9, 0xad,
    0x6d, 0x39, 0xa9, 0xed, 0x3b, 0x3f, 0xab, 0xaf, 0x6b, 0x6f, 0xeb, 0xef,
    0x26, 0x23, 0xb3, 0xb6, 0x76, 0x73, 0xf3, 0xf6, 0x71, 0x24, 0xf1, 0xb4,
    0x21, 0x74, 0xb1, 0xf4, 0x2c, 0x78, 0xf8, 0xbc, 0x7c, 0x28, 0xb8, 0xfc,
    0x2a, 0x2e, 0xba, 0xbe, 0x7a, 0x7e, 0xfa, 0xfe, 0x25, 0x70, 0xf0, 0xb5,
    0x75, 0x20, 0xb0, 0xf5, 0x22, 0x27, 0xb2, 0xb7, 0x72, 0x77, 0xf2, 0xf7,
    0x2d, 0x79, 0xf9, 0xbd, 0x7d, 0x29, 0xb9, 0xfd, 0x2b, 0x2f, 0xbb, 0xbf,
    0x7b, 0x7f, 0xfb, 0xff,
};
189 
// Figure out how to do lookups from a pgmspace sbox table on this platform.
// Every variant copies the input byte into the low byte of Z (r30) and
// reads the table entry back into the same register.  Because the tables
// are aligned on a 256-byte boundary, the high byte of Z (and RAMPZ on
// large devices) never needs adjusting between lookups.
#if defined(RAMPZ)
#define SBOX(reg) \
    "mov r30," reg "\n" \
    "elpm " reg ",Z\n"
#elif defined(__AVR_HAVE_LPMX__)
#define SBOX(reg) \
    "mov r30," reg "\n" \
    "lpm " reg ",Z\n"
#elif defined(__AVR_TINY__)
#define SBOX(reg) \
    "mov r30," reg "\n" \
    "ld " reg ",Z\n"
#else
#define SBOX(reg) \
    "mov r30," reg "\n" \
    "lpm\n" \
    "mov " reg ",r0\n"
#endif
209 
// Mix the columns during an encryption round.  Operates on a single
// column, one byte from each of the four rows.  Performs the same
// update as the C fallback: row1 ^= row2; row2 ^= row0; then the rows
// rotate downwards with row0 receiving row3 ^ row2.
#define MIX_COLUMNS(row0, row1, row2, row3) \
    "eor " row1 "," row2 "\n" \
    "eor " row2 "," row0 "\n" \
    "mov __tmp_reg__," row3 "\n" \
    "eor __tmp_reg__," row2 "\n" \
    "mov " row3 "," row2 "\n" \
    "mov " row2 "," row1 "\n" \
    "mov " row1 "," row0 "\n" \
    "mov " row0 ",__tmp_reg__\n"
220 
// Inverse mix of the columns during a decryption round.  Exactly undoes
// the register shuffle and XORs performed by MIX_COLUMNS above.
#define MIX_COLUMNS_INV(row0, row1, row2, row3) \
    "mov __tmp_reg__," row3 "\n" \
    "mov " row3 "," row0 "\n" \
    "mov " row0 "," row1 "\n" \
    "mov " row1 "," row2 "\n" \
    "eor " row3 ",__tmp_reg__\n" \
    "eor __tmp_reg__," row0 "\n" \
    "mov " row2 ",__tmp_reg__\n" \
    "eor " row1 "," row2 "\n"
231 
232 #else // !USE_AVR_INLINE_ASM
233 
/**
 * \brief Applies the Skinny-128 S-box to each of the four bytes of a word.
 *
 * The specification builds the S-box from alternating SBOX_MIX and
 * SBOX_PERMUTE steps, finishing with SBOX_SWAP.  Here all of the MIX
 * steps are performed on the bits in their original positions and the
 * accumulated permutations are collapsed into a single final bit
 * shuffle, which saves many shift operations.
 *
 * \param x Four state bytes packed into a 32-bit word.
 * \return The word with the S-box applied to every byte independently.
 */
inline uint32_t skinny128_sbox(uint32_t x)
{
    /* Mix the bits in-place.  Each statement below fuses one of the
     * original "y = ...; x ^= ... ^ y;" pairs: both halves read the
     * same pre-update value of x, so combining them is equivalent. */
    x = ~x;
    x ^= ((x >> 2) & (x >> 3)) & 0x11111111U;
    x ^= ((((x << 5) & (x << 4)) & 0x40404040U)
        ^ (((x << 5) & (x << 1)) & 0x20202020U));
    x ^= ((((x >> 2) & (x << 1)) & 0x02020202U)
        ^ (((x << 2) & (x << 1)) & 0x80808080U));
    x ^= ((((x >> 1) & (x >> 2)) & 0x08080808U)
        ^ (((x >> 5) & (x << 1)) & 0x04040404U));
    x = ~x;

    /* Final bit shuffle, generated with
     * http://programming.sirrida.de/calcperm.php
     * The per-byte bit permutation is [2 7 6 1 3 0 4 5]. */
    return ((x & 0x04040404U) >> 2) |
           ((x & 0x40404040U) >> 4) |
           ((x & 0x80808080U) >> 6) |
           ((x & 0x01010101U) << 5) |
           ((x & 0x32323232U) << 2) |
           ((x & 0x08080808U) << 1);
}
286 
/**
 * \brief Applies the inverse Skinny-128 S-box to each byte of a word.
 *
 * Mirrors skinny128_sbox(): the specification's SBOX_SWAP and
 * SBOX_PERMUTE_INV steps are deferred and folded into one final bit
 * shuffle, while the SBOX_MIX stages run on the bits in their original
 * positions.
 *
 * \param x Four state bytes packed into a 32-bit word.
 * \return The word with the inverse S-box applied to every byte.
 */
inline uint32_t skinny128_inv_sbox(uint32_t x)
{
    /* Mix the bits in-place.  As in skinny128_sbox(), each statement
     * fuses an original "y = ...; x ^= ... ^ y;" pair; both terms use
     * the same pre-update x, so the fused form is equivalent. */
    x = ~x;
    x ^= ((((x >> 2) & (x >> 3)) & 0x10101010U)
        ^ (((x >> 1) & (x >> 3)) & 0x01010101U));
    x ^= ((((x >> 1) & (x >> 2)) & 0x08080808U)
        ^ (((x >> 6) & (x >> 1)) & 0x02020202U));
    x ^= ((((x >> 1) & (x << 2)) & 0x04040404U)
        ^ (((x << 2) & (x << 1)) & 0x80808080U));
    x ^= ((((x << 4) & (x << 5)) & 0x40404040U)
        ^ (((x << 5) & (x << 1)) & 0x20202020U));
    x = ~x;

    /* Final bit shuffle, generated with
     * http://programming.sirrida.de/calcperm.php
     * The per-byte bit permutation is [5 3 0 4 6 7 2 1]. */
    return ((x & 0x10101010U) >> 1) |
           ((x & 0xC8C8C8C8U) >> 2) |
           ((x & 0x20202020U) >> 5) |
           ((x & 0x02020202U) << 6) |
           ((x & 0x04040404U) << 4) |
           ((x & 0x01010101U) << 2);
}
340 
341 #endif // !USE_AVR_INLINE_ASM
342 
/**
 * \brief Encrypts a single 16-byte block using this cipher.
 *
 * \param output The buffer that receives the ciphertext block.
 * \param input The buffer containing the plaintext block.  May be the
 *        same buffer as output: the entire block is loaded into the
 *        working state before any output byte is stored.
 *
 * Runs r rounds of S-box / AddRoundKey / ShiftRows / MixColumns,
 * consuming two 32-bit schedule words per round.  Only rows 0 and 1
 * receive schedule material; row 2 is XORed with the fixed constant
 * 0x02 (the rc2 round constant of the SKINNY specification).
 */
void Skinny128::encryptBlock(uint8_t *output, const uint8_t *input)
{
#if USE_AVR_INLINE_ASM
#if defined(RAMPZ)
    uint32_t sbox_addr = (uint32_t)sbox;
#else
    uint16_t sbox_addr = (uint16_t)sbox;
#endif
    __asm__ __volatile__ (
        // Load the input block from Z[0..15] into r8..r23
        // (row 0 in r8..r11 ... row 3 in r20..r23).
        "ld r8,Z\n"
        "ldd r9,Z+1\n"
        "ldd r10,Z+2\n"
        "ldd r11,Z+3\n"
        "ldd r12,Z+4\n"
        "ldd r13,Z+5\n"
        "ldd r14,Z+6\n"
        "ldd r15,Z+7\n"
        "ldd r16,Z+8\n"
        "ldd r17,Z+9\n"
        "ldd r18,Z+10\n"
        "ldd r19,Z+11\n"
        "ldd r20,Z+12\n"
        "ldd r21,Z+13\n"
        "ldd r22,Z+14\n"
        "ldd r23,Z+15\n"

        // Set up Z to point to the start of the sbox table.
        "ldd r30,%A3\n"
        "ldd r31,%B3\n"
#if defined(RAMPZ)
        // Save RAMPZ and load it with the top byte of the table address.
        "in __tmp_reg__,%5\n"
        "push __tmp_reg__\n"
        "ldd __tmp_reg__,%C3\n"
        "out %5,__tmp_reg__\n"
#endif

        // Top of the round loop.
        "1:\n"

        // Transform the state using the sbox.
        SBOX("r8")
        SBOX("r9")
        SBOX("r10")
        SBOX("r11")
        SBOX("r12")
        SBOX("r13")
        SBOX("r14")
        SBOX("r15")
        SBOX("r16")
        SBOX("r17")
        SBOX("r18")
        SBOX("r19")
        SBOX("r20")
        SBOX("r21")
        SBOX("r22")
        SBOX("r23")

        // XOR the state with the key schedule (X walks forwards through
        // the schedule).  Rows 0 and 1 only; row 2 gets the constant 0x02.
        "ld __tmp_reg__,X+\n"
        "eor r8,__tmp_reg__\n"
        "ld __tmp_reg__,X+\n"
        "eor r9,__tmp_reg__\n"
        "ld __tmp_reg__,X+\n"
        "eor r10,__tmp_reg__\n"
        "ld __tmp_reg__,X+\n"
        "eor r11,__tmp_reg__\n"
        "ld __tmp_reg__,X+\n"
        "eor r12,__tmp_reg__\n"
        "ld __tmp_reg__,X+\n"
        "eor r13,__tmp_reg__\n"
        "ld __tmp_reg__,X+\n"
        "eor r14,__tmp_reg__\n"
        "ld __tmp_reg__,X+\n"
        "eor r15,__tmp_reg__\n"
        "ldi r24,0x02\n"
        "eor r16,r24\n"

        // Shift the rows (row 1 right by 1, row 2 by 2, row 3 by 3 cells).
        "mov __tmp_reg__,r15\n"
        "mov r15,r14\n"
        "mov r14,r13\n"
        "mov r13,r12\n"
        "mov r12,__tmp_reg__\n"
        "mov __tmp_reg__,r19\n"
        "mov r19,r17\n"
        "mov r17,__tmp_reg__\n"
        "mov __tmp_reg__,r18\n"
        "mov r18,r16\n"
        "mov r16,__tmp_reg__\n"
        "mov __tmp_reg__,r20\n"
        "mov r20,r21\n"
        "mov r21,r22\n"
        "mov r22,r23\n"
        "mov r23,__tmp_reg__\n"

        // Mix the columns, one column of four row bytes at a time.
        MIX_COLUMNS( "r8", "r12", "r16", "r20")
        MIX_COLUMNS( "r9", "r13", "r17", "r21")
        MIX_COLUMNS("r10", "r14", "r18", "r22")
        MIX_COLUMNS("r11", "r15", "r19", "r23")

        // Bottom of the loop: %4 holds the remaining round count.
        "dec %4\n"
        "breq 2f\n"
        "rjmp 1b\n"
        "2:\n"

        // Restore the original RAMPZ value.
#if defined(RAMPZ)
        "pop __tmp_reg__\n"
        "out %5,__tmp_reg__\n"
#endif

        // Store the final state into the output buffer.
        "ldd r30,%A2\n"
        "ldd r31,%B2\n"
        "st Z,r8\n"
        "std Z+1,r9\n"
        "std Z+2,r10\n"
        "std Z+3,r11\n"
        "std Z+4,r12\n"
        "std Z+5,r13\n"
        "std Z+6,r14\n"
        "std Z+7,r15\n"
        "std Z+8,r16\n"
        "std Z+9,r17\n"
        "std Z+10,r18\n"
        "std Z+11,r19\n"
        "std Z+12,r20\n"
        "std Z+13,r21\n"
        "std Z+14,r22\n"
        "std Z+15,r23\n"

        : : "x"(s), "z"(input), "Q"(output), "Q"(sbox_addr),
            "r"((uint8_t)r)
#if defined(RAMPZ)
        , "I" (_SFR_IO_ADDR(RAMPZ))
#endif
        : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
          "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
          "r24", "memory"
    );
#else // !USE_AVR_INLINE_ASM
    uint32_t state[4];
    const uint32_t *schedule;
    uint32_t temp;

    // Unpack the input block into the state array.
    // Easy since we assume the platform is little-endian.
    memcpy(state, input, sizeof(state));

    // Perform all encryption rounds.
    schedule = s;
    for (uint8_t index = r; index > 0; --index, schedule += 2) {
        // Apply the S-box to all bytes in the state.
        state[0] = skinny128_sbox(state[0]);
        state[1] = skinny128_sbox(state[1]);
        state[2] = skinny128_sbox(state[2]);
        state[3] = skinny128_sbox(state[3]);

        // Apply the subkey for this round.  Rows 0 and 1 take the two
        // schedule words; row 2 takes the fixed round constant 0x02.
        state[0] ^= schedule[0];
        state[1] ^= schedule[1];
        state[2] ^= 0x02;

        // Shift the cells in the rows right, which moves the cell
        // values up closer to the MSB. That is, we do a left rotate
        // on the word to rotate the cells in the word right.
        state[1] = leftRotate8(state[1]);
        state[2] = leftRotate16(state[2]);
        state[3] = leftRotate24(state[3]);

        // Mix the columns (also rotates the rows downwards).
        state[1] ^= state[2];
        state[2] ^= state[0];
        temp = state[3] ^ state[2];
        state[3] = state[2];
        state[2] = state[1];
        state[1] = state[0];
        state[0] = temp;
    }

    // Pack the result into the output buffer.
    memcpy(output, state, sizeof(state));
#endif // !USE_AVR_INLINE_ASM
}
530 
/**
 * \brief Decrypts a single 16-byte block using this cipher.
 *
 * \param output The buffer that receives the plaintext block.
 * \param input The buffer containing the ciphertext block.  May be the
 *        same buffer as output: the entire block is loaded into the
 *        working state before any output byte is stored.
 *
 * Runs the encryption rounds in reverse: inverse MixColumns, inverse
 * ShiftRows, subkey XOR (walking the schedule backwards from the end),
 * then the inverse S-box, for r rounds.
 */
void Skinny128::decryptBlock(uint8_t *output, const uint8_t *input)
{
#if USE_AVR_INLINE_ASM
#if defined(RAMPZ)
    uint32_t sbox_addr = (uint32_t)sbox_inv;
#else
    uint16_t sbox_addr = (uint16_t)sbox_inv;
#endif
    __asm__ __volatile__ (
        // Load the input block from Z[0..15] into r8..r23.
        "ld r8,Z\n"
        "ldd r9,Z+1\n"
        "ldd r10,Z+2\n"
        "ldd r11,Z+3\n"
        "ldd r12,Z+4\n"
        "ldd r13,Z+5\n"
        "ldd r14,Z+6\n"
        "ldd r15,Z+7\n"
        "ldd r16,Z+8\n"
        "ldd r17,Z+9\n"
        "ldd r18,Z+10\n"
        "ldd r19,Z+11\n"
        "ldd r20,Z+12\n"
        "ldd r21,Z+13\n"
        "ldd r22,Z+14\n"
        "ldd r23,Z+15\n"

        // Set up Z to point to the start of the inverse sbox table.
        "ldd r30,%A3\n"
        "ldd r31,%B3\n"
#if defined(RAMPZ)
        // Save RAMPZ and load it with the top byte of the table address.
        "in __tmp_reg__,%5\n"
        "push __tmp_reg__\n"
        "ldd __tmp_reg__,%C3\n"
        "out %5,__tmp_reg__\n"
#endif

        // Top of the round loop.
        "1:\n"

        // Inverse mix of the columns.
        MIX_COLUMNS_INV( "r8", "r12", "r16", "r20")
        MIX_COLUMNS_INV( "r9", "r13", "r17", "r21")
        MIX_COLUMNS_INV("r10", "r14", "r18", "r22")
        MIX_COLUMNS_INV("r11", "r15", "r19", "r23")

        // Inverse shift of the rows.
        "mov __tmp_reg__,r12\n"
        "mov r12,r13\n"
        "mov r13,r14\n"
        "mov r14,r15\n"
        "mov r15,__tmp_reg__\n"
        "mov __tmp_reg__,r19\n"
        "mov r19,r17\n"
        "mov r17,__tmp_reg__\n"
        "mov __tmp_reg__,r18\n"
        "mov r18,r16\n"
        "mov r16,__tmp_reg__\n"
        "mov __tmp_reg__,r23\n"
        "mov r23,r22\n"
        "mov r22,r21\n"
        "mov r21,r20\n"
        "mov r20,__tmp_reg__\n"

        // XOR the state with the key schedule.  X starts just past the
        // end of the schedule and walks backwards with pre-decrement,
        // so the bytes are consumed in reverse order (r15 down to r8).
        "ld __tmp_reg__,-X\n"
        "eor r15,__tmp_reg__\n"
        "ld __tmp_reg__,-X\n"
        "eor r14,__tmp_reg__\n"
        "ld __tmp_reg__,-X\n"
        "eor r13,__tmp_reg__\n"
        "ld __tmp_reg__,-X\n"
        "eor r12,__tmp_reg__\n"
        "ld __tmp_reg__,-X\n"
        "eor r11,__tmp_reg__\n"
        "ld __tmp_reg__,-X\n"
        "eor r10,__tmp_reg__\n"
        "ld __tmp_reg__,-X\n"
        "eor r9,__tmp_reg__\n"
        "ld __tmp_reg__,-X\n"
        "eor r8,__tmp_reg__\n"
        "ldi r24,0x02\n"
        "eor r16,r24\n"

        // Transform the state using the inverse sbox.
        SBOX("r8")
        SBOX("r9")
        SBOX("r10")
        SBOX("r11")
        SBOX("r12")
        SBOX("r13")
        SBOX("r14")
        SBOX("r15")
        SBOX("r16")
        SBOX("r17")
        SBOX("r18")
        SBOX("r19")
        SBOX("r20")
        SBOX("r21")
        SBOX("r22")
        SBOX("r23")

        // Bottom of the loop: %4 holds the remaining round count.
        "dec %4\n"
        "breq 2f\n"
        "rjmp 1b\n"
        "2:\n"

        // Restore the original RAMPZ value.
#if defined(RAMPZ)
        "pop __tmp_reg__\n"
        "out %5,__tmp_reg__\n"
#endif

        // Store the final state into the output buffer.
        "ldd r30,%A2\n"
        "ldd r31,%B2\n"
        "st Z,r8\n"
        "std Z+1,r9\n"
        "std Z+2,r10\n"
        "std Z+3,r11\n"
        "std Z+4,r12\n"
        "std Z+5,r13\n"
        "std Z+6,r14\n"
        "std Z+7,r15\n"
        "std Z+8,r16\n"
        "std Z+9,r17\n"
        "std Z+10,r18\n"
        "std Z+11,r19\n"
        "std Z+12,r20\n"
        "std Z+13,r21\n"
        "std Z+14,r22\n"
        "std Z+15,r23\n"

        : : "x"(s + r * 2), "z"(input), "Q"(output), "Q"(sbox_addr),
            "r"((uint8_t)r)
#if defined(RAMPZ)
        , "I" (_SFR_IO_ADDR(RAMPZ))
#endif
        : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
          "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
          "r24", "memory"
    );
#else // !USE_AVR_INLINE_ASM
    uint32_t state[4];
    const uint32_t *schedule;
    uint32_t temp;

    // Unpack the input block into the state array.
    // Easy since we assume the platform is little-endian.
    memcpy(state, input, sizeof(state));

    /* Perform all decryption rounds, walking the schedule backwards. */
    schedule = &(s[r * 2 - 2]);
    for (uint8_t index = r; index > 0; --index, schedule -= 2) {
        // Inverse mix of the columns.
        temp = state[3];
        state[3] = state[0];
        state[0] = state[1];
        state[1] = state[2];
        state[3] ^= temp;
        state[2] = temp ^ state[0];
        state[1] ^= state[2];

        // Inverse shift of the rows.
        state[1] = leftRotate24(state[1]);
        state[2] = leftRotate16(state[2]);
        state[3] = leftRotate8(state[3]);

        // Apply the subkey for this round (row 2 takes the fixed 0x02).
        state[0] ^= schedule[0];
        state[1] ^= schedule[1];
        state[2] ^= 0x02;

        // Apply the inverse of the S-box to all bytes in the state.
        state[0] = skinny128_inv_sbox(state[0]);
        state[1] = skinny128_inv_sbox(state[1]);
        state[2] = skinny128_inv_sbox(state[2]);
        state[3] = skinny128_inv_sbox(state[3]);
    }

    // Pack the result into the output buffer.
    memcpy(output, state, sizeof(state));
#endif // !USE_AVR_INLINE_ASM
}
716 
718 {
719  clean(s, r * 2 * sizeof(uint32_t));
720 }
721 
722 #if USE_AVR_INLINE_ASM
723 
// Permutes the bytes within a TKn value while expanding the key schedule.
// PT = [9, 15, 8, 13, 10, 14, 12, 11, 0, 1, 2, 3, 4, 5, 6, 7]
// The 16 TK bytes live in r8..r23 (TK[0] in r8 ... TK[15] in r23) and
// the permutation is applied as one 16-element cycle of register moves,
// needing only a single temporary.
#define PERMUTE_TKn() \
    "mov __tmp_reg__,r8\n"      /* tmp = TK[0] */ \
    "mov r8,r17\n"              /* TK[0] = TK[9] */ \
    "mov r17,r9\n"              /* TK[9] = TK[1] */ \
    "mov r9,r23\n"              /* TK[1] = TK[15] */ \
    "mov r23,r15\n"             /* TK[15] = TK[7] */ \
    "mov r15,r19\n"             /* TK[7] = TK[11] */ \
    "mov r19,r11\n"             /* TK[11] = TK[3] */ \
    "mov r11,r21\n"             /* TK[3] = TK[13] */ \
    "mov r21,r13\n"             /* TK[13] = TK[5] */ \
    "mov r13,r22\n"             /* TK[5] = TK[14] */ \
    "mov r22,r14\n"             /* TK[14] = TK[6] */ \
    "mov r14,r20\n"             /* TK[6] = TK[12] */ \
    "mov r20,r12\n"             /* TK[12] = TK[4]) */ \
    "mov r12,r18\n"             /* TK[4] = TK[10] */ \
    "mov r18,r10\n"             /* TK[10] = TK[2] */ \
    "mov r10,r16\n"             /* TK[2] = TK[8] */ \
    "mov r16,__tmp_reg__\n"     /* TK[8] = tmp (original TK[0]) */
744 
745 #else // !USE_AVR_INLINE_ASM
746 
// Permutes the bytes within a TKn value while expanding the key schedule.
// PT = [9, 15, 8, 13, 10, 14, 12, 11, 0, 1, 2, 3, 4, 5, 6, 7]
// The old rows 0 and 1 simply become the new rows 2 and 3, while the
// old rows 2 and 3 are byte-shuffled into the new rows 0 and 1 with
// shifts and masks (one 32-bit row holds four TK cells).
#define skinny128_permute_tk(tk) \
    do { \
        uint32_t row2 = tk[2]; \
        uint32_t row3 = tk[3]; \
        tk[2] = tk[0]; \
        tk[3] = tk[1]; \
        row3 = (row3 << 16) | (row3 >> 16); \
        tk[0] = ((row2 >> 8) & 0x000000FFU) | \
                ((row2 << 16) & 0x00FF0000U) | \
                ( row3 & 0xFF00FF00U); \
        tk[1] = ((row2 >> 16) & 0x000000FFU) | \
                (row2 & 0xFF000000U) | \
                ((row3 << 8) & 0x0000FF00U) | \
                ( row3 & 0x00FF0000U); \
    } while (0)
764 
765 #endif // !USE_AVR_INLINE_ASM
766 
/**
 * \brief Writes the TK1 contribution into the key schedule.
 *
 * \param key Points to the 16 bytes of TK1.
 * \param tweaked Set to true when TK1 carries a tweak value; an extra
 *        bit (0x02 in the top cell of the third column) is then XORed
 *        into the schedule as recommended by the SKINNY specification.
 *
 * Overwrites the two schedule words of every round with the permuted
 * TK1 value XORed with the round constants; setTK2()/setTK3() XOR the
 * remaining tweakey material into the same words afterwards.
 */
void Skinny128::setTK1(const uint8_t *key, bool tweaked)
{
#if USE_AVR_INLINE_ASM
    __asm__ __volatile__ (
        // Load the TK1 bytes into r8..r23.
        "ld r8,Z\n"
        "ldd r9,Z+1\n"
        "ldd r10,Z+2\n"
        "ldd r11,Z+3\n"
        "ldd r12,Z+4\n"
        "ldd r13,Z+5\n"
        "ldd r14,Z+6\n"
        "ldd r15,Z+7\n"
        "ldd r16,Z+8\n"
        "ldd r17,Z+9\n"
        "ldd r18,Z+10\n"
        "ldd r19,Z+11\n"
        "ldd r20,Z+12\n"
        "ldd r21,Z+13\n"
        "ldd r22,Z+14\n"
        "ldd r23,Z+15\n"

        // Set rc to zero (stored in r25).
        "clr r25\n"

        // Top of the loop, one iteration per round.
        "1:\n"

        // Generate the rc value for the next round.
        // rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
        // We don't need to do "rc &= 0x3F" because it is effectively
        // done for us by "andi" instructions in the following step.
        "clr r24\n"
        "lsl r25\n"
        "bst r25,6\n"
        "bld r24,0\n"
        "eor r25,r24\n"
        "bst r25,5\n"
        "bld r24,0\n"
        "eor r25,r24\n"
        "ldi r24,1\n"
        "eor r25,r24\n"

        // Store the first 8 bytes of TK1 into the key schedule and XOR
        // with rc: the low nibble of rc lands in byte 0, the high nibble
        // in byte 4, and %3 (0x02 when tweaked) in byte 2.
        "mov r24,r25\n"
        "andi r24,0x0F\n"
        "eor r24,r8\n"
        "st X+,r24\n"
        "st X+,r9\n"
        "mov __tmp_reg__,%3\n"
        "eor __tmp_reg__,r10\n"
        "st X+,__tmp_reg__\n"
        "st X+,r11\n"
        "mov r24,r25\n"
        "swap r24\n"
        "andi r24,0x03\n"
        "eor r24,r12\n"
        "st X+,r24\n"
        "st X+,r13\n"
        "st X+,r14\n"
        "st X+,r15\n"

        // Permute TK1 for the next round.
        PERMUTE_TKn()

        // Bottom of the loop: %2 holds the remaining round count.
        "dec %2\n"
        "breq 2f\n"
        "rjmp 1b\n"
        "2:\n"

        : : "x"(s), "z"(key), "r"(r), "r"((uint8_t)(tweaked ? 0x02 : 0x00))
        : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
          "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
          "r24", "r25", "memory"
    );
#else // !USE_AVR_INLINE_ASM
    uint32_t TK1[4];
    uint32_t *schedule = s;
    uint8_t rc = 0;

    // Unpack the incoming key value into the TK1 array.
    // Easy since we assume the platform is little-endian.
    memcpy(TK1, key, sizeof(TK1));

    // Generate the key schedule words for all rounds.
    for (uint8_t index = r; index > 0; --index, schedule += 2) {
        // XOR the round constants with the current schedule words.
        // The round constants for the 3rd and 4th rows are
        // fixed and will be applied during encrypt/decrypt.
        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
        rc &= 0x3F;
        schedule[0] = TK1[0] ^ (rc & 0x0F);
        schedule[1] = TK1[1] ^ (rc >> 4);

        // If we have a tweak, then we need to XOR a 1 bit into the
        // second bit of the top cell of the third column as recommended
        // by the SKINNY specification.
        if (tweaked)
            schedule[0] ^= 0x00020000;

        // Permute TK1 for the next round.
        skinny128_permute_tk(TK1);
    }

    // Clean up and exit: scrub the TK1 copy from the stack.
    clean(TK1);
#endif // !USE_AVR_INLINE_ASM
}
882 
/**
 * \brief XOR's the key schedule with the schedule for TK1.
 *
 * \param key Points to the 16 bytes of TK1.
 *
 * Unlike setTK1(), this merely XORs the permuted TK1 sequence into the
 * existing schedule words without touching the round constants.  Since
 * XOR is its own inverse, calling this twice with the same value is a
 * no-op; setTweak() relies on this to remove an old tweak and apply a
 * new one.
 */
void Skinny128::xorTK1(const uint8_t *key)
{
#if USE_AVR_INLINE_ASM
    __asm__ __volatile__ (
        // Load the TK1 bytes into r8..r23.
        "ld r8,Z\n"
        "ldd r9,Z+1\n"
        "ldd r10,Z+2\n"
        "ldd r11,Z+3\n"
        "ldd r12,Z+4\n"
        "ldd r13,Z+5\n"
        "ldd r14,Z+6\n"
        "ldd r15,Z+7\n"
        "ldd r16,Z+8\n"
        "ldd r17,Z+9\n"
        "ldd r18,Z+10\n"
        "ldd r19,Z+11\n"
        "ldd r20,Z+12\n"
        "ldd r21,Z+13\n"
        "ldd r22,Z+14\n"
        "ldd r23,Z+15\n"

        // Top of the loop, one iteration per round.
        "1:\n"

        // XOR the first two rows of TK1 with the key schedule,
        // read-modify-writing eight schedule bytes through X.
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r8\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r9\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r10\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r11\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r12\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r13\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r14\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r15\n"
        "st X+,__tmp_reg__\n"

        // Permute TK1 for the next round.
        PERMUTE_TKn()

        // Bottom of the loop: %2 holds the remaining round count.
        "dec %2\n"
        "breq 2f\n"
        "rjmp 1b\n"
        "2:\n"

        : : "x"(s), "z"(key), "r"(r)
        : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
          "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
          "r24", "memory"
    );
#else // !USE_AVR_INLINE_ASM
    uint32_t TK1[4];
    uint32_t *schedule = s;

    // Unpack the incoming key value into the TK1 array.
    // Easy since we assume the platform is little-endian.
    memcpy(TK1, key, sizeof(TK1));

    // XOR against the key schedule words for all rounds.
    for (uint8_t index = r; index > 0; --index, schedule += 2) {
        schedule[0] ^= TK1[0];
        schedule[1] ^= TK1[1];
        skinny128_permute_tk(TK1);
    }

    // Clean up and exit: scrub the TK1 copy from the stack.
    clean(TK1);
#endif // !USE_AVR_INLINE_ASM
}
975 
976 #if USE_AVR_INLINE_ASM
977 
// Transform the contents of a register using LFSR2 (r24 assumed to be zero).
// Shift left by one with wrap-around of the old bit 7 into bit 0, then
// XOR the old bit 5 (now in bit 6 after the shift) into bit 0, giving
// bit0' = b7 ^ b5 — the same update as skinny128_LFSR2() below.
#define LFSR2(reg) \
    "lsl " reg "\n" \
    "adc " reg ",__zero_reg__\n" \
    "bst " reg ",6\n" \
    "bld r24,0\n" \
    "eor " reg ",r24\n"
985 
// Transform the contents of a register using LFSR3 (r24 assumed to be zero).
// Shift right by one with the old bit 0 wrapping into bit 7, then XOR
// the old bit 6 (now in bit 5 after the shift) into bit 7, giving
// bit7' = b0 ^ b6 — the same update as skinny128_LFSR3() below.
#define LFSR3(reg) \
    "bst " reg ",0\n" \
    "lsr " reg "\n" \
    "bld " reg ",7\n" \
    "bst " reg ",5\n" \
    "bld r24,7\n" \
    "eor " reg ",r24\n"
994 
995 #else // !USE_AVR_INLINE_ASM
996 
/**
 * \brief Applies the TK2 byte LFSR to all four bytes of a word.
 *
 * Each byte is rotated left by one with feedback: bit0' = b7 XOR b5.
 */
inline uint32_t skinny128_LFSR2(uint32_t x)
{
    uint32_t shifted = (x << 1) & 0xFEFEFEFEU;      // b6..b0 move up one
    uint32_t feedback = ((x >> 7) ^ (x >> 5)) & 0x01010101U; // b7 ^ b5 into bit 0
    return shifted ^ feedback;
}
1001 
/**
 * \brief Applies the TK3 byte LFSR to all four bytes of a word.
 *
 * Each byte is rotated right by one with feedback: bit7' = b0 XOR b6.
 * This is the inverse direction of skinny128_LFSR2().
 */
inline uint32_t skinny128_LFSR3(uint32_t x)
{
    uint32_t shifted = (x >> 1) & 0x7F7F7F7FU;      // b7..b1 move down one
    uint32_t feedback = ((x << 7) ^ (x << 1)) & 0x80808080U; // b0 ^ b6 into bit 7
    return shifted ^ feedback;
}
1006 
1007 #endif // !USE_AVR_INLINE_ASM
1008 
/**
 * \brief XOR's the key schedule with the schedule for TK2.
 *
 * \param key Points to the 16 bytes of TK2.
 *
 * For every round, XORs the first two rows of the evolving TK2 value
 * into the schedule words, then permutes TK2 and advances the first
 * two rows through LFSR2.  Call after setTK1() has written the base
 * schedule.
 */
void Skinny128::setTK2(const uint8_t *key)
{
#if USE_AVR_INLINE_ASM
    __asm__ __volatile__ (
        // Load the TK2 bytes into r8..r23.
        "ld r8,Z\n"
        "ldd r9,Z+1\n"
        "ldd r10,Z+2\n"
        "ldd r11,Z+3\n"
        "ldd r12,Z+4\n"
        "ldd r13,Z+5\n"
        "ldd r14,Z+6\n"
        "ldd r15,Z+7\n"
        "ldd r16,Z+8\n"
        "ldd r17,Z+9\n"
        "ldd r18,Z+10\n"
        "ldd r19,Z+11\n"
        "ldd r20,Z+12\n"
        "ldd r21,Z+13\n"
        "ldd r22,Z+14\n"
        "ldd r23,Z+15\n"

        // Top of the loop, one iteration per round.
        "1:\n"

        // XOR the first two rows of TK2 with the key schedule.
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r8\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r9\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r10\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r11\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r12\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r13\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r14\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r15\n"
        "st X+,__tmp_reg__\n"

        // Permute TK2 for the next round.
        PERMUTE_TKn()

        // Apply LFSR2 to the first two rows of TK2
        // (r24 must be zero on entry to the LFSR2 macro).
        "clr r24\n"
        LFSR2("r8")
        LFSR2("r9")
        LFSR2("r10")
        LFSR2("r11")
        LFSR2("r12")
        LFSR2("r13")
        LFSR2("r14")
        LFSR2("r15")

        // Bottom of the loop: %2 holds the remaining round count.
        "dec %2\n"
        "breq 2f\n"
        "rjmp 1b\n"
        "2:\n"

        : : "x"(s), "z"(key), "r"(r)
        : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
          "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
          "r24", "memory"
    );
#else // !USE_AVR_INLINE_ASM
    uint32_t TK2[4];
    uint32_t *schedule = s;

    // Unpack the incoming key value into the TK2 array.
    // Easy since we assume the platform is little-endian.
    memcpy(TK2, key, sizeof(TK2));

    // XOR against the key schedule words for all rounds.
    for (uint8_t index = r; index > 0; --index, schedule += 2) {
        // XOR TK2 against the key schedule.
        schedule[0] ^= TK2[0];
        schedule[1] ^= TK2[1];

        // Permute TK2 for the next round.
        skinny128_permute_tk(TK2);

        // Apply LFSR2 to the first two rows of TK2.
        TK2[0] = skinny128_LFSR2(TK2[0]);
        TK2[1] = skinny128_LFSR2(TK2[1]);
    }

    // Clean up and exit: scrub the TK2 copy from the stack.
    clean(TK2);
#endif // !USE_AVR_INLINE_ASM
}
1116 
/**
 * \brief XOR's the key schedule with the schedule for TK3.
 *
 * \param key Points to the 16 bytes of TK3.
 *
 * Identical in structure to setTK2(), except that the first two rows
 * of TK3 are advanced with LFSR3 instead of LFSR2 after each round's
 * permutation.
 */
void Skinny128::setTK3(const uint8_t *key)
{
#if USE_AVR_INLINE_ASM
    __asm__ __volatile__ (
        // Load the TK3 bytes into r8..r23.
        "ld r8,Z\n"
        "ldd r9,Z+1\n"
        "ldd r10,Z+2\n"
        "ldd r11,Z+3\n"
        "ldd r12,Z+4\n"
        "ldd r13,Z+5\n"
        "ldd r14,Z+6\n"
        "ldd r15,Z+7\n"
        "ldd r16,Z+8\n"
        "ldd r17,Z+9\n"
        "ldd r18,Z+10\n"
        "ldd r19,Z+11\n"
        "ldd r20,Z+12\n"
        "ldd r21,Z+13\n"
        "ldd r22,Z+14\n"
        "ldd r23,Z+15\n"

        // Top of the loop, one iteration per round.
        "1:\n"

        // XOR the first two rows of TK3 with the key schedule.
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r8\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r9\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r10\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r11\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r12\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r13\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r14\n"
        "st X+,__tmp_reg__\n"
        "ld __tmp_reg__,X\n"
        "eor __tmp_reg__,r15\n"
        "st X+,__tmp_reg__\n"

        // Permute TK3 for the next round.
        PERMUTE_TKn()

        // Apply LFSR3 to the first two rows of TK3
        // (r24 must be zero on entry to the LFSR3 macro).
        "clr r24\n"
        LFSR3("r8")
        LFSR3("r9")
        LFSR3("r10")
        LFSR3("r11")
        LFSR3("r12")
        LFSR3("r13")
        LFSR3("r14")
        LFSR3("r15")

        // Bottom of the loop: %2 holds the remaining round count.
        "dec %2\n"
        "breq 2f\n"
        "rjmp 1b\n"
        "2:\n"

        : : "x"(s), "z"(key), "r"(r)
        : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
          "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
          "r24", "memory"
    );
#else // !USE_AVR_INLINE_ASM
    uint32_t TK3[4];
    uint32_t *schedule = s;

    // Unpack the incoming key value into the TK3 array.
    // Easy since we assume the platform is little-endian.
    memcpy(TK3, key, sizeof(TK3));

    // XOR against the key schedule words for all rounds.
    for (uint8_t index = r; index > 0; --index, schedule += 2) {
        // XOR TK3 against the key schedule.
        schedule[0] ^= TK3[0];
        schedule[1] ^= TK3[1];

        // Permute TK3 for the next round.
        skinny128_permute_tk(TK3);

        // Apply LFSR3 to the first two rows of TK3.
        TK3[0] = skinny128_LFSR3(TK3[0]);
        TK3[1] = skinny128_LFSR3(TK3[1]);
    }

    // Clean up and exit: scrub the TK3 copy from the stack.
    clean(TK3);
#endif // !USE_AVR_INLINE_ASM
}
1224 
/**
 * \brief Constructs a tweakable Skinny-128 block cipher object.
 *
 * \param schedule Points to the caller-allocated key schedule,
 *        two 32-bit words per round.
 * \param rounds The number of rounds to perform during encryption
 *        and decryption.
 */
Skinny128_Tweaked::Skinny128_Tweaked(uint32_t *schedule, uint8_t rounds)
    : Skinny128(schedule, rounds)
{
}
1235 
1241 {
1242  clean(t);
1243 }
1244 
1260 bool Skinny128_Tweaked::setTweak(const uint8_t *tweak, size_t len)
1261 {
1262  if (len != 16)
1263  return false;
1264  xorTK1(t);
1265  if (tweak) {
1266  memcpy(t, tweak, len);
1267  xorTK1(t);
1268  } else {
1269  memset(t, 0, sizeof(t));
1270  }
1271  return true;
1272 }
1273 
1275 {
1276  clean(t);
1277  Skinny128::clear();
1278 }
1279 
1286 {
1287  memset(t, 0, sizeof(t));
1288  setTK1(t, true);
1289 }
1290 
1295  : Skinny128(sched, 40)
1296 {
1297 }
1298 
1304 {
1305  clean(sched);
1306 }
1307 
1313 {
1314  return 16;
1315 }
1316 
1317 bool Skinny128_128::setKey(const uint8_t *key, size_t len)
1318 {
1319  if (len != 16)
1320  return false;
1321  setTK1(key);
1322  return true;
1323 }
1324 
1329  : Skinny128(sched, 48)
1330 {
1331 }
1332 
1338 {
1339  clean(sched);
1340 }
1341 
1347 {
1348  return 32;
1349 }
1350 
1351 bool Skinny128_256::setKey(const uint8_t *key, size_t len)
1352 {
1353  if (len != 32)
1354  return false;
1355  setTK1(key);
1356  setTK2(key + 16);
1357  return true;
1358 }
1359 
1365  : Skinny128_Tweaked(sched, 48)
1366 {
1367 }
1368 
1374 {
1375  clean(sched);
1376 }
1377 
1383 {
1384  return 16;
1385 }
1386 
1387 bool Skinny128_256_Tweaked::setKey(const uint8_t *key, size_t len)
1388 {
1389  if (len != 16)
1390  return false;
1391  resetTweak();
1392  setTK2(key);
1393  return true;
1394 }
1395 
1400  : Skinny128(sched, 56)
1401 {
1402 }
1403 
1409 {
1410  clean(sched);
1411 }
1412 
1418 {
1419  return 48;
1420 }
1421 
1422 bool Skinny128_384::setKey(const uint8_t *key, size_t len)
1423 {
1424  if (len != 48)
1425  return false;
1426  setTK1(key);
1427  setTK2(key + 16);
1428  setTK3(key + 32);
1429  return true;
1430 }
1431 
1437  : Skinny128_Tweaked(sched, 56)
1438 {
1439 }
1440 
1446 {
1447  clean(sched);
1448 }
1449 
1455 {
1456  return 32;
1457 }
1458 
1459 bool Skinny128_384_Tweaked::setKey(const uint8_t *key, size_t len)
1460 {
1461  if (len != 32)
1462  return false;
1463  resetTweak();
1464  setTK2(key);
1465  setTK3(key + 16);
1466  return true;
1467 }
Skinny128_Tweaked(uint32_t *schedule, uint8_t rounds)
Constructs a tweakable Skinny-128 block cipher object.
Definition: Skinny128.cpp:1231
bool setKey(const uint8_t *key, size_t len)
Sets the key to use for future encryption and decryption operations.
Definition: Skinny128.cpp:1387
bool setKey(const uint8_t *key, size_t len)
Sets the key to use for future encryption and decryption operations.
Definition: Skinny128.cpp:1317
size_t keySize() const
Size of a Skinny128_256 key in bytes.
Definition: Skinny128.cpp:1346
void xorTK1(const uint8_t *key)
XORs the key schedule with the schedule for TK1.
Definition: Skinny128.cpp:891
size_t keySize() const
Size of a Skinny128_384 key in bytes.
Definition: Skinny128.cpp:1417
virtual ~Skinny128_256_Tweaked()
Destroys this tweakable Skinny-128 block cipher object after clearing sensitive information.
Definition: Skinny128.cpp:1373
size_t keySize() const
Size of a Skinny128_256_Tweaked key in bytes.
Definition: Skinny128.cpp:1382
void encryptBlock(uint8_t *output, const uint8_t *input)
Encrypts a single block using this cipher.
Definition: Skinny128.cpp:343
void setTK3(const uint8_t *key)
XORs the key schedule with the schedule for TK3.
Definition: Skinny128.cpp:1122
virtual ~Skinny128_Tweaked()
Destroys this tweakable Skinny-128 block cipher object after clearing sensitive information.
Definition: Skinny128.cpp:1240
bool setKey(const uint8_t *key, size_t len)
Sets the key to use for future encryption and decryption operations.
Definition: Skinny128.cpp:1422
bool setKey(const uint8_t *key, size_t len)
Sets the key to use for future encryption and decryption operations.
Definition: Skinny128.cpp:1459
virtual ~Skinny128()
Destroys this Skinny-128 block cipher object after clearing sensitive information.
Definition: Skinny128.cpp:118
void clear()
Clears all security-sensitive state from this block cipher.
Definition: Skinny128.cpp:717
size_t keySize() const
Size of a Skinny128_128 key in bytes.
Definition: Skinny128.cpp:1312
Abstract base class for SKINNY tweakable block ciphers with 128-bit blocks.
Definition: Skinny128.h:53
virtual ~Skinny128_384()
Destroys this Skinny-128 block cipher object after clearing sensitive information.
Definition: Skinny128.cpp:1408
Skinny128_256()
Constructs a Skinny-128 block cipher with a 256-bit key.
Definition: Skinny128.cpp:1328
Skinny128_384_Tweaked()
Constructs a tweakable Skinny-128 block cipher with a 256-bit key and a 128-bit tweak.
Definition: Skinny128.cpp:1436
Skinny128_256_Tweaked()
Constructs a tweakable Skinny-128 block cipher with a 128-bit key and a 128-bit tweak.
Definition: Skinny128.cpp:1364
void setTK2(const uint8_t *key)
XORs the key schedule with the schedule for TK2.
Definition: Skinny128.cpp:1014
Abstract base class for SKINNY block ciphers with 128-bit blocks.
Definition: Skinny128.h:28
Skinny128_128()
Constructs a Skinny-128 block cipher with a 128-bit key.
Definition: Skinny128.cpp:1294
bool setKey(const uint8_t *key, size_t len)
Sets the key to use for future encryption and decryption operations.
Definition: Skinny128.cpp:1351
Skinny128(uint32_t *schedule, uint8_t rounds)
Constructs a Skinny-128 block cipher object.
Definition: Skinny128.cpp:109
void setTK1(const uint8_t *key, bool tweaked=false)
Clears the key schedule and sets it to the schedule for TK1.
Definition: Skinny128.cpp:773
virtual ~Skinny128_384_Tweaked()
Destroys this tweakable Skinny-128 block cipher object after clearing sensitive information.
Definition: Skinny128.cpp:1445
size_t blockSize() const
Size of a Skinny-128 block in bytes.
Definition: Skinny128.cpp:126
void resetTweak()
Resets the tweak to all-zeroes.
Definition: Skinny128.cpp:1285
virtual ~Skinny128_256()
Destroys this Skinny-128 block cipher object after clearing sensitive information.
Definition: Skinny128.cpp:1337
size_t keySize() const
Size of a Skinny128_384_Tweaked key in bytes.
Definition: Skinny128.cpp:1454
void decryptBlock(uint8_t *output, const uint8_t *input)
Decrypts a single block using this cipher.
Definition: Skinny128.cpp:531
virtual ~Skinny128_128()
Destroys this Skinny-128 block cipher object after clearing sensitive information.
Definition: Skinny128.cpp:1303
Skinny128_384()
Constructs a Skinny-128 block cipher with a 384-bit key.
Definition: Skinny128.cpp:1399
bool setTweak(const uint8_t *tweak, size_t len)
Sets the 128-bit tweak value for this block cipher.
Definition: Skinny128.cpp:1260
void clear()
Clears all security-sensitive state from this block cipher.
Definition: Skinny128.cpp:1274