Arduino Cryptography Library
SpeckSmall.cpp
1 /*
2  * Copyright (C) 2016 Southern Storm Software, Pty Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "SpeckSmall.h"
24 #include "Crypto.h"
25 #include "utility/RotateUtil.h"
26 #include "utility/EndianUtil.h"
27 #include <string.h>
28 
57 // The "avr-gcc" compiler doesn't do a very good job of compiling
58 // code involving 64-bit values. So we have to use inline assembly.
59 // It also helps to break the state up into 32-bit quantities
60 // because "asm" supports register names like %A0, %B0, %C0, %D0
61 // for the bytes in a 32-bit quantity, but it does not support
62 // %E0, %F0, %G0, %H0 for the high bytes of a 64-bit quantity.
// USE_AVR_INLINE_ASM is defined only when __AVR__ is defined. The
// "#if USE_AVR_INLINE_ASM" tests further down therefore evaluate it as 0
// on any other target and select the portable C++ code paths instead.
63 #if defined(__AVR__)
64 #define USE_AVR_INLINE_ASM 1
65 #endif
66 
67 // Pack/unpack byte-aligned big-endian 64-bit quantities.
// pack64(data, value): converts the 64-bit "value" with htobe64() and
// copies the resulting 8 bytes to "data", which may be unaligned.
// The temporary uses a macro-private name so that an argument expression
// that mentions a caller variable called "v" is not captured by it.
#define pack64(data, value) \
    do { \
        uint64_t pack64_tmp_ = htobe64((value)); \
        memcpy((data), &pack64_tmp_, sizeof(uint64_t)); \
    } while (0)
// unpack64(value, data): copies 8 bytes from "data" (which may be
// unaligned), converts them with be64toh() and assigns the result to
// "value".  The bytes go through a private 64-bit temporary so that
// "value" is evaluated exactly once and is only ever written by a normal
// assignment, never by an 8-byte memcpy() into its storage.
#define unpack64(value, data) \
    do { \
        uint64_t unpack64_tmp_; \
        memcpy(&unpack64_tmp_, (data), sizeof(uint64_t)); \
        (value) = be64toh(unpack64_tmp_); \
    } while (0)
78 
86 {
87 }
88 
89 SpeckSmall::~SpeckSmall()
90 {
91  clean(l);
92 }
93 
94 bool SpeckSmall::setKey(const uint8_t *key, size_t len)
95 {
96  // Try setting the key for the forward encryption direction.
97  if (!SpeckTiny::setKey(key, len))
98  return false;
99 
100 #if USE_AVR_INLINE_ASM
101  // Expand the key schedule to get the l and s values at the end
102  // of the schedule, which will allow us to reverse it later.
103  uint8_t mb = (rounds - 31) * 8;
104  __asm__ __volatile__ (
105  "ld r16,Z+\n" // s = k[0]
106  "ld r17,Z+\n"
107  "ld r18,Z+\n"
108  "ld r19,Z+\n"
109  "ld r20,Z+\n"
110  "ld r21,Z+\n"
111  "ld r22,Z+\n"
112  "ld r23,Z+\n"
113 
114  "mov r24,%3\n" // memcpy(l, k + 1, mb)
115  "3:\n"
116  "ld __tmp_reg__,Z+\n"
117  "st X+,__tmp_reg__\n"
118  "dec r24\n"
119  "brne 3b\n"
120  "sub %A1,%3\n" // return X to its initial value
121  "sbc %B1,__zero_reg__\n"
122 
123  "1:\n"
124 
125  // l[li_out] = (s + rightRotate8_64(l[li_in])) ^ i;
126  "add %A1,%2\n" // X = &(l[li_in])
127  "adc %B1,__zero_reg__\n"
128  "ld r15,X+\n" // x = rightRotate8_64(l[li_in])
129  "ld r8,X+\n"
130  "ld r9,X+\n"
131  "ld r10,X+\n"
132  "ld r11,X+\n"
133  "ld r12,X+\n"
134  "ld r13,X+\n"
135  "ld r14,X+\n"
136 
137  "add r8,r16\n" // x += s
138  "adc r9,r17\n"
139  "adc r10,r18\n"
140  "adc r11,r19\n"
141  "adc r12,r20\n"
142  "adc r13,r21\n"
143  "adc r14,r22\n"
144  "adc r15,r23\n"
145 
146  "eor r8,%4\n" // x ^= i
147 
148  // X = X - li_in + li_out
149  "ldi r24,8\n" // li_in = li_in + 1
150  "add %2,r24\n"
151  "sub %A1,%2\n" // return X to its initial value
152  "sbc %B1,__zero_reg__\n"
153  "ldi r25,0x1f\n"
154  "and %2,r25\n" // li_in = li_in % 4
155  "add %A1,%3\n" // X = &(l[li_out])
156  "adc %B1,__zero_reg__\n"
157 
158  "st X+,r8\n" // l[li_out] = x
159  "st X+,r9\n"
160  "st X+,r10\n"
161  "st X+,r11\n"
162  "st X+,r12\n"
163  "st X+,r13\n"
164  "st X+,r14\n"
165  "st X+,r15\n"
166 
167  "add %3,r24\n" // li_out = li_out + 1
168  "sub %A1,%3\n" // return X to its initial value
169  "sbc %B1,__zero_reg__\n"
170  "and %3,r25\n" // li_out = li_out % 4
171 
172  // s = leftRotate3_64(s) ^ l[li_out];
173  "lsl r16\n" // s = leftRotate1_64(s)
174  "rol r17\n"
175  "rol r18\n"
176  "rol r19\n"
177  "rol r20\n"
178  "rol r21\n"
179  "rol r22\n"
180  "rol r23\n"
181  "adc r16,__zero_reg__\n"
182 
183  "lsl r16\n" // s = leftRotate1_64(s)
184  "rol r17\n"
185  "rol r18\n"
186  "rol r19\n"
187  "rol r20\n"
188  "rol r21\n"
189  "rol r22\n"
190  "rol r23\n"
191  "adc r16,__zero_reg__\n"
192 
193  "lsl r16\n" // s = leftRotate1_64(s)
194  "rol r17\n"
195  "rol r18\n"
196  "rol r19\n"
197  "rol r20\n"
198  "rol r21\n"
199  "rol r22\n"
200  "rol r23\n"
201  "adc r16,__zero_reg__\n"
202 
203  "eor r16,r8\n" // s ^= x
204  "eor r17,r9\n"
205  "eor r18,r10\n"
206  "eor r19,r11\n"
207  "eor r20,r12\n"
208  "eor r21,r13\n"
209  "eor r22,r14\n"
210  "eor r23,r15\n"
211 
212  // Loop
213  "inc %4\n" // ++i
214  "dec %5\n" // --rounds
215  "breq 2f\n"
216  "rjmp 1b\n"
217  "2:\n"
218 
219  "add %A1,%3\n" // X = &(l[li_out])
220  "adc %B1,__zero_reg__\n"
221  "st X+,r16\n" // l[li_out] = s
222  "st X+,r17\n"
223  "st X+,r18\n"
224  "st X+,r19\n"
225  "st X+,r20\n"
226  "st X+,r21\n"
227  "st X+,r22\n"
228  "st X+,r23\n"
229 
230  : : "z"(k), "x"(l),
231  "r"((uint8_t)0), // initial value of li_in
232  "r"((uint8_t)mb), // initial value of li_out
233  "r"(0), // initial value of i
234  "r"(rounds - 1)
235  : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
236  "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
237  "r24", "r25"
238  );
239  return true;
240 #else
241  // Expand the key schedule to get the l and s values at the end
242  // of the schedule, which will allow us to reverse it later.
243  uint8_t m = rounds - 30;
244  uint8_t li_in = 0;
245  uint8_t li_out = m - 1;
246  uint64_t s = k[0];
247  memcpy(l, k + 1, (m - 1) * sizeof(uint64_t));
248  for (uint8_t i = 0; i < (rounds - 1); ++i) {
249  l[li_out] = (s + rightRotate8_64(l[li_in])) ^ i;
250  s = leftRotate3_64(s) ^ l[li_out];
251  li_in = (li_in + 1) & 0x03;
252  li_out = (li_out + 1) & 0x03;
253  }
254 
255  // Save the final s value in the l array so that we can recover it later.
256  l[li_out] = s;
257  return true;
258 #endif
259 }
260 
// Decrypts a single 16-byte block, reading it from input and writing the
// result to output.
//
// Both code paths run the cipher backwards.  They start from the values that
// setKey() left in this->l (the end of the key schedule plus the last round
// key s), undo one round at a time, and regenerate each earlier round key on
// the fly in a local copy of the schedule.  this->l itself is only read.
261 void SpeckSmall::decryptBlock(uint8_t *output, const uint8_t *input)
262 {
263 #if USE_AVR_INLINE_ASM
264  // Automatically generated by the genspeck tool.
     // Local working copy: 32 bytes copied from this->l plus a fifth 8-byte
     // slot that the assembly uses for the running round key s.
265  uint64_t l[5];
266  uint8_t r = rounds;
     // li_in and li_out are byte offsets into l here (hence the "* 8"),
     // wrapped to the 32-byte ring with 0x1F.
267  uint8_t li_in = ((r + 3) & 0x03) * 8;
268  uint8_t li_out = ((((r - 31) & 0x03) * 8) + li_in) & 0x1F;
269  __asm__ __volatile__ (
     // r25 = li_out; copy 32 bytes from this->l (X) to the local l (Z).
270  "ldd r25,%4\n"
271  "ldi r24,32\n"
272  "1:\n"
273  "ld __tmp_reg__,X+\n"
274  "st Z+,__tmp_reg__\n"
275  "dec r24\n"
276  "brne 1b\n"
     // X = local l + 32 (the fifth slot), Z = local l + li_out:
     // copy the 8 bytes of l[li_out] into the fifth slot, which holds s.
277  "movw r26,r30\n"
278  "sbiw r30,32\n"
279  "add r30,r25\n"
280  "adc r31,__zero_reg__\n"
281  "ld __tmp_reg__,Z\n"
282  "st X+,__tmp_reg__\n"
283  "ldd __tmp_reg__,Z+1\n"
284  "st X+,__tmp_reg__\n"
285  "ldd __tmp_reg__,Z+2\n"
286  "st X+,__tmp_reg__\n"
287  "ldd __tmp_reg__,Z+3\n"
288  "st X+,__tmp_reg__\n"
289  "ldd __tmp_reg__,Z+4\n"
290  "st X+,__tmp_reg__\n"
291  "ldd __tmp_reg__,Z+5\n"
292  "st X+,__tmp_reg__\n"
293  "ldd __tmp_reg__,Z+6\n"
294  "st X+,__tmp_reg__\n"
295  "ldd __tmp_reg__,Z+7\n"
296  "st X+,__tmp_reg__\n"
     // Z = local l again.
297  "sub r30,r25\n"
298  "sbc r31,__zero_reg__\n"
     // Load the block through X = input: input[0..7] -> r15..r8 (x) and
     // input[8..15] -> r23..r16 (y), so r8 and r16 receive the last byte
     // of each half (big-endian input, as in the C++ path below).
299  "movw r26,%A2\n"
300  "ld r15,X+\n"
301  "ld r14,X+\n"
302  "ld r13,X+\n"
303  "ld r12,X+\n"
304  "ld r11,X+\n"
305  "ld r10,X+\n"
306  "ld r9,X+\n"
307  "ld r8,X+\n"
308  "ld r23,X+\n"
309  "ld r22,X+\n"
310  "ld r21,X+\n"
311  "ld r20,X+\n"
312  "ld r19,X+\n"
313  "ld r18,X+\n"
314  "ld r17,X+\n"
315  "ld r16,X\n"
     // The input pointer is no longer needed, so its register pair is
     // reused: %A2 = li_in, %B2 = li_out.  r25 = rounds - 1 counts the
     // rounds still to undo after the current one.  X = local l + 40,
     // one past the end of the s slot.
316  "ldd %A2,%6\n"
317  "mov %B2,r25\n"
318  "ldd r25,%5\n"
319  "dec r25\n"
320  "movw r26,r30\n"
321  "adiw r26,40\n"
     // Top of the round loop: y ^= x.
322  "2:\n"
323  "eor r16,r8\n"
324  "eor r17,r9\n"
325  "eor r18,r10\n"
326  "eor r19,r11\n"
327  "eor r20,r12\n"
328  "eor r21,r13\n"
329  "eor r22,r14\n"
330  "eor r23,r15\n"
     // y = rightRotate3_64(y): each bst / ror x8 / bld group rotates
     // r23:r16 right by one bit (bit 0 is carried round to bit 63 via T).
331  "bst r16,0\n"
332  "ror r23\n"
333  "ror r22\n"
334  "ror r21\n"
335  "ror r20\n"
336  "ror r19\n"
337  "ror r18\n"
338  "ror r17\n"
339  "ror r16\n"
340  "bld r23,7\n"
341  "bst r16,0\n"
342  "ror r23\n"
343  "ror r22\n"
344  "ror r21\n"
345  "ror r20\n"
346  "ror r19\n"
347  "ror r18\n"
348  "ror r17\n"
349  "ror r16\n"
350  "bld r23,7\n"
351  "bst r16,0\n"
352  "ror r23\n"
353  "ror r22\n"
354  "ror r21\n"
355  "ror r20\n"
356  "ror r19\n"
357  "ror r18\n"
358  "ror r17\n"
359  "ror r16\n"
360  "bld r23,7\n"
     // x = leftRotate8_64((x ^ s) - y).  s is read backwards out of the
     // fifth slot; each XOR result lands one register higher than the x
     // byte it came from (the top byte wraps to r8), which performs the
     // 8-bit rotation without extra moves.  X ends at the start of s.
361  "ld __tmp_reg__,-X\n"
362  "eor __tmp_reg__,r15\n"
363  "ld r15,-X\n"
364  "eor r15,r14\n"
365  "ld r14,-X\n"
366  "eor r14,r13\n"
367  "ld r13,-X\n"
368  "eor r13,r12\n"
369  "ld r12,-X\n"
370  "eor r12,r11\n"
371  "ld r11,-X\n"
372  "eor r11,r10\n"
373  "ld r10,-X\n"
374  "eor r10,r9\n"
375  "ld r9,-X\n"
376  "eor r9,r8\n"
377  "mov r8,__tmp_reg__\n"
378  "sub r9,r16\n"
379  "sbc r10,r17\n"
380  "sbc r11,r18\n"
381  "sbc r12,r19\n"
382  "sbc r13,r20\n"
383  "sbc r14,r21\n"
384  "sbc r15,r22\n"
385  "sbc r8,r23\n"
     // Leave the loop once the round counter has reached zero; otherwise
     // decrement it and step the key schedule back by one round.
386  "or r25,r25\n"
387  "brne 3f\n"
388  "rjmp 4f\n"
389  "3:\n"
390  "dec r25\n"
     // Save x and y on the stack so r8-r23 can be used for the schedule.
391  "push r8\n"
392  "push r9\n"
393  "push r10\n"
394  "push r11\n"
395  "push r12\n"
396  "push r13\n"
397  "push r14\n"
398  "push r15\n"
399  "push r16\n"
400  "push r17\n"
401  "push r18\n"
402  "push r19\n"
403  "push r20\n"
404  "push r21\n"
405  "push r22\n"
406  "push r23\n"
     // li_in = (li_in + 24) & 0x1F and likewise for li_out: in byte
     // offsets this is the "(index + 3) & 0x03" step of the C++ path.
407  "ldi r24,24\n"
408  "add %A2,r24\n"
409  "add %B2,r24\n"
410  "ldi r24,0x1F\n"
411  "and %A2,r24\n"
412  "and %B2,r24\n"
     // r16..r23 = s (from the fifth slot), r8..r15 = l[li_out].
413  "ld r16,X+\n"
414  "ld r17,X+\n"
415  "ld r18,X+\n"
416  "ld r19,X+\n"
417  "ld r20,X+\n"
418  "ld r21,X+\n"
419  "ld r22,X+\n"
420  "ld r23,X+\n"
421  "add r30,%B2\n"
422  "adc r31,__zero_reg__\n"
423  "ld r8,Z\n"
424  "ldd r9,Z+1\n"
425  "ldd r10,Z+2\n"
426  "ldd r11,Z+3\n"
427  "ldd r12,Z+4\n"
428  "ldd r13,Z+5\n"
429  "ldd r14,Z+6\n"
430  "ldd r15,Z+7\n"
431  "sub r30,%B2\n"
432  "sbc r31,__zero_reg__\n"
     // s = rightRotate3_64(s ^ l[li_out]).
433  "eor r16,r8\n"
434  "eor r17,r9\n"
435  "eor r18,r10\n"
436  "eor r19,r11\n"
437  "eor r20,r12\n"
438  "eor r21,r13\n"
439  "eor r22,r14\n"
440  "eor r23,r15\n"
441  "bst r16,0\n"
442  "ror r23\n"
443  "ror r22\n"
444  "ror r21\n"
445  "ror r20\n"
446  "ror r19\n"
447  "ror r18\n"
448  "ror r17\n"
449  "ror r16\n"
450  "bld r23,7\n"
451  "bst r16,0\n"
452  "ror r23\n"
453  "ror r22\n"
454  "ror r21\n"
455  "ror r20\n"
456  "ror r19\n"
457  "ror r18\n"
458  "ror r17\n"
459  "ror r16\n"
460  "bld r23,7\n"
461  "bst r16,0\n"
462  "ror r23\n"
463  "ror r22\n"
464  "ror r21\n"
465  "ror r20\n"
466  "ror r19\n"
467  "ror r18\n"
468  "ror r17\n"
469  "ror r16\n"
470  "bld r23,7\n"
     // Write the new s back to the fifth slot, then put X back at
     // local l + 40 ready for the next round.
471  "st -X,r23\n"
472  "st -X,r22\n"
473  "st -X,r21\n"
474  "st -X,r20\n"
475  "st -X,r19\n"
476  "st -X,r18\n"
477  "st -X,r17\n"
478  "st -X,r16\n"
479  "adiw r26,8\n"
     // l[li_in] = leftRotate8_64((l[li_out] ^ r25) - s).  r25 has already
     // been decremented, which corresponds to "round - 1" in the C++
     // path.  The rotation is done by the shifted store order below.
480  "eor r8,r25\n"
481  "sub r8,r16\n"
482  "sbc r9,r17\n"
483  "sbc r10,r18\n"
484  "sbc r11,r19\n"
485  "sbc r12,r20\n"
486  "sbc r13,r21\n"
487  "sbc r14,r22\n"
488  "sbc r15,r23\n"
489  "add r30,%A2\n"
490  "adc r31,__zero_reg__\n"
491  "st Z,r15\n"
492  "std Z+1,r8\n"
493  "std Z+2,r9\n"
494  "std Z+3,r10\n"
495  "std Z+4,r11\n"
496  "std Z+5,r12\n"
497  "std Z+6,r13\n"
498  "std Z+7,r14\n"
499  "sub r30,%A2\n"
500  "sbc r31,__zero_reg__\n"
     // Restore x and y (reverse order of the pushes) and do the next round.
501  "pop r23\n"
502  "pop r22\n"
503  "pop r21\n"
504  "pop r20\n"
505  "pop r19\n"
506  "pop r18\n"
507  "pop r17\n"
508  "pop r16\n"
509  "pop r15\n"
510  "pop r14\n"
511  "pop r13\n"
512  "pop r12\n"
513  "pop r11\n"
514  "pop r10\n"
515  "pop r9\n"
516  "pop r8\n"
517  "rjmp 2b\n"
     // Done: X = output; store x (r15..r8) then y (r23..r16), highest
     // register first, mirroring the order in which the block was loaded.
518  "4:\n"
519  "ldd r26,%A3\n"
520  "ldd r27,%B3\n"
521  "st X+,r15\n"
522  "st X+,r14\n"
523  "st X+,r13\n"
524  "st X+,r12\n"
525  "st X+,r11\n"
526  "st X+,r10\n"
527  "st X+,r9\n"
528  "st X+,r8\n"
529  "st X+,r23\n"
530  "st X+,r22\n"
531  "st X+,r21\n"
532  "st X+,r20\n"
533  "st X+,r19\n"
534  "st X+,r18\n"
535  "st X+,r17\n"
536  "st X,r16\n"
     // Operands: %0 = X = this->l, %1 = Z = local l, %2 = input,
     // %3 = output, %4 = li_out, %5 = r, %6 = li_in (%3-%6 are read from
     // memory with ldd).
     // NOTE(review): X, Z and %2 are overwritten by the code above although
     // they are declared as input-only operands; GCC's extended-asm rules
     // say inputs must not be modified.  Consider read-write operands --
     // confirm against the genspeck generator before changing.
537  : : "x"(this->l), "z"(l), "r"(input), "Q"(output), "Q"(li_out), "Q"(r), "Q"(li_in)
538  : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
539  "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "memory"
540  , "r24", "r25"
541  );
542 #else
     // Local working copy of the schedule words, used as a 4-entry ring.
543  uint64_t l[4];
544  uint64_t x, y, s;
545  uint8_t round;
     // Word indexes into l (the AVR path uses the same formulas scaled
     // to byte offsets).
546  uint8_t li_in = (rounds + 3) & 0x03;
547  uint8_t li_out = ((rounds - 31) + li_in) & 0x03;
548 
549  // Prepare the key schedule, starting at the end.
     // Copies the ring entries from li_in up to (not including) li_out;
     // the entry at li_out holds the final s saved by setKey().
550  for (round = li_in; round != li_out; round = (round + 1) & 0x03)
551  l[round] = this->l[round];
552  s = this->l[li_out];
553 
554  // Unpack the input and convert from big-endian.
555  unpack64(x, input);
556  unpack64(y, input + 8);
557 
558  // Perform all decryption rounds except the last while
559  // expanding the decryption schedule on the fly.
     // NOTE(review): this loop declares its own "round", shadowing the
     // variable of the same name declared above.
560  for (uint8_t round = rounds - 1; round > 0; --round) {
561  // Decrypt using the current round key.
562  y = rightRotate3_64(x ^ y);
563  x = leftRotate8_64((x ^ s) - y);
564 
565  // Generate the round key for the previous round.
     // Adding 3 modulo 4 steps each ring index back by one entry.
566  li_in = (li_in + 3) & 0x03;
567  li_out = (li_out + 3) & 0x03;
568  s = rightRotate3_64(s ^ l[li_out]);
569  l[li_in] = leftRotate8_64((l[li_out] ^ (round - 1)) - s);
570  }
571 
572  // Perform the final decryption round.
573  y = rightRotate3_64(x ^ y);
574  x = leftRotate8_64((x ^ s) - y);
575 
576  // Pack the output and convert to big-endian.
577  pack64(output, x);
578  pack64(output + 8, y);
579 #endif
580 }
581 
583 {
585  clean(l);
586 }
SpeckSmall()
Constructs a small-memory Speck block cipher with no initial key.
Definition: SpeckSmall.cpp:85
bool setKey(const uint8_t *key, size_t len)
Sets the key to use for future encryption and decryption operations.
Definition: SpeckSmall.cpp:94
void clear()
Clears all security-sensitive state from this block cipher.
Definition: SpeckSmall.cpp:582
void decryptBlock(uint8_t *output, const uint8_t *input)
Decrypts a single block using this cipher.
Definition: SpeckSmall.cpp:261
bool setKey(const uint8_t *key, size_t len)
Sets the key to use for future encryption and decryption operations.
Definition: SpeckTiny.cpp:109
void clear()
Clears all security-sensitive state from this block cipher.
Definition: SpeckTiny.cpp:453