Arduino Cryptography Library
KeccakCore.cpp
1 /*
2  * Copyright (C) 2015 Southern Storm Software, Pty Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "KeccakCore.h"
24 #include "Crypto.h"
25 #include "utility/EndianUtil.h"
26 #include "utility/RotateUtil.h"
27 #include "utility/ProgMemUtil.h"
28 #include <string.h>
29 
// Build-time guard: this file reads and writes the 64-bit lanes of state.A
// both as words and through uint8_t pointers (see update(), pad(), extract()),
// which only lines up when the host stores words least-significant byte first.
// Refuse to compile anywhere CRYPTO_LITTLE_ENDIAN is not defined.
42 #if !defined(CRYPTO_LITTLE_ENDIAN)
43 // All of the Arduino platforms we care about are little-endian.
44 #error "KeccakCore is not supported on big-endian platforms yet - todo"
45 #endif
46 
// NOTE(review): the declarator line that should precede this member-initializer
// list is missing from this listing. The initializer list implies a constructor
// (presumably KeccakCore's default constructor) - confirm against the header.
//
// Starts the object with a block size of 8 bytes, an all-zero sponge state,
// and no pending input or output bytes.
55  : _blockSize(8)
56 {
57  memset(state.A, 0, sizeof(state.A));
58  state.inputSize = 0;
59  state.outputSize = 0;
60 }
61 
// NOTE(review): the signature line for this body is missing from this listing
// (presumably the destructor - confirm against the header).
//
// Hands the entire `state` object to clean(); clean() is declared elsewhere and
// presumably wipes the memory so that sponge contents do not linger - verify.
67 {
68  clean(state);
69 }
70 
76 size_t KeccakCore::capacity() const
77 {
78  return 1600 - ((size_t)_blockSize) * 8;
79 }
80 
94 void KeccakCore::setCapacity(size_t capacity)
95 {
96  _blockSize = (1600 - capacity) / 8;
97  reset();
98 }
99 
// NOTE(review): the signature line for this body is missing from this listing.
// Presumably this is KeccakCore::reset(), which setCapacity() and setHMACKey()
// call and expect to leave state.A all-zero - confirm against the header.
//
// Zeroes the whole sponge state and clears both the pending-input and
// pending-output byte counters; the block size is left untouched.
115 {
116  memset(state.A, 0, sizeof(state.A));
117  state.inputSize = 0;
118  state.outputSize = 0;
119 }
120 
133 void KeccakCore::update(const void *data, size_t size)
134 {
135  // Stop generating output while we incorporate the new data.
136  state.outputSize = 0;
137 
138  // Break the input up into chunks and process each in turn.
139  const uint8_t *d = (const uint8_t *)data;
140  while (size > 0) {
141  uint8_t len = _blockSize - state.inputSize;
142  if (len > size)
143  len = size;
144  uint8_t *Abytes = ((uint8_t *)state.A) + state.inputSize;
145  for (uint8_t posn = 0; posn < len; ++posn)
146  Abytes[posn] ^= d[posn];
147  state.inputSize += len;
148  size -= len;
149  d += len;
150  if (state.inputSize == _blockSize) {
151  keccakp();
152  state.inputSize = 0;
153  }
154  }
155 }
156 
167 void KeccakCore::pad(uint8_t tag)
168 {
169  // Padding for SHA3-NNN variants according to FIPS 202 appends "01",
170  // then another "1", then many zero bits, followed by a final "1".
171  // SHAKE appends "1111" first instead of "01". Note that SHA-3 numbers
172  // bits from the least significant, so appending "01" is equivalent
173  // to 0x02 for byte-aligned data, not 0x40.
174  uint8_t size = state.inputSize;
175  uint64_t *Awords = &(state.A[0][0]);
176  Awords[size / 8] ^= (((uint64_t)tag) << ((size % 8) * 8));
177  Awords[(_blockSize - 1) / 8] ^= 0x8000000000000000ULL;
178  keccakp();
179  state.inputSize = 0;
180  state.outputSize = 0;
181 }
182 
194 void KeccakCore::extract(void *data, size_t size)
195 {
196  // Stop accepting input while we are generating output.
197  state.inputSize = 0;
198 
199  // Copy the output data into the caller's return buffer.
200  uint8_t *d = (uint8_t *)data;
201  uint8_t tempSize;
202  while (size > 0) {
203  // Generate another output block if the current one has been exhausted.
204  if (state.outputSize >= _blockSize) {
205  keccakp();
206  state.outputSize = 0;
207  }
208 
209  // How many bytes can we copy this time around?
210  tempSize = _blockSize - state.outputSize;
211  if (tempSize > size)
212  tempSize = size;
213 
214  // Copy the partial output data into the caller's return buffer.
215  memcpy(d, ((uint8_t *)(state.A)) + state.outputSize, tempSize);
216  state.outputSize += tempSize;
217  size -= tempSize;
218  d += tempSize;
219  }
220 }
221 
240 void KeccakCore::encrypt(void *output, const void *input, size_t size)
241 {
242  // Stop accepting input while we are generating output.
243  state.inputSize = 0;
244 
245  // Copy the output data into the caller's return buffer.
246  uint8_t *out = (uint8_t *)output;
247  const uint8_t *in = (const uint8_t *)input;
248  uint8_t tempSize;
249  while (size > 0) {
250  // Generate another output block if the current one has been exhausted.
251  if (state.outputSize >= _blockSize) {
252  keccakp();
253  state.outputSize = 0;
254  }
255 
256  // How many bytes can we extract this time around?
257  tempSize = _blockSize - state.outputSize;
258  if (tempSize > size)
259  tempSize = size;
260 
261  // XOR the partial output data into the caller's return buffer.
262  const uint8_t *d = ((const uint8_t *)(state.A)) + state.outputSize;
263  for (uint8_t index = 0; index < tempSize; ++index)
264  out[index] = in[index] ^ d[index];
265  state.outputSize += tempSize;
266  size -= tempSize;
267  out += tempSize;
268  in += tempSize;
269  }
270 }
271 
// NOTE(review): the signature line for this body is missing from this listing
// (presumably KeccakCore::clear() - confirm against the header).
//
// Hands the entire `state` object to clean(); clean() is declared elsewhere and
// presumably wipes the memory so that sponge contents do not linger - verify.
276 {
277  clean(state);
278 }
279 
293 void KeccakCore::setHMACKey(const void *key, size_t len, uint8_t pad, size_t hashSize)
294 {
295  uint8_t *Abytes = (uint8_t *)state.A;
296  size_t size = blockSize();
297  reset();
298  if (len <= size) {
299  // Because the state has just been reset, state.A is set to
300  // all-zeroes. We can copy the key directly into the state
301  // and then XOR the block with the pad value.
302  memcpy(Abytes, key, len);
303  } else {
304  // The key is larger than the block size. Hash it down.
305  // Afterwards, state.A will contain the first block of data
306  // to be extracted. We truncate it to the first "hashSize"
307  // bytes and XOR with the padding.
308  update(key, len);
309  this->pad(0x06);
310  memset(Abytes + hashSize, pad, size - hashSize);
311  memset(Abytes + size, 0, sizeof(state.A) - size);
312  size = hashSize;
313  }
314  while (size > 0) {
315  *Abytes++ ^= pad;
316  --size;
317  }
318  keccakp();
319 }
320 
324 void KeccakCore::keccakp()
325 {
326  uint64_t B[5][5];
327 #if defined(__AVR__)
328  // This assembly code was generated by the "genkeccak.c" program.
329  // Do not modify this code directly. Instead modify "genkeccak.c"
330  // and then re-generate the code here.
331  for (uint8_t round = 0; round < 24; ++round) {
332  __asm__ __volatile__ (
333  "push r29\n"
334  "push r28\n"
335  "mov r28,r26\n"
336  "mov r29,r27\n"
337 
338  // Step mapping theta. Compute C.
339  "ldi r20,5\n"
340  "100:\n"
341  "ld r8,Z\n"
342  "ldd r9,Z+1\n"
343  "ldd r10,Z+2\n"
344  "ldd r11,Z+3\n"
345  "ldd r12,Z+4\n"
346  "ldd r13,Z+5\n"
347  "ldd r14,Z+6\n"
348  "ldd r15,Z+7\n"
349  "ldi r19,4\n"
350  "101:\n"
351  "adiw r30,40\n"
352  "ld __tmp_reg__,Z\n"
353  "eor r8,__tmp_reg__\n"
354  "ldd __tmp_reg__,Z+1\n"
355  "eor r9,__tmp_reg__\n"
356  "ldd __tmp_reg__,Z+2\n"
357  "eor r10,__tmp_reg__\n"
358  "ldd __tmp_reg__,Z+3\n"
359  "eor r11,__tmp_reg__\n"
360  "ldd __tmp_reg__,Z+4\n"
361  "eor r12,__tmp_reg__\n"
362  "ldd __tmp_reg__,Z+5\n"
363  "eor r13,__tmp_reg__\n"
364  "ldd __tmp_reg__,Z+6\n"
365  "eor r14,__tmp_reg__\n"
366  "ldd __tmp_reg__,Z+7\n"
367  "eor r15,__tmp_reg__\n"
368  "dec r19\n"
369  "brne 101b\n"
370  "st X+,r8\n"
371  "st X+,r9\n"
372  "st X+,r10\n"
373  "st X+,r11\n"
374  "st X+,r12\n"
375  "st X+,r13\n"
376  "st X+,r14\n"
377  "st X+,r15\n"
378  "subi r30,152\n"
379  "sbc r31,__zero_reg__\n"
380  "dec r20\n"
381  "brne 100b\n"
382  "sbiw r30,40\n"
383  "sbiw r26,40\n"
384 
385  // Step mapping theta. Compute D and XOR with A.
386  "ldd r8,Y+8\n"
387  "ldd r9,Y+9\n"
388  "ldd r10,Y+10\n"
389  "ldd r11,Y+11\n"
390  "ldd r12,Y+12\n"
391  "ldd r13,Y+13\n"
392  "ldd r14,Y+14\n"
393  "ldd r15,Y+15\n"
394  "lsl r8\n"
395  "rol r9\n"
396  "rol r10\n"
397  "rol r11\n"
398  "rol r12\n"
399  "rol r13\n"
400  "rol r14\n"
401  "rol r15\n"
402  "adc r8, __zero_reg__\n"
403  "ldd __tmp_reg__,Y+32\n"
404  "eor r8,__tmp_reg__\n"
405  "ldd __tmp_reg__,Y+33\n"
406  "eor r9,__tmp_reg__\n"
407  "ldd __tmp_reg__,Y+34\n"
408  "eor r10,__tmp_reg__\n"
409  "ldd __tmp_reg__,Y+35\n"
410  "eor r11,__tmp_reg__\n"
411  "ldd __tmp_reg__,Y+36\n"
412  "eor r12,__tmp_reg__\n"
413  "ldd __tmp_reg__,Y+37\n"
414  "eor r13,__tmp_reg__\n"
415  "ldd __tmp_reg__,Y+38\n"
416  "eor r14,__tmp_reg__\n"
417  "ldd __tmp_reg__,Y+39\n"
418  "eor r15,__tmp_reg__\n"
419  "ldi r19,5\n"
420  "103:\n"
421  "ld __tmp_reg__,Z\n"
422  "eor __tmp_reg__,r8\n"
423  "st Z,__tmp_reg__\n"
424  "ldd __tmp_reg__,Z+1\n"
425  "eor __tmp_reg__,r9\n"
426  "std Z+1,__tmp_reg__\n"
427  "ldd __tmp_reg__,Z+2\n"
428  "eor __tmp_reg__,r10\n"
429  "std Z+2,__tmp_reg__\n"
430  "ldd __tmp_reg__,Z+3\n"
431  "eor __tmp_reg__,r11\n"
432  "std Z+3,__tmp_reg__\n"
433  "ldd __tmp_reg__,Z+4\n"
434  "eor __tmp_reg__,r12\n"
435  "std Z+4,__tmp_reg__\n"
436  "ldd __tmp_reg__,Z+5\n"
437  "eor __tmp_reg__,r13\n"
438  "std Z+5,__tmp_reg__\n"
439  "ldd __tmp_reg__,Z+6\n"
440  "eor __tmp_reg__,r14\n"
441  "std Z+6,__tmp_reg__\n"
442  "ldd __tmp_reg__,Z+7\n"
443  "eor __tmp_reg__,r15\n"
444  "std Z+7,__tmp_reg__\n"
445  "adiw r30,40\n"
446  "dec r19\n"
447  "brne 103b\n"
448  "subi r30,192\n"
449  "sbc r31,__zero_reg__\n"
450  "ldd r8,Y+16\n"
451  "ldd r9,Y+17\n"
452  "ldd r10,Y+18\n"
453  "ldd r11,Y+19\n"
454  "ldd r12,Y+20\n"
455  "ldd r13,Y+21\n"
456  "ldd r14,Y+22\n"
457  "ldd r15,Y+23\n"
458  "lsl r8\n"
459  "rol r9\n"
460  "rol r10\n"
461  "rol r11\n"
462  "rol r12\n"
463  "rol r13\n"
464  "rol r14\n"
465  "rol r15\n"
466  "adc r8, __zero_reg__\n"
467  "ld __tmp_reg__,Y\n"
468  "eor r8,__tmp_reg__\n"
469  "ldd __tmp_reg__,Y+1\n"
470  "eor r9,__tmp_reg__\n"
471  "ldd __tmp_reg__,Y+2\n"
472  "eor r10,__tmp_reg__\n"
473  "ldd __tmp_reg__,Y+3\n"
474  "eor r11,__tmp_reg__\n"
475  "ldd __tmp_reg__,Y+4\n"
476  "eor r12,__tmp_reg__\n"
477  "ldd __tmp_reg__,Y+5\n"
478  "eor r13,__tmp_reg__\n"
479  "ldd __tmp_reg__,Y+6\n"
480  "eor r14,__tmp_reg__\n"
481  "ldd __tmp_reg__,Y+7\n"
482  "eor r15,__tmp_reg__\n"
483  "ldi r19,5\n"
484  "104:\n"
485  "ld __tmp_reg__,Z\n"
486  "eor __tmp_reg__,r8\n"
487  "st Z,__tmp_reg__\n"
488  "ldd __tmp_reg__,Z+1\n"
489  "eor __tmp_reg__,r9\n"
490  "std Z+1,__tmp_reg__\n"
491  "ldd __tmp_reg__,Z+2\n"
492  "eor __tmp_reg__,r10\n"
493  "std Z+2,__tmp_reg__\n"
494  "ldd __tmp_reg__,Z+3\n"
495  "eor __tmp_reg__,r11\n"
496  "std Z+3,__tmp_reg__\n"
497  "ldd __tmp_reg__,Z+4\n"
498  "eor __tmp_reg__,r12\n"
499  "std Z+4,__tmp_reg__\n"
500  "ldd __tmp_reg__,Z+5\n"
501  "eor __tmp_reg__,r13\n"
502  "std Z+5,__tmp_reg__\n"
503  "ldd __tmp_reg__,Z+6\n"
504  "eor __tmp_reg__,r14\n"
505  "std Z+6,__tmp_reg__\n"
506  "ldd __tmp_reg__,Z+7\n"
507  "eor __tmp_reg__,r15\n"
508  "std Z+7,__tmp_reg__\n"
509  "adiw r30,40\n"
510  "dec r19\n"
511  "brne 104b\n"
512  "subi r30,192\n"
513  "sbc r31,__zero_reg__\n"
514  "ldd r8,Y+24\n"
515  "ldd r9,Y+25\n"
516  "ldd r10,Y+26\n"
517  "ldd r11,Y+27\n"
518  "ldd r12,Y+28\n"
519  "ldd r13,Y+29\n"
520  "ldd r14,Y+30\n"
521  "ldd r15,Y+31\n"
522  "lsl r8\n"
523  "rol r9\n"
524  "rol r10\n"
525  "rol r11\n"
526  "rol r12\n"
527  "rol r13\n"
528  "rol r14\n"
529  "rol r15\n"
530  "adc r8, __zero_reg__\n"
531  "ldd __tmp_reg__,Y+8\n"
532  "eor r8,__tmp_reg__\n"
533  "ldd __tmp_reg__,Y+9\n"
534  "eor r9,__tmp_reg__\n"
535  "ldd __tmp_reg__,Y+10\n"
536  "eor r10,__tmp_reg__\n"
537  "ldd __tmp_reg__,Y+11\n"
538  "eor r11,__tmp_reg__\n"
539  "ldd __tmp_reg__,Y+12\n"
540  "eor r12,__tmp_reg__\n"
541  "ldd __tmp_reg__,Y+13\n"
542  "eor r13,__tmp_reg__\n"
543  "ldd __tmp_reg__,Y+14\n"
544  "eor r14,__tmp_reg__\n"
545  "ldd __tmp_reg__,Y+15\n"
546  "eor r15,__tmp_reg__\n"
547  "ldi r19,5\n"
548  "105:\n"
549  "ld __tmp_reg__,Z\n"
550  "eor __tmp_reg__,r8\n"
551  "st Z,__tmp_reg__\n"
552  "ldd __tmp_reg__,Z+1\n"
553  "eor __tmp_reg__,r9\n"
554  "std Z+1,__tmp_reg__\n"
555  "ldd __tmp_reg__,Z+2\n"
556  "eor __tmp_reg__,r10\n"
557  "std Z+2,__tmp_reg__\n"
558  "ldd __tmp_reg__,Z+3\n"
559  "eor __tmp_reg__,r11\n"
560  "std Z+3,__tmp_reg__\n"
561  "ldd __tmp_reg__,Z+4\n"
562  "eor __tmp_reg__,r12\n"
563  "std Z+4,__tmp_reg__\n"
564  "ldd __tmp_reg__,Z+5\n"
565  "eor __tmp_reg__,r13\n"
566  "std Z+5,__tmp_reg__\n"
567  "ldd __tmp_reg__,Z+6\n"
568  "eor __tmp_reg__,r14\n"
569  "std Z+6,__tmp_reg__\n"
570  "ldd __tmp_reg__,Z+7\n"
571  "eor __tmp_reg__,r15\n"
572  "std Z+7,__tmp_reg__\n"
573  "adiw r30,40\n"
574  "dec r19\n"
575  "brne 105b\n"
576  "subi r30,192\n"
577  "sbc r31,__zero_reg__\n"
578  "ldd r8,Y+32\n"
579  "ldd r9,Y+33\n"
580  "ldd r10,Y+34\n"
581  "ldd r11,Y+35\n"
582  "ldd r12,Y+36\n"
583  "ldd r13,Y+37\n"
584  "ldd r14,Y+38\n"
585  "ldd r15,Y+39\n"
586  "lsl r8\n"
587  "rol r9\n"
588  "rol r10\n"
589  "rol r11\n"
590  "rol r12\n"
591  "rol r13\n"
592  "rol r14\n"
593  "rol r15\n"
594  "adc r8, __zero_reg__\n"
595  "ldd __tmp_reg__,Y+16\n"
596  "eor r8,__tmp_reg__\n"
597  "ldd __tmp_reg__,Y+17\n"
598  "eor r9,__tmp_reg__\n"
599  "ldd __tmp_reg__,Y+18\n"
600  "eor r10,__tmp_reg__\n"
601  "ldd __tmp_reg__,Y+19\n"
602  "eor r11,__tmp_reg__\n"
603  "ldd __tmp_reg__,Y+20\n"
604  "eor r12,__tmp_reg__\n"
605  "ldd __tmp_reg__,Y+21\n"
606  "eor r13,__tmp_reg__\n"
607  "ldd __tmp_reg__,Y+22\n"
608  "eor r14,__tmp_reg__\n"
609  "ldd __tmp_reg__,Y+23\n"
610  "eor r15,__tmp_reg__\n"
611  "ldi r19,5\n"
612  "106:\n"
613  "ld __tmp_reg__,Z\n"
614  "eor __tmp_reg__,r8\n"
615  "st Z,__tmp_reg__\n"
616  "ldd __tmp_reg__,Z+1\n"
617  "eor __tmp_reg__,r9\n"
618  "std Z+1,__tmp_reg__\n"
619  "ldd __tmp_reg__,Z+2\n"
620  "eor __tmp_reg__,r10\n"
621  "std Z+2,__tmp_reg__\n"
622  "ldd __tmp_reg__,Z+3\n"
623  "eor __tmp_reg__,r11\n"
624  "std Z+3,__tmp_reg__\n"
625  "ldd __tmp_reg__,Z+4\n"
626  "eor __tmp_reg__,r12\n"
627  "std Z+4,__tmp_reg__\n"
628  "ldd __tmp_reg__,Z+5\n"
629  "eor __tmp_reg__,r13\n"
630  "std Z+5,__tmp_reg__\n"
631  "ldd __tmp_reg__,Z+6\n"
632  "eor __tmp_reg__,r14\n"
633  "std Z+6,__tmp_reg__\n"
634  "ldd __tmp_reg__,Z+7\n"
635  "eor __tmp_reg__,r15\n"
636  "std Z+7,__tmp_reg__\n"
637  "adiw r30,40\n"
638  "dec r19\n"
639  "brne 106b\n"
640  "subi r30,192\n"
641  "sbc r31,__zero_reg__\n"
642  "ld r8,Y\n"
643  "ldd r9,Y+1\n"
644  "ldd r10,Y+2\n"
645  "ldd r11,Y+3\n"
646  "ldd r12,Y+4\n"
647  "ldd r13,Y+5\n"
648  "ldd r14,Y+6\n"
649  "ldd r15,Y+7\n"
650  "lsl r8\n"
651  "rol r9\n"
652  "rol r10\n"
653  "rol r11\n"
654  "rol r12\n"
655  "rol r13\n"
656  "rol r14\n"
657  "rol r15\n"
658  "adc r8, __zero_reg__\n"
659  "ldd __tmp_reg__,Y+24\n"
660  "eor r8,__tmp_reg__\n"
661  "ldd __tmp_reg__,Y+25\n"
662  "eor r9,__tmp_reg__\n"
663  "ldd __tmp_reg__,Y+26\n"
664  "eor r10,__tmp_reg__\n"
665  "ldd __tmp_reg__,Y+27\n"
666  "eor r11,__tmp_reg__\n"
667  "ldd __tmp_reg__,Y+28\n"
668  "eor r12,__tmp_reg__\n"
669  "ldd __tmp_reg__,Y+29\n"
670  "eor r13,__tmp_reg__\n"
671  "ldd __tmp_reg__,Y+30\n"
672  "eor r14,__tmp_reg__\n"
673  "ldd __tmp_reg__,Y+31\n"
674  "eor r15,__tmp_reg__\n"
675  "ldi r19,5\n"
676  "107:\n"
677  "ld __tmp_reg__,Z\n"
678  "eor __tmp_reg__,r8\n"
679  "st Z,__tmp_reg__\n"
680  "ldd __tmp_reg__,Z+1\n"
681  "eor __tmp_reg__,r9\n"
682  "std Z+1,__tmp_reg__\n"
683  "ldd __tmp_reg__,Z+2\n"
684  "eor __tmp_reg__,r10\n"
685  "std Z+2,__tmp_reg__\n"
686  "ldd __tmp_reg__,Z+3\n"
687  "eor __tmp_reg__,r11\n"
688  "std Z+3,__tmp_reg__\n"
689  "ldd __tmp_reg__,Z+4\n"
690  "eor __tmp_reg__,r12\n"
691  "std Z+4,__tmp_reg__\n"
692  "ldd __tmp_reg__,Z+5\n"
693  "eor __tmp_reg__,r13\n"
694  "std Z+5,__tmp_reg__\n"
695  "ldd __tmp_reg__,Z+6\n"
696  "eor __tmp_reg__,r14\n"
697  "std Z+6,__tmp_reg__\n"
698  "ldd __tmp_reg__,Z+7\n"
699  "eor __tmp_reg__,r15\n"
700  "std Z+7,__tmp_reg__\n"
701  "adiw r30,40\n"
702  "dec r19\n"
703  "brne 107b\n"
704  "subi r30,232\n"
705  "sbc r31,__zero_reg__\n"
706 
707  // Step mappings rho and pi combined into one step.
708 
709  // B[0][0] = A[0][0]
710  "ld r8,Z\n"
711  "ldd r9,Z+1\n"
712  "ldd r10,Z+2\n"
713  "ldd r11,Z+3\n"
714  "ldd r12,Z+4\n"
715  "ldd r13,Z+5\n"
716  "ldd r14,Z+6\n"
717  "ldd r15,Z+7\n"
718  "st X+,r8\n"
719  "st X+,r9\n"
720  "st X+,r10\n"
721  "st X+,r11\n"
722  "st X+,r12\n"
723  "st X+,r13\n"
724  "st X+,r14\n"
725  "st X+,r15\n"
726 
727  // B[1][0] = leftRotate28_64(A[0][3])
728  "adiw r26,32\n"
729  "ldd r8,Z+24\n"
730  "ldd r9,Z+25\n"
731  "ldd r10,Z+26\n"
732  "ldd r11,Z+27\n"
733  "ldd r12,Z+28\n"
734  "ldd r13,Z+29\n"
735  "ldd r14,Z+30\n"
736  "ldd r15,Z+31\n"
737  "lsl r8\n"
738  "rol r9\n"
739  "rol r10\n"
740  "rol r11\n"
741  "rol r12\n"
742  "rol r13\n"
743  "rol r14\n"
744  "rol r15\n"
745  "adc r8, __zero_reg__\n"
746  "lsl r8\n"
747  "rol r9\n"
748  "rol r10\n"
749  "rol r11\n"
750  "rol r12\n"
751  "rol r13\n"
752  "rol r14\n"
753  "rol r15\n"
754  "adc r8, __zero_reg__\n"
755  "lsl r8\n"
756  "rol r9\n"
757  "rol r10\n"
758  "rol r11\n"
759  "rol r12\n"
760  "rol r13\n"
761  "rol r14\n"
762  "rol r15\n"
763  "adc r8, __zero_reg__\n"
764  "lsl r8\n"
765  "rol r9\n"
766  "rol r10\n"
767  "rol r11\n"
768  "rol r12\n"
769  "rol r13\n"
770  "rol r14\n"
771  "rol r15\n"
772  "adc r8, __zero_reg__\n"
773  "st X+,r13\n"
774  "st X+,r14\n"
775  "st X+,r15\n"
776  "st X+,r8\n"
777  "st X+,r9\n"
778  "st X+,r10\n"
779  "st X+,r11\n"
780  "st X+,r12\n"
781 
782  // B[2][0] = leftRotate1_64(A[0][1])
783  "adiw r26,32\n"
784  "ldd r8,Z+8\n"
785  "ldd r9,Z+9\n"
786  "ldd r10,Z+10\n"
787  "ldd r11,Z+11\n"
788  "ldd r12,Z+12\n"
789  "ldd r13,Z+13\n"
790  "ldd r14,Z+14\n"
791  "ldd r15,Z+15\n"
792  "lsl r8\n"
793  "rol r9\n"
794  "rol r10\n"
795  "rol r11\n"
796  "rol r12\n"
797  "rol r13\n"
798  "rol r14\n"
799  "rol r15\n"
800  "adc r8, __zero_reg__\n"
801  "st X+,r8\n"
802  "st X+,r9\n"
803  "st X+,r10\n"
804  "st X+,r11\n"
805  "st X+,r12\n"
806  "st X+,r13\n"
807  "st X+,r14\n"
808  "st X+,r15\n"
809 
810  // B[3][0] = leftRotate27_64(A[0][4])
811  "adiw r26,32\n"
812  "ldd r8,Z+32\n"
813  "ldd r9,Z+33\n"
814  "ldd r10,Z+34\n"
815  "ldd r11,Z+35\n"
816  "ldd r12,Z+36\n"
817  "ldd r13,Z+37\n"
818  "ldd r14,Z+38\n"
819  "ldd r15,Z+39\n"
820  "lsl r8\n"
821  "rol r9\n"
822  "rol r10\n"
823  "rol r11\n"
824  "rol r12\n"
825  "rol r13\n"
826  "rol r14\n"
827  "rol r15\n"
828  "adc r8, __zero_reg__\n"
829  "lsl r8\n"
830  "rol r9\n"
831  "rol r10\n"
832  "rol r11\n"
833  "rol r12\n"
834  "rol r13\n"
835  "rol r14\n"
836  "rol r15\n"
837  "adc r8, __zero_reg__\n"
838  "lsl r8\n"
839  "rol r9\n"
840  "rol r10\n"
841  "rol r11\n"
842  "rol r12\n"
843  "rol r13\n"
844  "rol r14\n"
845  "rol r15\n"
846  "adc r8, __zero_reg__\n"
847  "st X+,r13\n"
848  "st X+,r14\n"
849  "st X+,r15\n"
850  "st X+,r8\n"
851  "st X+,r9\n"
852  "st X+,r10\n"
853  "st X+,r11\n"
854  "st X+,r12\n"
855 
856  // B[4][0] = leftRotate62_64(A[0][2])
857  "adiw r26,32\n"
858  "ldd r8,Z+16\n"
859  "ldd r9,Z+17\n"
860  "ldd r10,Z+18\n"
861  "ldd r11,Z+19\n"
862  "ldd r12,Z+20\n"
863  "ldd r13,Z+21\n"
864  "ldd r14,Z+22\n"
865  "ldd r15,Z+23\n"
866  "bst r8,0\n"
867  "ror r15\n"
868  "ror r14\n"
869  "ror r13\n"
870  "ror r12\n"
871  "ror r11\n"
872  "ror r10\n"
873  "ror r9\n"
874  "ror r8\n"
875  "bld r15,7\n"
876  "bst r8,0\n"
877  "ror r15\n"
878  "ror r14\n"
879  "ror r13\n"
880  "ror r12\n"
881  "ror r11\n"
882  "ror r10\n"
883  "ror r9\n"
884  "ror r8\n"
885  "bld r15,7\n"
886  "st X+,r8\n"
887  "st X+,r9\n"
888  "st X+,r10\n"
889  "st X+,r11\n"
890  "st X+,r12\n"
891  "st X+,r13\n"
892  "st X+,r14\n"
893  "st X+,r15\n"
894 
895  // B[0][1] = leftRotate44_64(A[1][1])
896  "subi r26,160\n"
897  "sbc r27,__zero_reg__\n"
898  "adiw r30,40\n"
899  "ldd r8,Z+8\n"
900  "ldd r9,Z+9\n"
901  "ldd r10,Z+10\n"
902  "ldd r11,Z+11\n"
903  "ldd r12,Z+12\n"
904  "ldd r13,Z+13\n"
905  "ldd r14,Z+14\n"
906  "ldd r15,Z+15\n"
907  "lsl r8\n"
908  "rol r9\n"
909  "rol r10\n"
910  "rol r11\n"
911  "rol r12\n"
912  "rol r13\n"
913  "rol r14\n"
914  "rol r15\n"
915  "adc r8, __zero_reg__\n"
916  "lsl r8\n"
917  "rol r9\n"
918  "rol r10\n"
919  "rol r11\n"
920  "rol r12\n"
921  "rol r13\n"
922  "rol r14\n"
923  "rol r15\n"
924  "adc r8, __zero_reg__\n"
925  "lsl r8\n"
926  "rol r9\n"
927  "rol r10\n"
928  "rol r11\n"
929  "rol r12\n"
930  "rol r13\n"
931  "rol r14\n"
932  "rol r15\n"
933  "adc r8, __zero_reg__\n"
934  "lsl r8\n"
935  "rol r9\n"
936  "rol r10\n"
937  "rol r11\n"
938  "rol r12\n"
939  "rol r13\n"
940  "rol r14\n"
941  "rol r15\n"
942  "adc r8, __zero_reg__\n"
943  "st X+,r11\n"
944  "st X+,r12\n"
945  "st X+,r13\n"
946  "st X+,r14\n"
947  "st X+,r15\n"
948  "st X+,r8\n"
949  "st X+,r9\n"
950  "st X+,r10\n"
951 
952  // B[1][1] = leftRotate20_64(A[1][4])
953  "adiw r26,32\n"
954  "ldd r8,Z+32\n"
955  "ldd r9,Z+33\n"
956  "ldd r10,Z+34\n"
957  "ldd r11,Z+35\n"
958  "ldd r12,Z+36\n"
959  "ldd r13,Z+37\n"
960  "ldd r14,Z+38\n"
961  "ldd r15,Z+39\n"
962  "lsl r8\n"
963  "rol r9\n"
964  "rol r10\n"
965  "rol r11\n"
966  "rol r12\n"
967  "rol r13\n"
968  "rol r14\n"
969  "rol r15\n"
970  "adc r8, __zero_reg__\n"
971  "lsl r8\n"
972  "rol r9\n"
973  "rol r10\n"
974  "rol r11\n"
975  "rol r12\n"
976  "rol r13\n"
977  "rol r14\n"
978  "rol r15\n"
979  "adc r8, __zero_reg__\n"
980  "lsl r8\n"
981  "rol r9\n"
982  "rol r10\n"
983  "rol r11\n"
984  "rol r12\n"
985  "rol r13\n"
986  "rol r14\n"
987  "rol r15\n"
988  "adc r8, __zero_reg__\n"
989  "lsl r8\n"
990  "rol r9\n"
991  "rol r10\n"
992  "rol r11\n"
993  "rol r12\n"
994  "rol r13\n"
995  "rol r14\n"
996  "rol r15\n"
997  "adc r8, __zero_reg__\n"
998  "st X+,r14\n"
999  "st X+,r15\n"
1000  "st X+,r8\n"
1001  "st X+,r9\n"
1002  "st X+,r10\n"
1003  "st X+,r11\n"
1004  "st X+,r12\n"
1005  "st X+,r13\n"
1006 
1007  // B[2][1] = leftRotate6_64(A[1][2])
1008  "adiw r26,32\n"
1009  "ldd r8,Z+16\n"
1010  "ldd r9,Z+17\n"
1011  "ldd r10,Z+18\n"
1012  "ldd r11,Z+19\n"
1013  "ldd r12,Z+20\n"
1014  "ldd r13,Z+21\n"
1015  "ldd r14,Z+22\n"
1016  "ldd r15,Z+23\n"
1017  "bst r8,0\n"
1018  "ror r15\n"
1019  "ror r14\n"
1020  "ror r13\n"
1021  "ror r12\n"
1022  "ror r11\n"
1023  "ror r10\n"
1024  "ror r9\n"
1025  "ror r8\n"
1026  "bld r15,7\n"
1027  "bst r8,0\n"
1028  "ror r15\n"
1029  "ror r14\n"
1030  "ror r13\n"
1031  "ror r12\n"
1032  "ror r11\n"
1033  "ror r10\n"
1034  "ror r9\n"
1035  "ror r8\n"
1036  "bld r15,7\n"
1037  "st X+,r15\n"
1038  "st X+,r8\n"
1039  "st X+,r9\n"
1040  "st X+,r10\n"
1041  "st X+,r11\n"
1042  "st X+,r12\n"
1043  "st X+,r13\n"
1044  "st X+,r14\n"
1045 
1046  // B[3][1] = leftRotate36_64(A[1][0])
1047  "adiw r26,32\n"
1048  "ld r8,Z\n"
1049  "ldd r9,Z+1\n"
1050  "ldd r10,Z+2\n"
1051  "ldd r11,Z+3\n"
1052  "ldd r12,Z+4\n"
1053  "ldd r13,Z+5\n"
1054  "ldd r14,Z+6\n"
1055  "ldd r15,Z+7\n"
1056  "lsl r8\n"
1057  "rol r9\n"
1058  "rol r10\n"
1059  "rol r11\n"
1060  "rol r12\n"
1061  "rol r13\n"
1062  "rol r14\n"
1063  "rol r15\n"
1064  "adc r8, __zero_reg__\n"
1065  "lsl r8\n"
1066  "rol r9\n"
1067  "rol r10\n"
1068  "rol r11\n"
1069  "rol r12\n"
1070  "rol r13\n"
1071  "rol r14\n"
1072  "rol r15\n"
1073  "adc r8, __zero_reg__\n"
1074  "lsl r8\n"
1075  "rol r9\n"
1076  "rol r10\n"
1077  "rol r11\n"
1078  "rol r12\n"
1079  "rol r13\n"
1080  "rol r14\n"
1081  "rol r15\n"
1082  "adc r8, __zero_reg__\n"
1083  "lsl r8\n"
1084  "rol r9\n"
1085  "rol r10\n"
1086  "rol r11\n"
1087  "rol r12\n"
1088  "rol r13\n"
1089  "rol r14\n"
1090  "rol r15\n"
1091  "adc r8, __zero_reg__\n"
1092  "st X+,r12\n"
1093  "st X+,r13\n"
1094  "st X+,r14\n"
1095  "st X+,r15\n"
1096  "st X+,r8\n"
1097  "st X+,r9\n"
1098  "st X+,r10\n"
1099  "st X+,r11\n"
1100 
1101  // B[4][1] = leftRotate55_64(A[1][3])
1102  "adiw r26,32\n"
1103  "ldd r8,Z+24\n"
1104  "ldd r9,Z+25\n"
1105  "ldd r10,Z+26\n"
1106  "ldd r11,Z+27\n"
1107  "ldd r12,Z+28\n"
1108  "ldd r13,Z+29\n"
1109  "ldd r14,Z+30\n"
1110  "ldd r15,Z+31\n"
1111  "bst r8,0\n"
1112  "ror r15\n"
1113  "ror r14\n"
1114  "ror r13\n"
1115  "ror r12\n"
1116  "ror r11\n"
1117  "ror r10\n"
1118  "ror r9\n"
1119  "ror r8\n"
1120  "bld r15,7\n"
1121  "st X+,r9\n"
1122  "st X+,r10\n"
1123  "st X+,r11\n"
1124  "st X+,r12\n"
1125  "st X+,r13\n"
1126  "st X+,r14\n"
1127  "st X+,r15\n"
1128  "st X+,r8\n"
1129 
1130  // B[0][2] = leftRotate43_64(A[2][2])
1131  "subi r26,160\n"
1132  "sbc r27,__zero_reg__\n"
1133  "adiw r30,40\n"
1134  "ldd r8,Z+16\n"
1135  "ldd r9,Z+17\n"
1136  "ldd r10,Z+18\n"
1137  "ldd r11,Z+19\n"
1138  "ldd r12,Z+20\n"
1139  "ldd r13,Z+21\n"
1140  "ldd r14,Z+22\n"
1141  "ldd r15,Z+23\n"
1142  "lsl r8\n"
1143  "rol r9\n"
1144  "rol r10\n"
1145  "rol r11\n"
1146  "rol r12\n"
1147  "rol r13\n"
1148  "rol r14\n"
1149  "rol r15\n"
1150  "adc r8, __zero_reg__\n"
1151  "lsl r8\n"
1152  "rol r9\n"
1153  "rol r10\n"
1154  "rol r11\n"
1155  "rol r12\n"
1156  "rol r13\n"
1157  "rol r14\n"
1158  "rol r15\n"
1159  "adc r8, __zero_reg__\n"
1160  "lsl r8\n"
1161  "rol r9\n"
1162  "rol r10\n"
1163  "rol r11\n"
1164  "rol r12\n"
1165  "rol r13\n"
1166  "rol r14\n"
1167  "rol r15\n"
1168  "adc r8, __zero_reg__\n"
1169  "st X+,r11\n"
1170  "st X+,r12\n"
1171  "st X+,r13\n"
1172  "st X+,r14\n"
1173  "st X+,r15\n"
1174  "st X+,r8\n"
1175  "st X+,r9\n"
1176  "st X+,r10\n"
1177 
1178  // B[1][2] = leftRotate3_64(A[2][0])
1179  "adiw r26,32\n"
1180  "ld r8,Z\n"
1181  "ldd r9,Z+1\n"
1182  "ldd r10,Z+2\n"
1183  "ldd r11,Z+3\n"
1184  "ldd r12,Z+4\n"
1185  "ldd r13,Z+5\n"
1186  "ldd r14,Z+6\n"
1187  "ldd r15,Z+7\n"
1188  "lsl r8\n"
1189  "rol r9\n"
1190  "rol r10\n"
1191  "rol r11\n"
1192  "rol r12\n"
1193  "rol r13\n"
1194  "rol r14\n"
1195  "rol r15\n"
1196  "adc r8, __zero_reg__\n"
1197  "lsl r8\n"
1198  "rol r9\n"
1199  "rol r10\n"
1200  "rol r11\n"
1201  "rol r12\n"
1202  "rol r13\n"
1203  "rol r14\n"
1204  "rol r15\n"
1205  "adc r8, __zero_reg__\n"
1206  "lsl r8\n"
1207  "rol r9\n"
1208  "rol r10\n"
1209  "rol r11\n"
1210  "rol r12\n"
1211  "rol r13\n"
1212  "rol r14\n"
1213  "rol r15\n"
1214  "adc r8, __zero_reg__\n"
1215  "st X+,r8\n"
1216  "st X+,r9\n"
1217  "st X+,r10\n"
1218  "st X+,r11\n"
1219  "st X+,r12\n"
1220  "st X+,r13\n"
1221  "st X+,r14\n"
1222  "st X+,r15\n"
1223 
1224  // B[2][2] = leftRotate25_64(A[2][3])
1225  "adiw r26,32\n"
1226  "ldd r8,Z+24\n"
1227  "ldd r9,Z+25\n"
1228  "ldd r10,Z+26\n"
1229  "ldd r11,Z+27\n"
1230  "ldd r12,Z+28\n"
1231  "ldd r13,Z+29\n"
1232  "ldd r14,Z+30\n"
1233  "ldd r15,Z+31\n"
1234  "lsl r8\n"
1235  "rol r9\n"
1236  "rol r10\n"
1237  "rol r11\n"
1238  "rol r12\n"
1239  "rol r13\n"
1240  "rol r14\n"
1241  "rol r15\n"
1242  "adc r8, __zero_reg__\n"
1243  "st X+,r13\n"
1244  "st X+,r14\n"
1245  "st X+,r15\n"
1246  "st X+,r8\n"
1247  "st X+,r9\n"
1248  "st X+,r10\n"
1249  "st X+,r11\n"
1250  "st X+,r12\n"
1251 
1252  // B[3][2] = leftRotate10_64(A[2][1])
1253  "adiw r26,32\n"
1254  "ldd r8,Z+8\n"
1255  "ldd r9,Z+9\n"
1256  "ldd r10,Z+10\n"
1257  "ldd r11,Z+11\n"
1258  "ldd r12,Z+12\n"
1259  "ldd r13,Z+13\n"
1260  "ldd r14,Z+14\n"
1261  "ldd r15,Z+15\n"
1262  "lsl r8\n"
1263  "rol r9\n"
1264  "rol r10\n"
1265  "rol r11\n"
1266  "rol r12\n"
1267  "rol r13\n"
1268  "rol r14\n"
1269  "rol r15\n"
1270  "adc r8, __zero_reg__\n"
1271  "lsl r8\n"
1272  "rol r9\n"
1273  "rol r10\n"
1274  "rol r11\n"
1275  "rol r12\n"
1276  "rol r13\n"
1277  "rol r14\n"
1278  "rol r15\n"
1279  "adc r8, __zero_reg__\n"
1280  "st X+,r15\n"
1281  "st X+,r8\n"
1282  "st X+,r9\n"
1283  "st X+,r10\n"
1284  "st X+,r11\n"
1285  "st X+,r12\n"
1286  "st X+,r13\n"
1287  "st X+,r14\n"
1288 
1289  // B[4][2] = leftRotate39_64(A[2][4])
1290  "adiw r26,32\n"
1291  "ldd r8,Z+32\n"
1292  "ldd r9,Z+33\n"
1293  "ldd r10,Z+34\n"
1294  "ldd r11,Z+35\n"
1295  "ldd r12,Z+36\n"
1296  "ldd r13,Z+37\n"
1297  "ldd r14,Z+38\n"
1298  "ldd r15,Z+39\n"
1299  "bst r8,0\n"
1300  "ror r15\n"
1301  "ror r14\n"
1302  "ror r13\n"
1303  "ror r12\n"
1304  "ror r11\n"
1305  "ror r10\n"
1306  "ror r9\n"
1307  "ror r8\n"
1308  "bld r15,7\n"
1309  "st X+,r11\n"
1310  "st X+,r12\n"
1311  "st X+,r13\n"
1312  "st X+,r14\n"
1313  "st X+,r15\n"
1314  "st X+,r8\n"
1315  "st X+,r9\n"
1316  "st X+,r10\n"
1317 
1318  // B[0][3] = leftRotate21_64(A[3][3])
1319  "subi r26,160\n"
1320  "sbc r27,__zero_reg__\n"
1321  "adiw r30,40\n"
1322  "ldd r8,Z+24\n"
1323  "ldd r9,Z+25\n"
1324  "ldd r10,Z+26\n"
1325  "ldd r11,Z+27\n"
1326  "ldd r12,Z+28\n"
1327  "ldd r13,Z+29\n"
1328  "ldd r14,Z+30\n"
1329  "ldd r15,Z+31\n"
1330  "bst r8,0\n"
1331  "ror r15\n"
1332  "ror r14\n"
1333  "ror r13\n"
1334  "ror r12\n"
1335  "ror r11\n"
1336  "ror r10\n"
1337  "ror r9\n"
1338  "ror r8\n"
1339  "bld r15,7\n"
1340  "bst r8,0\n"
1341  "ror r15\n"
1342  "ror r14\n"
1343  "ror r13\n"
1344  "ror r12\n"
1345  "ror r11\n"
1346  "ror r10\n"
1347  "ror r9\n"
1348  "ror r8\n"
1349  "bld r15,7\n"
1350  "bst r8,0\n"
1351  "ror r15\n"
1352  "ror r14\n"
1353  "ror r13\n"
1354  "ror r12\n"
1355  "ror r11\n"
1356  "ror r10\n"
1357  "ror r9\n"
1358  "ror r8\n"
1359  "bld r15,7\n"
1360  "st X+,r13\n"
1361  "st X+,r14\n"
1362  "st X+,r15\n"
1363  "st X+,r8\n"
1364  "st X+,r9\n"
1365  "st X+,r10\n"
1366  "st X+,r11\n"
1367  "st X+,r12\n"
1368 
1369  // B[1][3] = leftRotate45_64(A[3][1])
1370  "adiw r26,32\n"
1371  "ldd r8,Z+8\n"
1372  "ldd r9,Z+9\n"
1373  "ldd r10,Z+10\n"
1374  "ldd r11,Z+11\n"
1375  "ldd r12,Z+12\n"
1376  "ldd r13,Z+13\n"
1377  "ldd r14,Z+14\n"
1378  "ldd r15,Z+15\n"
1379  "bst r8,0\n"
1380  "ror r15\n"
1381  "ror r14\n"
1382  "ror r13\n"
1383  "ror r12\n"
1384  "ror r11\n"
1385  "ror r10\n"
1386  "ror r9\n"
1387  "ror r8\n"
1388  "bld r15,7\n"
1389  "bst r8,0\n"
1390  "ror r15\n"
1391  "ror r14\n"
1392  "ror r13\n"
1393  "ror r12\n"
1394  "ror r11\n"
1395  "ror r10\n"
1396  "ror r9\n"
1397  "ror r8\n"
1398  "bld r15,7\n"
1399  "bst r8,0\n"
1400  "ror r15\n"
1401  "ror r14\n"
1402  "ror r13\n"
1403  "ror r12\n"
1404  "ror r11\n"
1405  "ror r10\n"
1406  "ror r9\n"
1407  "ror r8\n"
1408  "bld r15,7\n"
1409  "st X+,r10\n"
1410  "st X+,r11\n"
1411  "st X+,r12\n"
1412  "st X+,r13\n"
1413  "st X+,r14\n"
1414  "st X+,r15\n"
1415  "st X+,r8\n"
1416  "st X+,r9\n"
1417 
1418  // B[2][3] = leftRotate8_64(A[3][4])
1419  "adiw r26,32\n"
1420  "ldd r8,Z+32\n"
1421  "ldd r9,Z+33\n"
1422  "ldd r10,Z+34\n"
1423  "ldd r11,Z+35\n"
1424  "ldd r12,Z+36\n"
1425  "ldd r13,Z+37\n"
1426  "ldd r14,Z+38\n"
1427  "ldd r15,Z+39\n"
1428  "st X+,r15\n"
1429  "st X+,r8\n"
1430  "st X+,r9\n"
1431  "st X+,r10\n"
1432  "st X+,r11\n"
1433  "st X+,r12\n"
1434  "st X+,r13\n"
1435  "st X+,r14\n"
1436 
1437  // B[3][3] = leftRotate15_64(A[3][2])
1438  "adiw r26,32\n"
1439  "ldd r8,Z+16\n"
1440  "ldd r9,Z+17\n"
1441  "ldd r10,Z+18\n"
1442  "ldd r11,Z+19\n"
1443  "ldd r12,Z+20\n"
1444  "ldd r13,Z+21\n"
1445  "ldd r14,Z+22\n"
1446  "ldd r15,Z+23\n"
1447  "bst r8,0\n"
1448  "ror r15\n"
1449  "ror r14\n"
1450  "ror r13\n"
1451  "ror r12\n"
1452  "ror r11\n"
1453  "ror r10\n"
1454  "ror r9\n"
1455  "ror r8\n"
1456  "bld r15,7\n"
1457  "st X+,r14\n"
1458  "st X+,r15\n"
1459  "st X+,r8\n"
1460  "st X+,r9\n"
1461  "st X+,r10\n"
1462  "st X+,r11\n"
1463  "st X+,r12\n"
1464  "st X+,r13\n"
1465 
1466  // B[4][3] = leftRotate41_64(A[3][0])
1467  "adiw r26,32\n"
1468  "ld r8,Z\n"
1469  "ldd r9,Z+1\n"
1470  "ldd r10,Z+2\n"
1471  "ldd r11,Z+3\n"
1472  "ldd r12,Z+4\n"
1473  "ldd r13,Z+5\n"
1474  "ldd r14,Z+6\n"
1475  "ldd r15,Z+7\n"
1476  "lsl r8\n"
1477  "rol r9\n"
1478  "rol r10\n"
1479  "rol r11\n"
1480  "rol r12\n"
1481  "rol r13\n"
1482  "rol r14\n"
1483  "rol r15\n"
1484  "adc r8, __zero_reg__\n"
1485  "st X+,r11\n"
1486  "st X+,r12\n"
1487  "st X+,r13\n"
1488  "st X+,r14\n"
1489  "st X+,r15\n"
1490  "st X+,r8\n"
1491  "st X+,r9\n"
1492  "st X+,r10\n"
1493 
1494  // B[0][4] = leftRotate14_64(A[4][4])
1495  "subi r26,160\n"
1496  "sbc r27,__zero_reg__\n"
1497  "adiw r30,40\n"
1498  "ldd r8,Z+32\n"
1499  "ldd r9,Z+33\n"
1500  "ldd r10,Z+34\n"
1501  "ldd r11,Z+35\n"
1502  "ldd r12,Z+36\n"
1503  "ldd r13,Z+37\n"
1504  "ldd r14,Z+38\n"
1505  "ldd r15,Z+39\n"
1506  "bst r8,0\n"
1507  "ror r15\n"
1508  "ror r14\n"
1509  "ror r13\n"
1510  "ror r12\n"
1511  "ror r11\n"
1512  "ror r10\n"
1513  "ror r9\n"
1514  "ror r8\n"
1515  "bld r15,7\n"
1516  "bst r8,0\n"
1517  "ror r15\n"
1518  "ror r14\n"
1519  "ror r13\n"
1520  "ror r12\n"
1521  "ror r11\n"
1522  "ror r10\n"
1523  "ror r9\n"
1524  "ror r8\n"
1525  "bld r15,7\n"
1526  "st X+,r14\n"
1527  "st X+,r15\n"
1528  "st X+,r8\n"
1529  "st X+,r9\n"
1530  "st X+,r10\n"
1531  "st X+,r11\n"
1532  "st X+,r12\n"
1533  "st X+,r13\n"
1534 
1535  // B[1][4] = leftRotate61_64(A[4][2])
1536  "adiw r26,32\n"
1537  "ldd r8,Z+16\n"
1538  "ldd r9,Z+17\n"
1539  "ldd r10,Z+18\n"
1540  "ldd r11,Z+19\n"
1541  "ldd r12,Z+20\n"
1542  "ldd r13,Z+21\n"
1543  "ldd r14,Z+22\n"
1544  "ldd r15,Z+23\n"
1545  "bst r8,0\n"
1546  "ror r15\n"
1547  "ror r14\n"
1548  "ror r13\n"
1549  "ror r12\n"
1550  "ror r11\n"
1551  "ror r10\n"
1552  "ror r9\n"
1553  "ror r8\n"
1554  "bld r15,7\n"
1555  "bst r8,0\n"
1556  "ror r15\n"
1557  "ror r14\n"
1558  "ror r13\n"
1559  "ror r12\n"
1560  "ror r11\n"
1561  "ror r10\n"
1562  "ror r9\n"
1563  "ror r8\n"
1564  "bld r15,7\n"
1565  "bst r8,0\n"
1566  "ror r15\n"
1567  "ror r14\n"
1568  "ror r13\n"
1569  "ror r12\n"
1570  "ror r11\n"
1571  "ror r10\n"
1572  "ror r9\n"
1573  "ror r8\n"
1574  "bld r15,7\n"
1575  "st X+,r8\n"
1576  "st X+,r9\n"
1577  "st X+,r10\n"
1578  "st X+,r11\n"
1579  "st X+,r12\n"
1580  "st X+,r13\n"
1581  "st X+,r14\n"
1582  "st X+,r15\n"
1583 
1584  // B[2][4] = leftRotate18_64(A[4][0])
1585  "adiw r26,32\n"
1586  "ld r8,Z\n"
1587  "ldd r9,Z+1\n"
1588  "ldd r10,Z+2\n"
1589  "ldd r11,Z+3\n"
1590  "ldd r12,Z+4\n"
1591  "ldd r13,Z+5\n"
1592  "ldd r14,Z+6\n"
1593  "ldd r15,Z+7\n"
1594  "lsl r8\n"
1595  "rol r9\n"
1596  "rol r10\n"
1597  "rol r11\n"
1598  "rol r12\n"
1599  "rol r13\n"
1600  "rol r14\n"
1601  "rol r15\n"
1602  "adc r8, __zero_reg__\n"
1603  "lsl r8\n"
1604  "rol r9\n"
1605  "rol r10\n"
1606  "rol r11\n"
1607  "rol r12\n"
1608  "rol r13\n"
1609  "rol r14\n"
1610  "rol r15\n"
1611  "adc r8, __zero_reg__\n"
1612  "st X+,r14\n"
1613  "st X+,r15\n"
1614  "st X+,r8\n"
1615  "st X+,r9\n"
1616  "st X+,r10\n"
1617  "st X+,r11\n"
1618  "st X+,r12\n"
1619  "st X+,r13\n"
1620 
1621  // B[3][4] = leftRotate56_64(A[4][3])
1622  "adiw r26,32\n"
1623  "ldd r8,Z+24\n"
1624  "ldd r9,Z+25\n"
1625  "ldd r10,Z+26\n"
1626  "ldd r11,Z+27\n"
1627  "ldd r12,Z+28\n"
1628  "ldd r13,Z+29\n"
1629  "ldd r14,Z+30\n"
1630  "ldd r15,Z+31\n"
1631  "st X+,r9\n"
1632  "st X+,r10\n"
1633  "st X+,r11\n"
1634  "st X+,r12\n"
1635  "st X+,r13\n"
1636  "st X+,r14\n"
1637  "st X+,r15\n"
1638  "st X+,r8\n"
1639 
1640  // B[4][4] = leftRotate2_64(A[4][1])
1641  "adiw r26,32\n"
1642  "ldd r8,Z+8\n"
1643  "ldd r9,Z+9\n"
1644  "ldd r10,Z+10\n"
1645  "ldd r11,Z+11\n"
1646  "ldd r12,Z+12\n"
1647  "ldd r13,Z+13\n"
1648  "ldd r14,Z+14\n"
1649  "ldd r15,Z+15\n"
1650  "lsl r8\n"
1651  "rol r9\n"
1652  "rol r10\n"
1653  "rol r11\n"
1654  "rol r12\n"
1655  "rol r13\n"
1656  "rol r14\n"
1657  "rol r15\n"
1658  "adc r8, __zero_reg__\n"
1659  "lsl r8\n"
1660  "rol r9\n"
1661  "rol r10\n"
1662  "rol r11\n"
1663  "rol r12\n"
1664  "rol r13\n"
1665  "rol r14\n"
1666  "rol r15\n"
1667  "adc r8, __zero_reg__\n"
1668  "st X+,r8\n"
1669  "st X+,r9\n"
1670  "st X+,r10\n"
1671  "st X+,r11\n"
1672  "st X+,r12\n"
1673  "st X+,r13\n"
1674  "st X+,r14\n"
1675  "st X+,r15\n"
1676  "subi r26,200\n"
1677  "sbc r27,__zero_reg__\n"
1678  "subi r30,160\n"
1679  "sbc r31,__zero_reg__\n"
1680 
1681  // Step mapping chi.
1682  "ldi r20,5\n"
1683  "50:\n"
1684  "ld r8,Y\n"
1685  "ldd r9,Y+8\n"
1686  "ldd r10,Y+16\n"
1687  "ldd r11,Y+24\n"
1688  "ldd r12,Y+32\n"
1689  "mov r13,r9\n"
1690  "com r13\n"
1691  "and r13,r10\n"
1692  "eor r13,r8\n"
1693  "mov r14,r10\n"
1694  "com r14\n"
1695  "and r14,r11\n"
1696  "eor r14,r9\n"
1697  "mov r15,r11\n"
1698  "com r15\n"
1699  "and r15,r12\n"
1700  "eor r15,r10\n"
1701  "mov r17,r12\n"
1702  "com r17\n"
1703  "and r17,r8\n"
1704  "eor r17,r11\n"
1705  "mov r16,r8\n"
1706  "com r16\n"
1707  "and r16,r9\n"
1708  "eor r16,r12\n"
1709  "st Z,r13\n"
1710  "std Z+8,r14\n"
1711  "std Z+16,r15\n"
1712  "std Z+24,r17\n"
1713  "std Z+32,r16\n"
1714  "ldd r8,Y+1\n"
1715  "ldd r9,Y+9\n"
1716  "ldd r10,Y+17\n"
1717  "ldd r11,Y+25\n"
1718  "ldd r12,Y+33\n"
1719  "mov r13,r9\n"
1720  "com r13\n"
1721  "and r13,r10\n"
1722  "eor r13,r8\n"
1723  "mov r14,r10\n"
1724  "com r14\n"
1725  "and r14,r11\n"
1726  "eor r14,r9\n"
1727  "mov r15,r11\n"
1728  "com r15\n"
1729  "and r15,r12\n"
1730  "eor r15,r10\n"
1731  "mov r17,r12\n"
1732  "com r17\n"
1733  "and r17,r8\n"
1734  "eor r17,r11\n"
1735  "mov r16,r8\n"
1736  "com r16\n"
1737  "and r16,r9\n"
1738  "eor r16,r12\n"
1739  "std Z+1,r13\n"
1740  "std Z+9,r14\n"
1741  "std Z+17,r15\n"
1742  "std Z+25,r17\n"
1743  "std Z+33,r16\n"
1744  "ldd r8,Y+2\n"
1745  "ldd r9,Y+10\n"
1746  "ldd r10,Y+18\n"
1747  "ldd r11,Y+26\n"
1748  "ldd r12,Y+34\n"
1749  "mov r13,r9\n"
1750  "com r13\n"
1751  "and r13,r10\n"
1752  "eor r13,r8\n"
1753  "mov r14,r10\n"
1754  "com r14\n"
1755  "and r14,r11\n"
1756  "eor r14,r9\n"
1757  "mov r15,r11\n"
1758  "com r15\n"
1759  "and r15,r12\n"
1760  "eor r15,r10\n"
1761  "mov r17,r12\n"
1762  "com r17\n"
1763  "and r17,r8\n"
1764  "eor r17,r11\n"
1765  "mov r16,r8\n"
1766  "com r16\n"
1767  "and r16,r9\n"
1768  "eor r16,r12\n"
1769  "std Z+2,r13\n"
1770  "std Z+10,r14\n"
1771  "std Z+18,r15\n"
1772  "std Z+26,r17\n"
1773  "std Z+34,r16\n"
1774  "ldd r8,Y+3\n"
1775  "ldd r9,Y+11\n"
1776  "ldd r10,Y+19\n"
1777  "ldd r11,Y+27\n"
1778  "ldd r12,Y+35\n"
1779  "mov r13,r9\n"
1780  "com r13\n"
1781  "and r13,r10\n"
1782  "eor r13,r8\n"
1783  "mov r14,r10\n"
1784  "com r14\n"
1785  "and r14,r11\n"
1786  "eor r14,r9\n"
1787  "mov r15,r11\n"
1788  "com r15\n"
1789  "and r15,r12\n"
1790  "eor r15,r10\n"
1791  "mov r17,r12\n"
1792  "com r17\n"
1793  "and r17,r8\n"
1794  "eor r17,r11\n"
1795  "mov r16,r8\n"
1796  "com r16\n"
1797  "and r16,r9\n"
1798  "eor r16,r12\n"
1799  "std Z+3,r13\n"
1800  "std Z+11,r14\n"
1801  "std Z+19,r15\n"
1802  "std Z+27,r17\n"
1803  "std Z+35,r16\n"
1804  "ldd r8,Y+4\n"
1805  "ldd r9,Y+12\n"
1806  "ldd r10,Y+20\n"
1807  "ldd r11,Y+28\n"
1808  "ldd r12,Y+36\n"
1809  "mov r13,r9\n"
1810  "com r13\n"
1811  "and r13,r10\n"
1812  "eor r13,r8\n"
1813  "mov r14,r10\n"
1814  "com r14\n"
1815  "and r14,r11\n"
1816  "eor r14,r9\n"
1817  "mov r15,r11\n"
1818  "com r15\n"
1819  "and r15,r12\n"
1820  "eor r15,r10\n"
1821  "mov r17,r12\n"
1822  "com r17\n"
1823  "and r17,r8\n"
1824  "eor r17,r11\n"
1825  "mov r16,r8\n"
1826  "com r16\n"
1827  "and r16,r9\n"
1828  "eor r16,r12\n"
1829  "std Z+4,r13\n"
1830  "std Z+12,r14\n"
1831  "std Z+20,r15\n"
1832  "std Z+28,r17\n"
1833  "std Z+36,r16\n"
1834  "ldd r8,Y+5\n"
1835  "ldd r9,Y+13\n"
1836  "ldd r10,Y+21\n"
1837  "ldd r11,Y+29\n"
1838  "ldd r12,Y+37\n"
1839  "mov r13,r9\n"
1840  "com r13\n"
1841  "and r13,r10\n"
1842  "eor r13,r8\n"
1843  "mov r14,r10\n"
1844  "com r14\n"
1845  "and r14,r11\n"
1846  "eor r14,r9\n"
1847  "mov r15,r11\n"
1848  "com r15\n"
1849  "and r15,r12\n"
1850  "eor r15,r10\n"
1851  "mov r17,r12\n"
1852  "com r17\n"
1853  "and r17,r8\n"
1854  "eor r17,r11\n"
1855  "mov r16,r8\n"
1856  "com r16\n"
1857  "and r16,r9\n"
1858  "eor r16,r12\n"
1859  "std Z+5,r13\n"
1860  "std Z+13,r14\n"
1861  "std Z+21,r15\n"
1862  "std Z+29,r17\n"
1863  "std Z+37,r16\n"
1864  "ldd r8,Y+6\n"
1865  "ldd r9,Y+14\n"
1866  "ldd r10,Y+22\n"
1867  "ldd r11,Y+30\n"
1868  "ldd r12,Y+38\n"
1869  "mov r13,r9\n"
1870  "com r13\n"
1871  "and r13,r10\n"
1872  "eor r13,r8\n"
1873  "mov r14,r10\n"
1874  "com r14\n"
1875  "and r14,r11\n"
1876  "eor r14,r9\n"
1877  "mov r15,r11\n"
1878  "com r15\n"
1879  "and r15,r12\n"
1880  "eor r15,r10\n"
1881  "mov r17,r12\n"
1882  "com r17\n"
1883  "and r17,r8\n"
1884  "eor r17,r11\n"
1885  "mov r16,r8\n"
1886  "com r16\n"
1887  "and r16,r9\n"
1888  "eor r16,r12\n"
1889  "std Z+6,r13\n"
1890  "std Z+14,r14\n"
1891  "std Z+22,r15\n"
1892  "std Z+30,r17\n"
1893  "std Z+38,r16\n"
1894  "ldd r8,Y+7\n"
1895  "ldd r9,Y+15\n"
1896  "ldd r10,Y+23\n"
1897  "ldd r11,Y+31\n"
1898  "ldd r12,Y+39\n"
1899  "mov r13,r9\n"
1900  "com r13\n"
1901  "and r13,r10\n"
1902  "eor r13,r8\n"
1903  "mov r14,r10\n"
1904  "com r14\n"
1905  "and r14,r11\n"
1906  "eor r14,r9\n"
1907  "mov r15,r11\n"
1908  "com r15\n"
1909  "and r15,r12\n"
1910  "eor r15,r10\n"
1911  "mov r17,r12\n"
1912  "com r17\n"
1913  "and r17,r8\n"
1914  "eor r17,r11\n"
1915  "mov r16,r8\n"
1916  "com r16\n"
1917  "and r16,r9\n"
1918  "eor r16,r12\n"
1919  "std Z+7,r13\n"
1920  "std Z+15,r14\n"
1921  "std Z+23,r15\n"
1922  "std Z+31,r17\n"
1923  "std Z+39,r16\n"
1924  "adiw r30,40\n"
1925  "adiw r28,40\n"
1926  "dec r20\n"
1927  "breq 51f\n"
1928  "rjmp 50b\n"
1929  "51:\n"
1930  "pop r28\n"
1931  "pop r29\n"
1932 
1933  // Done
1934  : : "x"(B), "z"(state.A)
1935  : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
1936  "r16", "r17", "r18", "r19", "r20", "r21", "memory"
1937  );
1938 #else
1939  static const uint8_t addMod5Table[9] PROGMEM = {
1940  0, 1, 2, 3, 4, 0, 1, 2, 3
1941  };
1942  #define addMod5(x, y) (pgm_read_byte(&(addMod5Table[(x) + (y)])))
1943  uint64_t D;
1944  uint8_t index, index2;
1945  for (uint8_t round = 0; round < 24; ++round) {
1946  // Step mapping theta. The specification mentions two temporary
1947  // arrays of size 5 called C and D. To save a bit of memory,
1948  // we use the first row of B to store C and compute D on the fly.
1949  for (index = 0; index < 5; ++index) {
1950  B[0][index] = state.A[0][index] ^ state.A[1][index] ^
1951  state.A[2][index] ^ state.A[3][index] ^
1952  state.A[4][index];
1953  }
1954  for (index = 0; index < 5; ++index) {
1955  D = B[0][addMod5(index, 4)] ^
1956  leftRotate1_64(B[0][addMod5(index, 1)]);
1957  for (index2 = 0; index2 < 5; ++index2)
1958  state.A[index2][index] ^= D;
1959  }
1960 
1961  // Step mapping rho and pi combined into a single step.
1962  // Rotate all lanes by a specific offset and rearrange.
1963  B[0][0] = state.A[0][0];
1964  B[1][0] = leftRotate28_64(state.A[0][3]);
1965  B[2][0] = leftRotate1_64 (state.A[0][1]);
1966  B[3][0] = leftRotate27_64(state.A[0][4]);
1967  B[4][0] = leftRotate62_64(state.A[0][2]);
1968  B[0][1] = leftRotate44_64(state.A[1][1]);
1969  B[1][1] = leftRotate20_64(state.A[1][4]);
1970  B[2][1] = leftRotate6_64 (state.A[1][2]);
1971  B[3][1] = leftRotate36_64(state.A[1][0]);
1972  B[4][1] = leftRotate55_64(state.A[1][3]);
1973  B[0][2] = leftRotate43_64(state.A[2][2]);
1974  B[1][2] = leftRotate3_64 (state.A[2][0]);
1975  B[2][2] = leftRotate25_64(state.A[2][3]);
1976  B[3][2] = leftRotate10_64(state.A[2][1]);
1977  B[4][2] = leftRotate39_64(state.A[2][4]);
1978  B[0][3] = leftRotate21_64(state.A[3][3]);
1979  B[1][3] = leftRotate45_64(state.A[3][1]);
1980  B[2][3] = leftRotate8_64 (state.A[3][4]);
1981  B[3][3] = leftRotate15_64(state.A[3][2]);
1982  B[4][3] = leftRotate41_64(state.A[3][0]);
1983  B[0][4] = leftRotate14_64(state.A[4][4]);
1984  B[1][4] = leftRotate61_64(state.A[4][2]);
1985  B[2][4] = leftRotate18_64(state.A[4][0]);
1986  B[3][4] = leftRotate56_64(state.A[4][3]);
1987  B[4][4] = leftRotate2_64 (state.A[4][1]);
1988 
1989  // Step mapping chi. Combine each lane with two other lanes in its row.
1990  for (index = 0; index < 5; ++index) {
1991  for (index2 = 0; index2 < 5; ++index2) {
1992  state.A[index2][index] =
1993  B[index2][index] ^
1994  ((~B[index2][addMod5(index, 1)]) &
1995  B[index2][addMod5(index, 2)]);
1996  }
1997  }
1998 #endif
1999 
2000  // Step mapping iota. XOR A[0][0] with the round constant.
2001  static uint64_t const RC[24] PROGMEM = {
2002  0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808AULL,
2003  0x8000000080008000ULL, 0x000000000000808BULL, 0x0000000080000001ULL,
2004  0x8000000080008081ULL, 0x8000000000008009ULL, 0x000000000000008AULL,
2005  0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000AULL,
2006  0x000000008000808BULL, 0x800000000000008BULL, 0x8000000000008089ULL,
2007  0x8000000000008003ULL, 0x8000000000008002ULL, 0x8000000000000080ULL,
2008  0x000000000000800AULL, 0x800000008000000AULL, 0x8000000080008081ULL,
2009  0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL
2010  };
2011  state.A[0][0] ^= pgm_read_qword(RC + round);
2012  }
2013 }
size_t blockSize() const
Returns the input block size for the sponge function in bytes.
Definition: KeccakCore.h:38
~KeccakCore()
Destroys this Keccak sponge function after clearing all sensitive information.
Definition: KeccakCore.cpp:66
void reset()
Resets the Keccak sponge function ready for a new session.
Definition: KeccakCore.cpp:114
KeccakCore()
Constructs a new Keccak sponge function.
Definition: KeccakCore.cpp:54
void pad(uint8_t tag)
Pads the last block of input data to blockSize().
Definition: KeccakCore.cpp:167
size_t capacity() const
Returns the capacity of the sponge function in bits.
Definition: KeccakCore.cpp:76
void update(const void *data, size_t size)
Updates the Keccak sponge function with more input data.
Definition: KeccakCore.cpp:133
void extract(void *data, size_t size)
Extracts data from the Keccak sponge function.
Definition: KeccakCore.cpp:194
void setCapacity(size_t capacity)
Sets the capacity of the Keccak sponge function in bits.
Definition: KeccakCore.cpp:94
void encrypt(void *output, const void *input, size_t size)
Extracts data from the Keccak sponge function and uses it to encrypt a buffer.
Definition: KeccakCore.cpp:240
void clear()
Clears all sensitive data from this object.
Definition: KeccakCore.cpp:275
void setHMACKey(const void *key, size_t len, uint8_t pad, size_t hashSize)
Sets an HMAC key for a Keccak-based hash algorithm.
Definition: KeccakCore.cpp:293