Skinny-C
 All Data Structures Files Functions Variables Groups Pages
Mantis8.cpp
1 /*
2  * Copyright (C) 2017 Southern Storm Software, Pty Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "Mantis8.h"
24 #include "Crypto.h"
25 #include "utility/EndianUtil.h"
26 #include "utility/RotateUtil.h"
27 #include "utility/ProgMemUtil.h"
28 #include <string.h>
29 
// Select the hand-written AVR assembly code paths when compiling for AVR.
57 #if defined(__AVR__)
58 #define USE_AVR_INLINE_ASM 1
59 #endif
60
// Refuse to compile unless CRYPTO_LITTLE_ENDIAN has been defined.
61 #ifndef CRYPTO_LITTLE_ENDIAN
62 #error "Arduino platforms are assumed to be little-endian"
63 #endif
64 
65 // Extract the 32 bits for a row from a 64-bit round constant.
// The four bytes of x starting at bit position "shift" are byte-swapped,
// so the most significant of them lands in bits 0..7 of the result.
66 #define RC_EXTRACT_ROW(x,shift) \
67  (((((uint32_t)((x) >> ((shift) + 24))) & 0xFF)) | \
68  ((((uint32_t)((x) >> ((shift) + 16))) & 0xFF) << 8) | \
69  ((((uint32_t)((x) >> ((shift) + 8))) & 0xFF) << 16) | \
70  ((((uint32_t)((x) >> ((shift)))) & 0xFF) << 24))
71
72 // Alpha constant for adjusting k1 for the inverse rounds.
// ALPHA_ROW0 is built from the upper 32 bits of ALPHA and ALPHA_ROW1 from
// the lower 32 bits; they are XOR'ed into the two 32-bit words of k1.
73 #define ALPHA 0x243F6A8885A308D3ULL
74 #define ALPHA_ROW0 (RC_EXTRACT_ROW(ALPHA, 32))
75 #define ALPHA_ROW1 (RC_EXTRACT_ROW(ALPHA, 0))
76 
77 #ifndef USE_AVR_INLINE_ASM
78
79 // Extract the rows from a 64-bit round constant.
// Expands to a {row0, row1} initializer: the word built from the upper
// 32 bits of x first, then the word built from the lower 32 bits.
80 #define RC(x) \
81  {RC_EXTRACT_ROW((x), 32), RC_EXTRACT_ROW((x), 0)}
82
83 // Round constants for Mantis, split up into 32-bit row values.
// One entry per round: encryptBlock() walks the table forwards during the
// forward rounds and backwards during the reverse rounds.
84 static uint32_t const rc[8][2] = {
85  RC(0x13198A2E03707344ULL),
86  RC(0xA4093822299F31D0ULL),
87  RC(0x082EFA98EC4E6C89ULL),
88  RC(0x452821E638D01377ULL),
89  RC(0xBE5466CF34E90C6CULL),
90  RC(0xC0AC29B7C97C50DDULL),
91  RC(0x3F84D5B5B5470917ULL),
92  RC(0x9216D5D98979FB1BULL)
93 };
94
95 #endif // !USE_AVR_INLINE_ASM
96 
101 {
102 }
103 
109 {
110  clean(st);
111 }
112 
117 size_t Mantis8::blockSize() const
118 {
119  return 8;
120 }
121 
126 size_t Mantis8::keySize() const
127 {
128  return 16;
129 }
130 
131 #ifndef USE_AVR_INLINE_ASM
132 
// Reads the eight bytes at buf as two little-endian 32-bit words and
// stores them into block[0] and block[1].
inline void mantis_unpack_block(uint32_t *block, const uint8_t *buf)
{
    for (uint8_t word = 0; word < 2; ++word) {
        const uint8_t *src = buf + 4 * word;
        uint32_t value = 0;
        // Fold in the bytes from the most significant one downwards so
        // that src[0] ends up in bits 0..7 of the word.
        for (uint8_t pos = 4; pos > 0; --pos)
            value = (value << 8) | src[pos - 1];
        block[word] = value;
    }
}
144 
145 static void mantis_unpack_rotated_block(uint32_t *block, const uint8_t *buf)
146 {
147  uint8_t rotated[8];
148  uint8_t index;
149  uint8_t next;
150  uint8_t carry = buf[7];
151  for (index = 0; index < 8; ++index) {
152  next = buf[index];
153  rotated[index] = (carry << 7) | (next >> 1);
154  carry = next;
155  }
156  rotated[7] ^= (buf[0] >> 7);
157  mantis_unpack_block(block, rotated);
158  clean(rotated);
159 }
160 
161 #endif // !USE_AVR_INLINE_ASM
162 
163 bool Mantis8::setKey(const uint8_t *key, size_t len)
164 {
165  if (len != 16)
166  return false;
167 #if USE_AVR_INLINE_ASM
168  __asm__ __volatile__ (
169  // Load k0 from the incoming key and store into the object.
170  "ld r8,X+\n"
171  "ld r9,X+\n"
172  "ld r10,X+\n"
173  "ld r11,X+\n"
174  "ld r12,X+\n"
175  "ld r13,X+\n"
176  "ld r14,X+\n"
177  "ld r15,X+\n"
178  "st Z,r8\n"
179  "std Z+1,r9\n"
180  "std Z+2,r10\n"
181  "std Z+3,r11\n"
182  "std Z+4,r12\n"
183  "std Z+5,r13\n"
184  "std Z+6,r14\n"
185  "std Z+7,r15\n"
186 
187  // Rotate k0 to create k0prime.
188  "bst r15,0\n"
189  "lsr r8\n"
190  "ror r9\n"
191  "ror r10\n"
192  "ror r11\n"
193  "ror r12\n"
194  "ror r13\n"
195  "ror r14\n"
196  "ror r15\n"
197  "bld r8,7\n"
198  "mov __tmp_reg__,__zero_reg__\n"
199  "bld __tmp_reg__,0\n"
200  "eor r15,__tmp_reg__\n"
201  "std Z+8,r8\n"
202  "std Z+9,r9\n"
203  "std Z+10,r10\n"
204  "std Z+11,r11\n"
205  "std Z+12,r12\n"
206  "std Z+13,r13\n"
207  "std Z+14,r14\n"
208  "std Z+15,r15\n"
209 
210  // Load k1 from the incoming key and store into the object.
211  "ld __tmp_reg__,X+\n"
212  "std Z+16,__tmp_reg__\n"
213  "ld __tmp_reg__,X+\n"
214  "std Z+17,__tmp_reg__\n"
215  "ld __tmp_reg__,X+\n"
216  "std Z+18,__tmp_reg__\n"
217  "ld __tmp_reg__,X+\n"
218  "std Z+19,__tmp_reg__\n"
219  "ld __tmp_reg__,X+\n"
220  "std Z+20,__tmp_reg__\n"
221  "ld __tmp_reg__,X+\n"
222  "std Z+21,__tmp_reg__\n"
223  "ld __tmp_reg__,X+\n"
224  "std Z+22,__tmp_reg__\n"
225  "ld __tmp_reg__,X+\n"
226  "std Z+23,__tmp_reg__\n"
227 
228  // Zero the tweak.
229  "std Z+24,__zero_reg__\n"
230  "std Z+25,__zero_reg__\n"
231  "std Z+26,__zero_reg__\n"
232  "std Z+27,__zero_reg__\n"
233  "std Z+28,__zero_reg__\n"
234  "std Z+29,__zero_reg__\n"
235  "std Z+30,__zero_reg__\n"
236  "std Z+31,__zero_reg__\n"
237 
238  : : "z"(&st), "x"(key)
239  : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "memory"
240  );
241 #else
242  mantis_unpack_block(st.k0, key);
243  mantis_unpack_block(st.k1, key + 8);
244  mantis_unpack_rotated_block(st.k0prime, key);
245  st.tweak[0] = 0;
246  st.tweak[1] = 0;
247 #endif
248  return true;
249 }
250 
266 bool Mantis8::setTweak(const uint8_t *tweak, size_t len)
267 {
268  if (len != 8)
269  return false;
270 #if USE_AVR_INLINE_ASM
271  __asm__ __volatile__ (
272  "mov __tmp_reg__,r26\n"
273  "or __tmp_reg__,r27\n"
274  "brne 1f\n"
275  "st Z+,__zero_reg__\n"
276  "st Z+,__zero_reg__\n"
277  "st Z+,__zero_reg__\n"
278  "st Z+,__zero_reg__\n"
279  "st Z+,__zero_reg__\n"
280  "st Z+,__zero_reg__\n"
281  "st Z+,__zero_reg__\n"
282  "st Z+,__zero_reg__\n"
283  "rjmp 2f\n"
284  "1:\n"
285  "ld __tmp_reg__,X+\n"
286  "st Z+,__tmp_reg__\n"
287  "ld __tmp_reg__,X+\n"
288  "st Z+,__tmp_reg__\n"
289  "ld __tmp_reg__,X+\n"
290  "st Z+,__tmp_reg__\n"
291  "ld __tmp_reg__,X+\n"
292  "st Z+,__tmp_reg__\n"
293  "ld __tmp_reg__,X+\n"
294  "st Z+,__tmp_reg__\n"
295  "ld __tmp_reg__,X+\n"
296  "st Z+,__tmp_reg__\n"
297  "ld __tmp_reg__,X+\n"
298  "st Z+,__tmp_reg__\n"
299  "ld __tmp_reg__,X+\n"
300  "st Z+,__tmp_reg__\n"
301  "2:\n"
302  : : "z"(st.tweak), "x"(tweak)
303  );
304 #else
305  if (tweak) {
306  mantis_unpack_block(st.tweak, tweak);
307  } else {
308  st.tweak[0] = 0;
309  st.tweak[1] = 0;
310  }
311 #endif
312  return true;
313 }
314 
323 {
324  // Swap k0 with k0prime.
325  uint32_t temp = st.k0[0];
326  st.k0[0] = st.k0prime[0];
327  st.k0prime[0] = temp;
328  temp = st.k0[1];
329  st.k0[1] = st.k0prime[1];
330  st.k0prime[1] = temp;
331 
332  // XOR k1 with the alpha constant.
333  st.k1[0] ^= ALPHA_ROW0;
334  st.k1[1] ^= ALPHA_ROW1;
335 }
336 
337 #if USE_AVR_INLINE_ASM
338
339 // Extract the bytes from a 64-bit round constant.
// RC(x) expands to the eight bytes of x as a comma-separated list, most
// significant byte first, for use inside an array initializer.
340 #define RC_EXTRACT_BYTE(x,shift) ((uint8_t)((x) >> (shift)))
341 #define RC(x) \
342  RC_EXTRACT_BYTE((x), 56), \
343  RC_EXTRACT_BYTE((x), 48), \
344  RC_EXTRACT_BYTE((x), 40), \
345  RC_EXTRACT_BYTE((x), 32), \
346  RC_EXTRACT_BYTE((x), 24), \
347  RC_EXTRACT_BYTE((x), 16), \
348  RC_EXTRACT_BYTE((x), 8), \
349  RC_EXTRACT_BYTE((x), 0)
350 
351 // Force the sboxes to be aligned on a 256-byte boundary.
352 // This makes sbox lookups more efficient.
// With this alignment the low byte of the table address is zero, so the
// SBOX() macro can index the table by copying the input byte into r30.
353 #define ALIGN256 __attribute__((aligned(256)))
354
355 // MIDORI Sb0, expanded from 4 bits to 8 bits for easier byte lookup.
356 // We only use this for AVR platforms, as there will be issues with
357 // constant cache behaviour on ARM. It would be nice to avoid this
358 // for AVR as well, but the S-box operations are simply too slow using
359 // bit operations on AVR.
// Layout: entries 0..255 substitute both nibbles of the index byte;
// entries 256..319 hold the eight 8-byte round constants.
360 static uint8_t const sbox[256 + 64] PROGMEM ALIGN256 = {
361  0xcc, 0xca, 0xcd, 0xc3, 0xce, 0xcb, 0xcf, 0xc7, 0xc8, 0xc9, 0xc1, 0xc5,
362  0xc0, 0xc2, 0xc4, 0xc6, 0xac, 0xaa, 0xad, 0xa3, 0xae, 0xab, 0xaf, 0xa7,
363  0xa8, 0xa9, 0xa1, 0xa5, 0xa0, 0xa2, 0xa4, 0xa6, 0xdc, 0xda, 0xdd, 0xd3,
364  0xde, 0xdb, 0xdf, 0xd7, 0xd8, 0xd9, 0xd1, 0xd5, 0xd0, 0xd2, 0xd4, 0xd6,
365  0x3c, 0x3a, 0x3d, 0x33, 0x3e, 0x3b, 0x3f, 0x37, 0x38, 0x39, 0x31, 0x35,
366  0x30, 0x32, 0x34, 0x36, 0xec, 0xea, 0xed, 0xe3, 0xee, 0xeb, 0xef, 0xe7,
367  0xe8, 0xe9, 0xe1, 0xe5, 0xe0, 0xe2, 0xe4, 0xe6, 0xbc, 0xba, 0xbd, 0xb3,
368  0xbe, 0xbb, 0xbf, 0xb7, 0xb8, 0xb9, 0xb1, 0xb5, 0xb0, 0xb2, 0xb4, 0xb6,
369  0xfc, 0xfa, 0xfd, 0xf3, 0xfe, 0xfb, 0xff, 0xf7, 0xf8, 0xf9, 0xf1, 0xf5,
370  0xf0, 0xf2, 0xf4, 0xf6, 0x7c, 0x7a, 0x7d, 0x73, 0x7e, 0x7b, 0x7f, 0x77,
371  0x78, 0x79, 0x71, 0x75, 0x70, 0x72, 0x74, 0x76, 0x8c, 0x8a, 0x8d, 0x83,
372  0x8e, 0x8b, 0x8f, 0x87, 0x88, 0x89, 0x81, 0x85, 0x80, 0x82, 0x84, 0x86,
373  0x9c, 0x9a, 0x9d, 0x93, 0x9e, 0x9b, 0x9f, 0x97, 0x98, 0x99, 0x91, 0x95,
374  0x90, 0x92, 0x94, 0x96, 0x1c, 0x1a, 0x1d, 0x13, 0x1e, 0x1b, 0x1f, 0x17,
375  0x18, 0x19, 0x11, 0x15, 0x10, 0x12, 0x14, 0x16, 0x5c, 0x5a, 0x5d, 0x53,
376  0x5e, 0x5b, 0x5f, 0x57, 0x58, 0x59, 0x51, 0x55, 0x50, 0x52, 0x54, 0x56,
377  0x0c, 0x0a, 0x0d, 0x03, 0x0e, 0x0b, 0x0f, 0x07, 0x08, 0x09, 0x01, 0x05,
378  0x00, 0x02, 0x04, 0x06, 0x2c, 0x2a, 0x2d, 0x23, 0x2e, 0x2b, 0x2f, 0x27,
379  0x28, 0x29, 0x21, 0x25, 0x20, 0x22, 0x24, 0x26, 0x4c, 0x4a, 0x4d, 0x43,
380  0x4e, 0x4b, 0x4f, 0x47, 0x48, 0x49, 0x41, 0x45, 0x40, 0x42, 0x44, 0x46,
381  0x6c, 0x6a, 0x6d, 0x63, 0x6e, 0x6b, 0x6f, 0x67, 0x68, 0x69, 0x61, 0x65,
382  0x60, 0x62, 0x64, 0x66,
383
384  // Put the round constants at the end of the S-box table so that
385  // they can be accessed from the same pgmspace base pointer.
386  RC(0x13198A2E03707344ULL),
387  RC(0xA4093822299F31D0ULL),
388  RC(0x082EFA98EC4E6C89ULL),
389  RC(0x452821E638D01377ULL),
390  RC(0xBE5466CF34E90C6CULL),
391  RC(0xC0AC29B7C97C50DDULL),
392  RC(0x3F84D5B5B5470917ULL),
393  RC(0x9216D5D98979FB1BULL)
394 };
395 
396 // Figure out how to do lookups from a pgmspace sbox table on this platform.
// Every variant defines the same four assembly fragments:
//   SBOX(reg)       - replace reg with the table entry indexed by reg.
//   RC_SETUP(reg)   - point Z at the round constant whose byte offset is in
//                     reg, which lives 256 bytes past the start of the table.
//   RC_ADD(reg)     - XOR the next round constant byte into reg, advancing Z.
//   RC_CLEANUP(reg) - move Z back to the start of the table.
// All of them expect r31 (and r24 for the RAMPZ variant) to already hold the
// upper part of the table address; RC_SETUP loads 1 into r25 as scratch.
//
// Variant for devices with RAMPZ: uses elpm, with the RAMPZ value in r24.
397 #if defined(RAMPZ)
398 #define SBOX(reg) \
399  "mov r30," reg "\n" \
400  "out %6,r24\n" \
401  "elpm " reg ",Z\n"
402 #define RC_SETUP(reg) \
403  "ldi r25,1\n" \
404  "mov r30," reg "\n" \
405  "add r31,r25\n" \
406  "adc r24,__zero_reg__\n" \
407  "out %6,r24\n"
408 #define RC_CLEANUP(reg) \
409  "sub r31,r25\n" \
410  "sbc r24,__zero_reg__\n" \
411  "sbiw r30,8\n" \
412  "sbc r24,__zero_reg__\n"
413 #define RC_ADD(reg) \
414  "elpm r0,Z+\n" \
415  "eor " reg ",r0\n"
// Variant using the "lpm Rd,Z" and "lpm Rd,Z+" forms of the instruction.
416 #elif defined(__AVR_HAVE_LPMX__)
417 #define SBOX(reg) \
418  "mov r30," reg "\n" \
419  "lpm " reg ",Z\n"
420 #define RC_SETUP(reg) \
421  "ldi r25,1\n" \
422  "mov r30," reg "\n" \
423  "add r31,r25\n"
424 #define RC_CLEANUP(reg) \
425  "sub r31,r25\n" \
426  "sbiw r30,8\n"
427 #define RC_ADD(reg) \
428  "lpm r0,Z+\n" \
429  "eor " reg ",r0\n"
// Variant for __AVR_TINY__: reads the table with ordinary "ld" loads.
430 #elif defined(__AVR_TINY__)
431 #define SBOX(reg) \
432  "mov r30," reg "\n" \
433  "ld " reg ",Z\n"
434 #define RC_SETUP(reg) \
435  "ldi r25,1\n" \
436  "mov r30," reg "\n" \
437  "add r31,r25\n"
438 #define RC_CLEANUP(reg) \
439  "sub r31,r25\n" \
440  "sbiw r30,8\n"
441 #define RC_ADD(reg) \
442  "ld r0,Z+\n" \
443  "eor " reg ",r0\n"
// Fallback variant: plain "lpm" into r0, advancing Z by hand in RC_ADD.
444 #else
445 #define SBOX(reg) \
446  "mov r30," reg "\n" \
447  "lpm\n" \
448  "mov " reg ",r0\n"
449 #define RC_SETUP(reg) \
450  "ldi r25,1\n" \
451  "mov r30," reg "\n" \
452  "add r31,r25\n"
453 #define RC_CLEANUP(reg) \
454  "sub r31,r25\n" \
455  "sbiw r30,8\n"
456 #define RC_ADD(reg) \
457  "lpm\n" \
458  "eor " reg ",r0\n" \
459  "adiw r30,1\n"
460 #endif
461 
462 // Mix the columns during an encryption round.
// Given the four registers that hold the same byte position of rows 0..3,
// each row is replaced by the XOR of the other three rows.
// Uses __tmp_reg__ and r25 as scratch registers.
463 #define MIX_COLUMNS(row0, row1, row2, row3) \
464  "mov __tmp_reg__," row0 "\n" \
465  "mov r25," row2 "\n" \
466  "mov " row0 "," row1 "\n" \
467  "mov " row1 ",__tmp_reg__\n" \
468  "mov " row2 "," row3 "\n" \
469  "mov " row3 ",r25\n" \
470  "eor __tmp_reg__," row0 "\n" \
471  "eor r25," row2 "\n" \
472  "eor " row0 ",r25\n" \
473  "eor " row1 ",r25\n" \
474  "eor " row2 ",__tmp_reg__\n" \
475  "eor " row3 ",__tmp_reg__\n" \
476 
477 #else // !USE_AVR_INLINE_ASM
478 
// A 64-bit 4x4 array of 4-bit cells.  The same storage can be addressed as
// four 16-bit rows or as two 32-bit words that each span a pair of rows.
union MantisCells_t
{
    uint16_t row[4];
    uint32_t lrow[2];
};
485 
// Applies the MIDORI Sb0 S-box (section 4.2 of
// https://eprint.iacr.org/2015/1142.pdf) to all eight nibbles of d at once.
//
// Within each nibble the input bits are named b3 (MSB) down to b0 (LSB).
// The shifted copies line every input bit up with bit 0 of its nibble, the
// four output bits are computed there, and the final masks and shifts move
// each output bit back to its own position.
inline uint32_t mantis_sbox(uint32_t d)
{
    const uint32_t lsb = 0x11111111U;   // Bit 0 of every nibble.

    const uint32_t b3 = d >> 3;
    const uint32_t b2 = d >> 2;
    const uint32_t b1 = d >> 1;
    const uint32_t b0 = d;

    // Shared sub-terms.
    const uint32_t nand32 = ~(b3 & b2);
    const uint32_t nor30 = ~(b3 | b0);

    const uint32_t out3 = (nand32 & ~b1) | nor30;
    const uint32_t out2 = nor30 | (b2 & b1) | (b3 & b1 & b0);
    const uint32_t out1 = (b2 & b0) | (~b3 & (b2 | b0));
    const uint32_t out0 = (b3 | b2 | b1) & nand32 & (b1 | b0);

    uint32_t result = out0 & lsb;
    result |= (out1 & lsb) << 1;
    result |= (out2 & lsb) << 2;
    result |= (out3 & lsb) << 3;
    return result;
}
511 
512 inline void mantis_update_tweak(MantisCells_t *tweak)
513 {
514  /* h = [6, 5, 14, 15, 0, 1, 2, 3, 7, 12, 13, 4, 8, 9, 10, 11] */
515  uint16_t row1 = tweak->row[1];
516  uint16_t row3 = tweak->row[3];
517  tweak->row[1] = tweak->row[0];
518  tweak->row[3] = tweak->row[2];
519  tweak->row[0] = ((row1 >> 8) & 0x00F0U) |
520  (row1 & 0x000FU) |
521  (row3 & 0xFF00U);
522  tweak->row[2] = ((row1 << 4) & 0x0F00U) |
523  ((row1 >> 4) & 0x00F0U) |
524  ((row3 >> 4) & 0x000FU) |
525  ((row3 << 12) & 0xF000U);
526 }
527 
528 inline void mantis_update_tweak_inverse(MantisCells_t *tweak)
529 {
530  /* h' = [4, 5, 6, 7, 11, 1, 0, 8, 12, 13, 14, 15, 9, 10, 2, 3] */
531  uint16_t row0 = tweak->row[0];
532  uint16_t row2 = tweak->row[2];
533  tweak->row[0] = tweak->row[1];
534  tweak->row[2] = tweak->row[3];
535  tweak->row[1] = ((row2 >> 4) & 0x00F0U) |
536  ((row2 << 4) & 0x0F00U) |
537  (row0 & 0x000FU) |
538  ((row0 << 8) & 0xF000U);
539  tweak->row[3] = (row0 & 0xFF00U) |
540  ((row2 << 4) & 0x00F0U) |
541  ((row2 >> 12) & 0x000FU);
542 }
543 
544 inline void mantis_shift_rows(MantisCells_t *state)
545 {
546  /* P = [0, 11, 6, 13, 10, 1, 12, 7, 5, 14, 3, 8, 15, 4, 9, 2] */
547  uint16_t row0 = state->row[0];
548  uint16_t row1 = state->row[1];
549  uint16_t row2 = state->row[2];
550  uint16_t row3 = state->row[3];
551  state->row[0] = (row0 & 0x00F0U) |
552  (row1 & 0xF000U) |
553  ((row2 >> 8) & 0x000FU) |
554  ((row3 << 8) & 0x0F00U);
555  state->row[1] = (row0 & 0x000FU) |
556  (row1 & 0x0F00U) |
557  ((row2 >> 8) & 0x00F0U) |
558  ((row3 << 8) & 0xF000U);
559  state->row[2] = ((row0 << 4) & 0xF000U) |
560  ((row1 << 4) & 0x00F0U) |
561  ((row2 << 4) & 0x0F00U) |
562  ((row3 >> 12) & 0x000FU);
563  state->row[3] = ((row0 >> 4) & 0x0F00U) |
564  ((row1 >> 4) & 0x000FU) |
565  ((row2 << 12) & 0xF000U) |
566  ((row3 >> 4) & 0x00F0U);
567 }
568 
569 inline void mantis_shift_rows_inverse(MantisCells_t *state)
570 {
571  /* P' = [0, 5, 15, 10, 13, 8, 2, 7, 11, 14, 4, 1, 6, 3, 9, 12] */
572  uint16_t row0 = state->row[0];
573  uint16_t row1 = state->row[1];
574  uint16_t row2 = state->row[2];
575  uint16_t row3 = state->row[3];
576  state->row[0] = (row0 & 0x00F0U) |
577  (row1 & 0x000FU) |
578  ((row2 >> 4) & 0x0F00U) |
579  ((row3 << 4) & 0xF000U);
580  state->row[1] = (row0 & 0xF000U) |
581  (row1 & 0x0F00U) |
582  ((row2 >> 4) & 0x000FU) |
583  ((row3 << 4) & 0x00F0U);
584  state->row[2] = ((row0 << 8) & 0x0F00U) |
585  ((row1 << 8) & 0xF000U) |
586  ((row2 >> 4) & 0x00F0U) |
587  ((row3 >> 12) & 0x000FU);
588  state->row[3] = ((row0 >> 8) & 0x000FU) |
589  ((row1 >> 8) & 0x00F0U) |
590  ((row2 << 12) & 0xF000U) |
591  ((row3 << 4) & 0x0F00U);
592 }
593 
594 inline void mantis_mix_columns(MantisCells_t *state)
595 {
596  uint16_t t0 = state->row[0];
597  uint16_t t1 = state->row[1];
598  uint16_t t2 = state->row[2];
599  uint16_t t3 = state->row[3];
600  state->row[0] = t1 ^ t2 ^ t3;
601  state->row[1] = t0 ^ t2 ^ t3;
602  state->row[2] = t0 ^ t1 ^ t3;
603  state->row[3] = t0 ^ t1 ^ t2;
604 }
605 
606 #endif // !USE_AVR_INLINE_ASM
607 
// Encrypts a single 8-byte block from input into output using the key and
// tweak held in st.
//
// Both code paths perform the same sequence: whiten the state with
// k0 ^ k1 ^ tweak, run eight forward rounds, apply the middle
// sbox / mix-columns / sbox layer, run eight reverse rounds keyed with
// k1 ^ ALPHA, and finally whiten with k0prime ^ (k1 ^ ALPHA) ^ tweak.
// The tweak is permuted with h in every forward round and with the inverse
// of h in every reverse round.
//
// On AVR the work is done by hand-written assembly using the sbox table;
// elsewhere it is done by the portable mantis_*() helper functions.
608 void Mantis8::encryptBlock(uint8_t *output, const uint8_t *input)
609 {
610 #if USE_AVR_INLINE_ASM
611 #if defined(RAMPZ)
612  uint32_t sbox_addr = (uint32_t)sbox;
613 #else
614  uint16_t sbox_addr = (uint16_t)sbox;
615 #endif
// Scratch copy of k1 for the assembly code; it is overwritten with
// k1 ^ ALPHA before the reverse rounds.
616  uint32_t k1_0, k1_1;
// Register use inside the assembly: r8..r15 hold the state, r16..r23 hold
// the tweak, r7 is the round constant offset / loop counter, and r6, r24,
// r25 and __tmp_reg__ are temporaries.
617  __asm__ __volatile__ (
618  // Load the input block into r8..r15.
619  "ld r8,X+\n"
620  "ld r9,X+\n"
621  "ld r10,X+\n"
622  "ld r11,X+\n"
623  "ld r12,X+\n"
624  "ld r13,X+\n"
625  "ld r14,X+\n"
626  "ld r15,X+\n"
627
628  // Load k1 from the state into k1_0 and k1_1 and XOR with the state.
629  "ldd r16,Z+16\n"
630  "ldd r17,Z+17\n"
631  "ldd r18,Z+18\n"
632  "ldd r19,Z+19\n"
633  "ldd r20,Z+20\n"
634  "ldd r21,Z+21\n"
635  "ldd r22,Z+22\n"
636  "ldd r23,Z+23\n"
637  "eor r8,r16\n"
638  "eor r9,r17\n"
639  "eor r10,r18\n"
640  "eor r11,r19\n"
641  "eor r12,r20\n"
642  "eor r13,r21\n"
643  "eor r14,r22\n"
644  "eor r15,r23\n"
645  "std %A4,r16\n"
646  "std %B4,r17\n"
647  "std %C4,r18\n"
648  "std %D4,r19\n"
649  "std %A5,r20\n"
650  "std %B5,r21\n"
651  "std %C5,r22\n"
652  "std %D5,r23\n"
653
654  // Load the tweak into r16..r23.
655  "ldd r16,Z+24\n"
656  "ldd r17,Z+25\n"
657  "ldd r18,Z+26\n"
658  "ldd r19,Z+27\n"
659  "ldd r20,Z+28\n"
660  "ldd r21,Z+29\n"
661  "ldd r22,Z+30\n"
662  "ldd r23,Z+31\n"
663
664  // XOR the initial whitening key k0 and the tweak with the state.
665  // state.lrow[0] ^= st.k0[0] ^ k1.lrow[0] ^ tweak.lrow[0];
666  // state.lrow[1] ^= st.k0[1] ^ k1.lrow[1] ^ tweak.lrow[1];
667  // Note: k1 was already XOR'ed in above prior to loading the tweak.
668  "ld __tmp_reg__,Z\n" // r8 ^= k0[0] ^ k1[0] ^ r16
669  "eor r8,__tmp_reg__\n"
670  "eor r8,r16\n"
671  "ldd __tmp_reg__,Z+1\n" // r9 ^= k0[1] ^ k1[1] ^ r17
672  "eor r9,__tmp_reg__\n"
673  "eor r9,r17\n"
674  "ldd __tmp_reg__,Z+2\n" // r10 ^= k0[2] ^ k1[2] ^ r18
675  "eor r10,__tmp_reg__\n"
676  "eor r10,r18\n"
677  "ldd __tmp_reg__,Z+3\n" // r11 ^= k0[3] ^ k1[3] ^ r19
678  "eor r11,__tmp_reg__\n"
679  "eor r11,r19\n"
680  "ldd __tmp_reg__,Z+4\n" // r12 ^= k0[4] ^ k1[4] ^ r20
681  "eor r12,__tmp_reg__\n"
682  "eor r12,r20\n"
683  "ldd __tmp_reg__,Z+5\n" // r13 ^= k0[5] ^ k1[5] ^ r21
684  "eor r13,__tmp_reg__\n"
685  "eor r13,r21\n"
686  "ldd __tmp_reg__,Z+6\n" // r14 ^= k0[6] ^ k1[6] ^ r22
687  "eor r14,__tmp_reg__\n"
688  "eor r14,r22\n"
689  "ldd __tmp_reg__,Z+7\n" // r15 ^= k0[7] ^ k1[7] ^ r23
690  "eor r15,__tmp_reg__\n"
691  "eor r15,r23\n"
692
693  // Save the state pointer in Z into X.
694  "movw r26,r30\n"
695
696  // Set up Z to point to the start of the sbox table.
697 #if defined(RAMPZ)
698  "in __tmp_reg__,%6\n"
699  "push __tmp_reg__\n"
700 #endif
701  "ldd r30,%A3\n"
702  "ldd r31,%B3\n"
703
704  // Top of the loop for the eight forward rounds.
705  "clr r7\n" // r7 is the RC table offset and loop counter.
706  "1:\n"
707
708  // Update the tweak using the h permutation.
709  // h = [6, 5, 14, 15, 0, 1, 2, 3, 7, 12, 13, 4, 8, 9, 10, 11]
710  "push r16\n" // Save [0, 1, 2, 3, 8, 9, 10, 11] on the stack.
711  "push r17\n"
712  "push r20\n"
713  "push r21\n"
714  "mov r17,r23\n" // TK[2/3] = TK[14/15]
715  "mov r16,r18\n" // TK[1] = TK[5]
716  "andi r16,0x0F\n"
717  "swap r18\n" // TK[11] = TK[4]
718  "mov r21,r18\n"
719  "andi r21,0x0F\n"
720  "mov r20,r19\n" // TK[8] = TK[7]
721  "swap r20\n"
722  "andi r20,0xF0\n"
723  "andi r19,0xF0\n" // TK[0] = TK[6]
724  "or r16,r19\n"
725  "swap r22\n" // TK[9] = TK[12]
726  "mov r23,r22\n"
727  "andi r23,0x0F\n"
728  "or r20,r23\n"
729  "andi r22,0xF0\n" // TK[10] = TK[13]
730  "or r21,r22\n"
731  "pop r23\n" // Restore saved values from the stack
732  "pop r22\n" // into [4, 5, 6, 7, 12, 13, 14, 15]
733  "pop r19\n"
734  "pop r18\n"
735
736  // Transform the state using the sbox.
737 #if defined(RAMPZ)
738  "ldd r24,%C3\n"
739 #endif
740  SBOX("r8")
741  SBOX("r9")
742  SBOX("r10")
743  SBOX("r11")
744  SBOX("r12")
745  SBOX("r13")
746  SBOX("r14")
747  SBOX("r15")
748
749  // Add the round constant.
750  RC_SETUP("r7")
751  RC_ADD("r8")
752  RC_ADD("r9")
753  RC_ADD("r10")
754  RC_ADD("r11")
755  RC_ADD("r12")
756  RC_ADD("r13")
757  RC_ADD("r14")
758  RC_ADD("r15")
759  RC_CLEANUP("r7")
760
761  // XOR with the key and tweak.
762  // state.lrow[0] ^= k1.lrow[0] ^ tweak.lrow[0];
763  // state.lrow[1] ^= k1.lrow[1] ^ tweak.lrow[1];
764  "ldd __tmp_reg__,%A4\n"
765  "eor r8,__tmp_reg__\n"
766  "eor r8,r16\n"
767  "ldd __tmp_reg__,%B4\n"
768  "eor r9,__tmp_reg__\n"
769  "eor r9,r17\n"
770  "ldd __tmp_reg__,%C4\n"
771  "eor r10,__tmp_reg__\n"
772  "eor r10,r18\n"
773  "ldd __tmp_reg__,%D4\n"
774  "eor r11,__tmp_reg__\n"
775  "eor r11,r19\n"
776  "ldd __tmp_reg__,%A5\n"
777  "eor r12,__tmp_reg__\n"
778  "eor r12,r20\n"
779  "ldd __tmp_reg__,%B5\n"
780  "eor r13,__tmp_reg__\n"
781  "eor r13,r21\n"
782  "ldd __tmp_reg__,%C5\n"
783  "eor r14,__tmp_reg__\n"
784  "eor r14,r22\n"
785  "ldd __tmp_reg__,%D5\n"
786  "eor r15,__tmp_reg__\n"
787  "eor r15,r23\n"
788
789  // Shift the rows using the P permutation.
790  // P = [0, 11, 6, 13, 10, 1, 12, 7, 5, 14, 3, 8, 15, 4, 9, 2]
791  "ldi r24,0xF0\n"
792  "ldi r25,0x0F\n"
793  "mov __tmp_reg__,r8\n"
794  "and r8,r24\n" // S'[0] = S[0]
795  "mov r6,r10\n"
796  "mov r10,r13\n" // S'[4] = S[10]
797  "and r10,r24\n"
798  "and __tmp_reg__,r25\n" // S'[5] = S[1]
799  "or r10,__tmp_reg__\n"
800  "and r13,r25\n" // S'[1] = S[11]
801  "or r8,r13\n"
802  "swap r9\n" // S'[10] = S[3]
803  "mov r13,r9\n"
804  "and r13,r24\n"
805  "swap r12\n" // S'[11] = S[8]
806  "mov __tmp_reg__,r12\n"
807  "and __tmp_reg__,r25\n"
808  "or r13,__tmp_reg__\n"
809  "and r9,r25\n" // S'[15] = S[2]
810  "and r12,r24\n"
811  "or r12,r9\n" // S'[14] = S[9]
812  "mov r9,r11\n" // S'[2] = S[6]
813  "and r9,r24\n"
814  "mov __tmp_reg__,r14\n" // S'[3] = S[13]
815  "and __tmp_reg__,r25\n"
816  "or r9,__tmp_reg__\n"
817  "and r11,r25\n" // S'[7] = S[7]
818  "and r14,r24\n" // S'[6] = S[12]
819  "or r11,r14\n"
820  "mov r14,r15\n" // S'[12] = S[15]
821  "swap r14\n"
822  "mov __tmp_reg__,r14\n"
823  "mov r15,r12\n"
824  "and r14,r24\n"
825  "swap r6\n" // S'[8] = S[5]
826  "mov r12,r6\n"
827  "and r12,r24\n"
828  "and __tmp_reg__,r25\n" // S'[9] = S[14]
829  "or r12,__tmp_reg__\n"
830  "and r6,r25\n" // S'[13] = S[4]
831  "or r14,r6\n"
832
833  // Mix the columns.
834  MIX_COLUMNS("r8", "r10", "r12", "r14")
835  MIX_COLUMNS("r9", "r11", "r13", "r15")
836
837  // Bottom of the loop for the eight forward rounds.
838  "ldi r24,8\n" // r7 += 8
839  "add r7,r24\n" // loop if r7 < 64
840  "ldi r24,64\n"
841  "cp r7,r24\n"
842  "breq 2f\n"
843  "rjmp 1b\n"
844  "2:\n"
845
846  // Half-way there: sbox, mix, sbox.
847 #if defined(RAMPZ)
848  "ldd r24,%C3\n"
849 #endif
850  SBOX("r8")
851  SBOX("r9")
852  SBOX("r10")
853  SBOX("r11")
854  SBOX("r12")
855  SBOX("r13")
856  SBOX("r14")
857  SBOX("r15")
858  MIX_COLUMNS("r8", "r10", "r12", "r14")
859  MIX_COLUMNS("r9", "r11", "r13", "r15")
860  SBOX("r8")
861  SBOX("r9")
862  SBOX("r10")
863  SBOX("r11")
864  SBOX("r12")
865  SBOX("r13")
866  SBOX("r14")
867  SBOX("r15")
868
869  // Convert k1 into k1 XOR alpha for the reverse rounds.
870  // alpha = 0x243F6A8885A308D3
871  #define ALPHA_ADJUST(reg, c) \
872  "ldi r24," c "\n" \
873  "ldd __tmp_reg__," reg "\n" \
874  "eor __tmp_reg__,r24\n" \
875  "std " reg ",__tmp_reg__\n"
876  ALPHA_ADJUST("%A4", "0x24")
877  ALPHA_ADJUST("%B4", "0x3F")
878  ALPHA_ADJUST("%C4", "0x6A")
879  ALPHA_ADJUST("%D4", "0x88")
880  ALPHA_ADJUST("%A5", "0x85")
881  ALPHA_ADJUST("%B5", "0xA3")
882  ALPHA_ADJUST("%C5", "0x08")
883  ALPHA_ADJUST("%D5", "0xD3")
884
885  // Top of the loop for the eight reverse rounds.
886  "3:\n"
887  "ldi r24,8\n" // r7 -= 8
888  "sub r7,r24\n"
889
890  // Mix the columns.
891  MIX_COLUMNS("r8", "r10", "r12", "r14")
892  MIX_COLUMNS("r9", "r11", "r13", "r15")
893
894  // Shift the rows using the inverse of the P permutation.
895  // P' = [0, 5, 15, 10, 13, 8, 2, 7, 11, 14, 4, 1, 6, 3, 9, 12]
896  "ldi r24,0xF0\n"
897  "ldi r25,0x0F\n"
898  "mov __tmp_reg__,r8\n" // S'[0] = S[0]
899  "and r8,r24\n"
900  "mov r6,r10\n" // S'[1] = S[5]
901  "and r6,r25\n"
902  "or r8,r6\n"
903  "and __tmp_reg__,r25\n" // S'[11] = S[1]
904  "and r10,r24\n" // S'[10] = S[4]
905  "or r10,__tmp_reg__\n"
906  "mov __tmp_reg__,r13\n"
907  "mov r13,r10\n"
908  "swap __tmp_reg__\n" // S'[3] = S[10]
909  "mov r6,r9\n"
910  "mov r9,__tmp_reg__\n"
911  "and r9,r25\n"
912  "swap r15\n" // S'[2] = S[15]
913  "mov r10,r15\n"
914  "and r10,r24\n"
915  "or r9,r10\n"
916  "and r15,r25\n" // S'[9] = S[14]
917  "and __tmp_reg__,r24\n" // S'[8] = S[11]
918  "or r15,__tmp_reg__\n"
919  "mov __tmp_reg__,r11\n" // S'[7] = S[7]
920  "and r11,r25\n"
921  "and __tmp_reg__,r24\n" // S'[12] = S[6]
922  "mov r10,r6\n" // S'[6] = S[2]
923  "and r10,r24\n"
924  "or r11,r10\n"
925  "and r6,r25\n" // S'[13] = S[3]
926  "or r6,__tmp_reg__\n"
927  "swap r12\n" // S'[5] = S[8]
928  "swap r14\n" // S'[4] = S[13]
929  "mov r10,r12\n"
930  "and r10,r25\n"
931  "mov __tmp_reg__,r14\n"
932  "and __tmp_reg__,r24\n"
933  "or r10,__tmp_reg__\n"
934  "and r12,r24\n" // S'[14] = S[9]
935  "and r14,r25\n" // S'[15] = S[12]
936  "or r14,r12\n"
937  "mov r12,r15\n"
938  "mov r15,r14\n"
939  "mov r14,r6\n"
940
941  // XOR with the key and tweak.
942  // state.lrow[0] ^= k1.lrow[0] ^ tweak.lrow[0];
943  // state.lrow[1] ^= k1.lrow[1] ^ tweak.lrow[1];
944  "ldd __tmp_reg__,%A4\n"
945  "eor r8,__tmp_reg__\n"
946  "eor r8,r16\n"
947  "ldd __tmp_reg__,%B4\n"
948  "eor r9,__tmp_reg__\n"
949  "eor r9,r17\n"
950  "ldd __tmp_reg__,%C4\n"
951  "eor r10,__tmp_reg__\n"
952  "eor r10,r18\n"
953  "ldd __tmp_reg__,%D4\n"
954  "eor r11,__tmp_reg__\n"
955  "eor r11,r19\n"
956  "ldd __tmp_reg__,%A5\n"
957  "eor r12,__tmp_reg__\n"
958  "eor r12,r20\n"
959  "ldd __tmp_reg__,%B5\n"
960  "eor r13,__tmp_reg__\n"
961  "eor r13,r21\n"
962  "ldd __tmp_reg__,%C5\n"
963  "eor r14,__tmp_reg__\n"
964  "eor r14,r22\n"
965  "ldd __tmp_reg__,%D5\n"
966  "eor r15,__tmp_reg__\n"
967  "eor r15,r23\n"
968
969  // Add the round constant.
970 #if defined(RAMPZ)
971  "ldd r24,%C3\n"
972 #endif
973  RC_SETUP("r7")
974  RC_ADD("r8")
975  RC_ADD("r9")
976  RC_ADD("r10")
977  RC_ADD("r11")
978  RC_ADD("r12")
979  RC_ADD("r13")
980  RC_ADD("r14")
981  RC_ADD("r15")
982  RC_CLEANUP("r7")
983
984  // Transform the state using the sbox.
985  SBOX("r8")
986  SBOX("r9")
987  SBOX("r10")
988  SBOX("r11")
989  SBOX("r12")
990  SBOX("r13")
991  SBOX("r14")
992  SBOX("r15")
993
994  // Update the tweak using the inverse h permutation.
995  // h' = [4, 5, 6, 7, 11, 1, 0, 8, 12, 13, 14, 15, 9, 10, 2, 3]
996  "push r18\n" // Save [4, 5, 6, 7, 12, 13, 14, 15] on the stack.
997  "push r19\n"
998  "push r22\n"
999  "push r23\n"
1000  "mov r23,r17\n" // TK[14/15] = TK[2/3]
1001  "mov r19,r16\n" // TK[6] = TK[0]
1002  "andi r19,0xF0\n"
1003  "mov r18,r16\n" // TK[5] = TK[1]
1004  "andi r18,0x0F\n"
1005  "swap r20\n" // TK[12] = TK[9]
1006  "mov r22,r20\n"
1007  "andi r22,0xF0\n"
1008  "andi r20,0x0F\n" // TK[7] = TK[8]
1009  "or r19,r20\n"
1010  "swap r21\n"
1011  "mov r20,r21\n" // TK[4] = TK[11]
1012  "andi r20,0xF0\n"
1013  "or r18,r20\n"
1014  "andi r21,0x0F\n" // TK[13] = TK[10]
1015  "or r22,r21\n"
1016  "pop r21\n" // Restore saved values from the stack
1017  "pop r20\n" // into [0, 1, 2, 3, 8, 9, 10, 11]
1018  "pop r17\n"
1019  "pop r16\n"
1020
1021  // Bottom of the loop for the eight reverse rounds.
1022  "or r7,r7\n" // loop if r7 > 0
1023  "breq 4f\n"
1024  "rjmp 3b\n"
1025  "4:\n"
1026
1027  // Restore the original RAMPZ value.
1028 #if defined(RAMPZ)
1029  "pop __tmp_reg__\n"
1030  "out %6,__tmp_reg__\n"
1031 #endif
1032
1033  // Restore the state pointer from X into Z.
1034  "movw r30,r26\n"
1035
1036  // XOR the final whitening key k0prime with the state,
1037  // together with k1alpha and the final tweak value.
1038  // state.lrow[0] ^= st.k0prime[0] ^ k1.lrow[0] ^ tweak.lrow[0];
1039  // state.lrow[1] ^= st.k0prime[1] ^ k1.lrow[1] ^ tweak.lrow[1];
1040  "ldd __tmp_reg__,Z+8\n" // r8 ^= k0prime[0] ^ k1[0] ^ r16
1041  "eor r8,__tmp_reg__\n"
1042  "ldd __tmp_reg__,%A4\n"
1043  "eor r8,__tmp_reg__\n"
1044  "eor r8,r16\n"
1045  "ldd __tmp_reg__,Z+9\n" // r9 ^= k0prime[1] ^ k1[1] ^ r17
1046  "eor r9,__tmp_reg__\n"
1047  "ldd __tmp_reg__,%B4\n"
1048  "eor r9,__tmp_reg__\n"
1049  "eor r9,r17\n"
1050  "ldd __tmp_reg__,Z+10\n" // r10 ^= k0prime[2] ^ k1[2] ^ r18
1051  "eor r10,__tmp_reg__\n"
1052  "ldd __tmp_reg__,%C4\n"
1053  "eor r10,__tmp_reg__\n"
1054  "eor r10,r18\n"
1055  "ldd __tmp_reg__,Z+11\n" // r11 ^= k0prime[3] ^ k1[3] ^ r19
1056  "eor r11,__tmp_reg__\n"
1057  "ldd __tmp_reg__,%D4\n"
1058  "eor r11,__tmp_reg__\n"
1059  "eor r11,r19\n"
1060  "ldd __tmp_reg__,Z+12\n" // r12 ^= k0prime[4] ^ k1[4] ^ r20
1061  "eor r12,__tmp_reg__\n"
1062  "ldd __tmp_reg__,%A5\n"
1063  "eor r12,__tmp_reg__\n"
1064  "eor r12,r20\n"
1065  "ldd __tmp_reg__,Z+13\n" // r13 ^= k0prime[5] ^ k1[5] ^ r21
1066  "eor r13,__tmp_reg__\n"
1067  "ldd __tmp_reg__,%B5\n"
1068  "eor r13,__tmp_reg__\n"
1069  "eor r13,r21\n"
1070  "ldd __tmp_reg__,Z+14\n" // r14 ^= k0prime[6] ^ k1[6] ^ r22
1071  "eor r14,__tmp_reg__\n"
1072  "ldd __tmp_reg__,%C5\n"
1073  "eor r14,__tmp_reg__\n"
1074  "eor r14,r22\n"
1075  "ldd __tmp_reg__,Z+15\n" // r15 ^= k0prime[7] ^ k1[7] ^ r23
1076  "eor r15,__tmp_reg__\n"
1077  "ldd __tmp_reg__,%D5\n"
1078  "eor r15,__tmp_reg__\n"
1079  "eor r15,r23\n"
1080
1081  // Store r8..r15 to the output block.
1082  "ldd r26,%A2\n"
1083  "ldd r27,%B2\n"
1084  "st X+,r8\n"
1085  "st X+,r9\n"
1086  "st X+,r10\n"
1087  "st X+,r11\n"
1088  "st X+,r12\n"
1089  "st X+,r13\n"
1090  "st X+,r14\n"
1091  "st X+,r15\n"
// Operands: %0 = input (in X), %1 = &st (in Z), %2 = output,
// %3 = sbox_addr, %4 and %5 = the k1_0 and k1_1 scratch words, and
// %6 = the I/O address of RAMPZ when RAMPZ is defined.
// NOTE(review): the assembly changes X and Z and stores into %4 and %5
// although all of these are declared as input operands - confirm that
// this is safe with the compiler in use.
1092  : : "x"(input), "z"(&st), "Q"(output), "Q"(sbox_addr),
1093  "Q"(k1_0), "Q"(k1_1)
1094 #if defined(RAMPZ)
1095  , "I" (_SFR_IO_ADDR(RAMPZ))
1096 #endif
1097  : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
1098  "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
1099  "r24", "r25", "r6", "r7", "memory"
1100  );
1101 #else // !USE_AVR_INLINE_ASM
// r walks the round constant table: forwards through the forward rounds
// and then back again through the reverse rounds.
1102  const uint32_t *r = rc[0];
1103  MantisCells_t tweak;
1104  MantisCells_t k1;
1105  MantisCells_t state;
1106  uint8_t index;
1107
1108  // Copy the initial tweak and k1 values into local variables.
1109  tweak.lrow[0] = st.tweak[0];
1110  tweak.lrow[1] = st.tweak[1];
1111  k1.lrow[0] = st.k1[0];
1112  k1.lrow[1] = st.k1[1];
1113
1114  // Read the input buffer and convert little-endian to host-endian.
1115  mantis_unpack_block(state.lrow, input);
1116
1117  // XOR the initial whitening key k0 with the state,
1118  // together with k1 and the initial tweak value.
1119  state.lrow[0] ^= st.k0[0] ^ k1.lrow[0] ^ tweak.lrow[0];
1120  state.lrow[1] ^= st.k0[1] ^ k1.lrow[1] ^ tweak.lrow[1];
1121
1122  // Perform all eight forward rounds.
1123  for (index = 8; index > 0; --index) {
1124  // Update the tweak with the forward h function.
1125  mantis_update_tweak(&tweak);
1126
1127  // Apply the S-box.
1128  state.lrow[0] = mantis_sbox(state.lrow[0]);
1129  state.lrow[1] = mantis_sbox(state.lrow[1]);
1130
1131  // Add the round constant.
1132  state.lrow[0] ^= r[0];
1133  state.lrow[1] ^= r[1];
1134  r += 2;
1135
1136  // XOR with the key and tweak.
1137  state.lrow[0] ^= k1.lrow[0] ^ tweak.lrow[0];
1138  state.lrow[1] ^= k1.lrow[1] ^ tweak.lrow[1];
1139
1140  // Shift the rows.
1141  mantis_shift_rows(&state);
1142
1143  // Mix the columns.
1144  mantis_mix_columns(&state);
1145  }
1146
1147  // Half-way there: sbox, mix, sbox.
1148  state.lrow[0] = mantis_sbox(state.lrow[0]);
1149  state.lrow[1] = mantis_sbox(state.lrow[1]);
1150  mantis_mix_columns(&state);
1151  state.lrow[0] = mantis_sbox(state.lrow[0]);
1152  state.lrow[1] = mantis_sbox(state.lrow[1]);
1153
1154  // Convert k1 into k1 XOR alpha for the reverse rounds.
1155  k1.lrow[0] ^= ALPHA_ROW0;
1156  k1.lrow[1] ^= ALPHA_ROW1;
1157
1158  // Perform all eight reverse rounds.
1159  for (index = 8; index > 0; --index) {
1160  // Inverse mix of the columns (same as the forward mix).
1161  mantis_mix_columns(&state);
1162
1163  // Inverse shift of the rows.
1164  mantis_shift_rows_inverse(&state);
1165
1166  /* XOR with the key and tweak */
1167  state.lrow[0] ^= k1.lrow[0] ^ tweak.lrow[0];
1168  state.lrow[1] ^= k1.lrow[1] ^ tweak.lrow[1];
1169
1170  // Add the round constant.
1171  r -= 2;
1172  state.lrow[0] ^= r[0];
1173  state.lrow[1] ^= r[1];
1174
1175  // Apply the inverse S-box (which is the same as the forward S-box).
1176  state.lrow[0] = mantis_sbox(state.lrow[0]);
1177  state.lrow[1] = mantis_sbox(state.lrow[1]);
1178
1179  // Update the tweak with the reverse h function.
1180  mantis_update_tweak_inverse(&tweak);
1181  }
1182
1183  // XOR the final whitening key k0prime with the state,
1184  // together with k1alpha and the final tweak value.
1185  state.lrow[0] ^= st.k0prime[0] ^ k1.lrow[0] ^ tweak.lrow[0];
1186  state.lrow[1] ^= st.k0prime[1] ^ k1.lrow[1] ^ tweak.lrow[1];
1187
1188  // Convert host-endian back into little-endian in the output buffer.
1189  uint32_t x = state.lrow[0];
1190  output[0] = (uint8_t)x;
1191  output[1] = (uint8_t)(x >> 8);
1192  output[2] = (uint8_t)(x >> 16);
1193  output[3] = (uint8_t)(x >> 24);
1194  x = state.lrow[1];
1195  output[4] = (uint8_t)x;
1196  output[5] = (uint8_t)(x >> 8);
1197  output[6] = (uint8_t)(x >> 16);
1198  output[7] = (uint8_t)(x >> 24);
1199 #endif // !USE_AVR_INLINE_ASM
1200 }
1201 
1202 void Mantis8::decryptBlock(uint8_t *output, const uint8_t *input)
1203 {
1204  // Decryption is the same as encryption - need to use swapModes()
1205  // after setKey() to select decryption mode instead of encryption.
1206  encryptBlock(output, input);
1207 }
1208 
1210 {
1211  clean(st);
1212 }
void swapModes()
Swaps the encryption/decryption mode for this Mantis block cipher.
Definition: Mantis8.cpp:322
bool setKey(const uint8_t *key, size_t len)
Sets the key to use for future encryption and decryption operations.
Definition: Mantis8.cpp:163
size_t blockSize() const
Size of a Mantis-8 block in bytes.
Definition: Mantis8.cpp:117
void decryptBlock(uint8_t *output, const uint8_t *input)
Decrypts a single block using this cipher.
Definition: Mantis8.cpp:1202
uint32_t lrow[2]
virtual ~Mantis8()
Destroys this Mantis-8 block cipher object after clearing sensitive information.
Definition: Mantis8.cpp:108
void encryptBlock(uint8_t *output, const uint8_t *input)
Encrypts a single block using this cipher.
Definition: Mantis8.cpp:608
void clear()
Clears all security-sensitive state from this block cipher.
Definition: Mantis8.cpp:1209
bool setTweak(const uint8_t *tweak, size_t len)
Sets the 64-bit tweak value for this Mantis-8 block cipher.
Definition: Mantis8.cpp:266
size_t keySize() const
Size of a Mantis-8 key in bytes.
Definition: Mantis8.cpp:126
Mantis8()
Constructs a new Mantis-8 tweakable block cipher instance.
Definition: Mantis8.cpp:100
Union that describes a 64-bit 4x4 array of cells.
Definition: mantis-cipher.h:97
uint16_t row[4]
Definition: mantis-cipher.h:99