Skinny-C
 All Data Structures Files Functions Variables Groups Pages
mantis-parallel-vec128.c
1 /*
2  * Copyright (C) 2017 Southern Storm Software, Pty Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "mantis-parallel.h"
24 #include "skinny-internal.h"
25 
26 #if SKINNY_VEC128_MATH
27 
31 typedef union
32 {
33  SkinnyVector8x16_t row[4];
34 
35 } MantisVectorCells_t;
36 
37 STATIC_INLINE SkinnyVector8x16_t mantis_sbox(SkinnyVector8x16_t d)
38 {
39  /*
40  * MIDORI Sb0 from section 4.2 of https://eprint.iacr.org/2015/1142.pdf
41  *
42  * {a, b, c, d} -> {aout, bout, cout, dout} where a/aout is the MSB.
43  *
44  * aout = NAND(NAND(~c, NAND(a, b)), (a | d))
45  * bout = NAND(NOR(NOR(a, d), (b & c)), NAND((a & c), d))
46  * cout = NAND(NAND(b, d), (NOR(b, d) | a))
47  * dout = NOR(NOR(a, (b | c)), NAND(NAND(a, b), (c | d)))
48  */
49  SkinnyVector8x16_t a = (d >> 3);
50  SkinnyVector8x16_t b = (d >> 2);
51  SkinnyVector8x16_t c = (d >> 1);
52  SkinnyVector8x16_t not_a = ~a;
53  SkinnyVector8x16_t ab = not_a | (~b);
54  SkinnyVector8x16_t ad = not_a & (~d);
55  SkinnyVector8x16_t aout = (((~c) & ab) | ad);
56  SkinnyVector8x16_t bout = ad | (b & c) | (a & c & d);
57  SkinnyVector8x16_t cout = (b & d) | ((b | d) & not_a);
58  SkinnyVector8x16_t dout = (a | b | c) & ab & (c | d);
59  return ((aout & 0x1111U) << 3) | ((bout & 0x1111U) << 2) |
60  ((cout & 0x1111U) << 1) | (dout & 0x1111U);
61 }
62 
63 STATIC_INLINE void mantis_update_tweak(MantisVectorCells_t *tweak)
64 {
65  /* h = [6, 5, 14, 15, 0, 1, 2, 3, 7, 12, 13, 4, 8, 9, 10, 11] */
66  SkinnyVector8x16_t row1 = tweak->row[1];
67  SkinnyVector8x16_t row3 = tweak->row[3];
68  tweak->row[1] = tweak->row[0];
69  tweak->row[3] = tweak->row[2];
70  tweak->row[0] = ((row1 >> 8) & 0x00F0U) |
71  (row1 & 0x000FU) |
72  (row3 & 0xFF00U);
73  tweak->row[2] = ((row1 << 4) & 0x0F00U) |
74  ((row1 >> 4) & 0x00F0U) |
75  ((row3 >> 4) & 0x000FU) |
76  ((row3 << 12) & 0xF000U);
77 }
78 
79 STATIC_INLINE void mantis_update_tweak_inverse(MantisVectorCells_t *tweak)
80 {
81  /* h' = [4, 5, 6, 7, 11, 1, 0, 8, 12, 13, 14, 15, 9, 10, 2, 3] */
82  SkinnyVector8x16_t row0 = tweak->row[0];
83  SkinnyVector8x16_t row2 = tweak->row[2];
84  tweak->row[0] = tweak->row[1];
85  tweak->row[2] = tweak->row[3];
86  tweak->row[1] = ((row2 >> 4) & 0x00F0U) |
87  ((row2 << 4) & 0x0F00U) |
88  (row0 & 0x000FU) |
89  ((row0 << 8) & 0xF000U);
90  tweak->row[3] = (row0 & 0xFF00U) |
91  ((row2 << 4) & 0x00F0U) |
92  ((row2 >> 12) & 0x000FU);
93 }
94 
95 STATIC_INLINE void mantis_shift_rows(MantisVectorCells_t *state)
96 {
97  /* P = [0, 11, 6, 13, 10, 1, 12, 7, 5, 14, 3, 8, 15, 4, 9, 2] */
98  SkinnyVector8x16_t row0 = state->row[0];
99  SkinnyVector8x16_t row1 = state->row[1];
100  SkinnyVector8x16_t row2 = state->row[2];
101  SkinnyVector8x16_t row3 = state->row[3];
102  state->row[0] = (row0 & 0x00F0U) |
103  (row1 & 0xF000U) |
104  ((row2 >> 8) & 0x000FU) |
105  ((row3 << 8) & 0x0F00U);
106  state->row[1] = (row0 & 0x000FU) |
107  (row1 & 0x0F00U) |
108  ((row2 >> 8) & 0x00F0U) |
109  ((row3 << 8) & 0xF000U);
110  state->row[2] = ((row0 << 4) & 0xF000U) |
111  ((row1 << 4) & 0x00F0U) |
112  ((row2 << 4) & 0x0F00U) |
113  ((row3 >> 12) & 0x000FU);
114  state->row[3] = ((row0 >> 4) & 0x0F00U) |
115  ((row1 >> 4) & 0x000FU) |
116  ((row2 << 12) & 0xF000U) |
117  ((row3 >> 4) & 0x00F0U);
118 }
119 
120 STATIC_INLINE void mantis_shift_rows_inverse(MantisVectorCells_t *state)
121 {
122  /* P' = [0, 5, 15, 10, 13, 8, 2, 7, 11, 14, 4, 1, 6, 3, 9, 12] */
123  SkinnyVector8x16_t row0 = state->row[0];
124  SkinnyVector8x16_t row1 = state->row[1];
125  SkinnyVector8x16_t row2 = state->row[2];
126  SkinnyVector8x16_t row3 = state->row[3];
127  state->row[0] = (row0 & 0x00F0U) |
128  (row1 & 0x000FU) |
129  ((row2 >> 4) & 0x0F00U) |
130  ((row3 << 4) & 0xF000U);
131  state->row[1] = (row0 & 0xF000U) |
132  (row1 & 0x0F00U) |
133  ((row2 >> 4) & 0x000FU) |
134  ((row3 << 4) & 0x00F0U);
135  state->row[2] = ((row0 << 8) & 0x0F00U) |
136  ((row1 << 8) & 0xF000U) |
137  ((row2 >> 4) & 0x00F0U) |
138  ((row3 >> 12) & 0x000FU);
139  state->row[3] = ((row0 >> 8) & 0x000FU) |
140  ((row1 >> 8) & 0x00F0U) |
141  ((row2 << 12) & 0xF000U) |
142  ((row3 << 4) & 0x0F00U);
143 }
144 
145 STATIC_INLINE void mantis_mix_columns(MantisVectorCells_t *state)
146 {
147  SkinnyVector8x16_t t0 = state->row[0];
148  SkinnyVector8x16_t t1 = state->row[1];
149  SkinnyVector8x16_t t2 = state->row[2];
150  SkinnyVector8x16_t t3 = state->row[3];
151  state->row[0] = t1 ^ t2 ^ t3;
152  state->row[1] = t0 ^ t2 ^ t3;
153  state->row[2] = t0 ^ t1 ^ t3;
154  state->row[3] = t0 ^ t1 ^ t2;
155 }
156 
/*
 * Extract the 16 bits for a row from a 64-bit round constant.
 *
 * The 16-bit field that starts at bit "shift" is taken and its two bytes
 * are swapped: the field's upper byte lands in bits 0..7 of the result
 * and its lower byte in bits 8..15.  Note that "x" is evaluated twice.
 */
#define RC_EXTRACT_ROW(x,shift) \
    ((((uint16_t)((x) >> (shift))) >> 8) | \
     ((((uint16_t)((x) >> (shift))) & 0xFF) << 8))

/*
 * Extract the rows from a 64-bit round constant.
 *
 * Expands to a brace-enclosed initializer with the four row values,
 * starting with the row taken from the most significant 16 bits.
 */
#define RC(x) \
    {RC_EXTRACT_ROW((x), 48), RC_EXTRACT_ROW((x), 32), \
     RC_EXTRACT_ROW((x), 16), RC_EXTRACT_ROW((x), 0)}

/* Alpha constant for adjusting k1 for the inverse rounds,
   and the same constant split up into its four row values */
#define ALPHA 0x243F6A8885A308D3ULL
#define ALPHA_ROW0 (RC_EXTRACT_ROW(ALPHA, 48))
#define ALPHA_ROW1 (RC_EXTRACT_ROW(ALPHA, 32))
#define ALPHA_ROW2 (RC_EXTRACT_ROW(ALPHA, 16))
#define ALPHA_ROW3 (RC_EXTRACT_ROW(ALPHA, 0))
173 
174 /* Round constants for Mantis, split up into 16-bit row values */
175 static uint16_t const rc[MANTIS_MAX_ROUNDS][4] = {
176  RC(0x13198A2E03707344ULL),
177  RC(0xA4093822299F31D0ULL),
178  RC(0x082EFA98EC4E6C89ULL),
179  RC(0x452821E638D01377ULL),
180  RC(0xBE5466CF34E90C6CULL),
181  RC(0xC0AC29B7C97C50DDULL),
182  RC(0x3F84D5B5B5470917ULL),
183  RC(0x9216D5D98979FB1BULL)
184 };
185 
186 void _mantis_parallel_crypt_vec128
187  (void *output, const void *input, const void *tweak, const MantisKey_t *ks)
188 {
189  const uint16_t *r = rc[0];
190  MantisVectorCells_t tk;
191  MantisCells_t k1 = ks->k1;
192  MantisVectorCells_t state;
193  unsigned index;
194 
195  /* Read the rows of all eight blocks into memory */
196  state.row[0] = (SkinnyVector8x16_t)
197  {READ_WORD16(input, 0), READ_WORD16(input, 8),
198  READ_WORD16(input, 16), READ_WORD16(input, 24),
199  READ_WORD16(input, 32), READ_WORD16(input, 40),
200  READ_WORD16(input, 48), READ_WORD16(input, 56)};
201  state.row[1] = (SkinnyVector8x16_t)
202  {READ_WORD16(input, 2), READ_WORD16(input, 10),
203  READ_WORD16(input, 18), READ_WORD16(input, 26),
204  READ_WORD16(input, 34), READ_WORD16(input, 42),
205  READ_WORD16(input, 50), READ_WORD16(input, 58)};
206  state.row[2] = (SkinnyVector8x16_t)
207  {READ_WORD16(input, 4), READ_WORD16(input, 12),
208  READ_WORD16(input, 20), READ_WORD16(input, 28),
209  READ_WORD16(input, 36), READ_WORD16(input, 44),
210  READ_WORD16(input, 52), READ_WORD16(input, 60)};
211  state.row[3] = (SkinnyVector8x16_t)
212  {READ_WORD16(input, 6), READ_WORD16(input, 14),
213  READ_WORD16(input, 22), READ_WORD16(input, 30),
214  READ_WORD16(input, 38), READ_WORD16(input, 46),
215  READ_WORD16(input, 54), READ_WORD16(input, 62)};
216 
217  /* Read the eight tweak values into memory */
218  tk.row[0] = (SkinnyVector8x16_t)
219  {READ_WORD16(tweak, 0), READ_WORD16(tweak, 8),
220  READ_WORD16(tweak, 16), READ_WORD16(tweak, 24),
221  READ_WORD16(tweak, 32), READ_WORD16(tweak, 40),
222  READ_WORD16(tweak, 48), READ_WORD16(tweak, 56)};
223  tk.row[1] = (SkinnyVector8x16_t)
224  {READ_WORD16(tweak, 2), READ_WORD16(tweak, 10),
225  READ_WORD16(tweak, 18), READ_WORD16(tweak, 26),
226  READ_WORD16(tweak, 34), READ_WORD16(tweak, 42),
227  READ_WORD16(tweak, 50), READ_WORD16(tweak, 58)};
228  tk.row[2] = (SkinnyVector8x16_t)
229  {READ_WORD16(tweak, 4), READ_WORD16(tweak, 12),
230  READ_WORD16(tweak, 20), READ_WORD16(tweak, 28),
231  READ_WORD16(tweak, 36), READ_WORD16(tweak, 44),
232  READ_WORD16(tweak, 52), READ_WORD16(tweak, 60)};
233  tk.row[3] = (SkinnyVector8x16_t)
234  {READ_WORD16(tweak, 6), READ_WORD16(tweak, 14),
235  READ_WORD16(tweak, 22), READ_WORD16(tweak, 30),
236  READ_WORD16(tweak, 38), READ_WORD16(tweak, 46),
237  READ_WORD16(tweak, 54), READ_WORD16(tweak, 62)};
238 
239  /* XOR the initial whitening key k0 with the state,
240  together with k1 and the initial tweak value */
241  state.row[0] ^= ks->k0.row[0] ^ k1.row[0];
242  state.row[0] ^= tk.row[0];
243  state.row[1] ^= ks->k0.row[1] ^ k1.row[1];
244  state.row[1] ^= tk.row[1];
245  state.row[2] ^= ks->k0.row[2] ^ k1.row[2];
246  state.row[2] ^= tk.row[2];
247  state.row[3] ^= ks->k0.row[3] ^ k1.row[3];
248  state.row[3] ^= tk.row[3];
249 
250  /* Perform all forward rounds */
251  for (index = ks->rounds; index > 0; --index) {
252  /* Update the tweak with the forward h function */
253  mantis_update_tweak(&tk);
254 
255  /* Apply the S-box */
256  state.row[0] = mantis_sbox(state.row[0]);
257  state.row[1] = mantis_sbox(state.row[1]);
258  state.row[2] = mantis_sbox(state.row[2]);
259  state.row[3] = mantis_sbox(state.row[3]);
260 
261  /* Add the round constant */
262  state.row[0] ^= r[0];
263  state.row[1] ^= r[1];
264  state.row[2] ^= r[2];
265  state.row[3] ^= r[3];
266  r += 4;
267 
268  /* XOR with the key and tweak */
269  state.row[0] ^= k1.row[0] ^ tk.row[0];
270  state.row[1] ^= k1.row[1] ^ tk.row[1];
271  state.row[2] ^= k1.row[2] ^ tk.row[2];
272  state.row[3] ^= k1.row[3] ^ tk.row[3];
273 
274  /* Shift the rows */
275  mantis_shift_rows(&state);
276 
277  /* Mix the columns */
278  mantis_mix_columns(&state);
279  }
280 
281  /* Half-way there: sbox, mix, sbox */
282  state.row[0] = mantis_sbox(state.row[0]);
283  state.row[1] = mantis_sbox(state.row[1]);
284  state.row[2] = mantis_sbox(state.row[2]);
285  state.row[3] = mantis_sbox(state.row[3]);
286  mantis_mix_columns(&state);
287  state.row[0] = mantis_sbox(state.row[0]);
288  state.row[1] = mantis_sbox(state.row[1]);
289  state.row[2] = mantis_sbox(state.row[2]);
290  state.row[3] = mantis_sbox(state.row[3]);
291 
292  /* Convert k1 into k1 XOR alpha for the reverse rounds */
293  k1.row[0] ^= ALPHA_ROW0;
294  k1.row[1] ^= ALPHA_ROW1;
295  k1.row[2] ^= ALPHA_ROW2;
296  k1.row[3] ^= ALPHA_ROW3;
297 
298  /* Perform all reverse rounds */
299  for (index = ks->rounds; index > 0; --index) {
300  /* Inverse mix of the columns (same as the forward mix) */
301  mantis_mix_columns(&state);
302 
303  /* Inverse shift of the rows */
304  mantis_shift_rows_inverse(&state);
305 
306  /* XOR with the key and tweak */
307  state.row[0] ^= k1.row[0] ^ tk.row[0];
308  state.row[1] ^= k1.row[1] ^ tk.row[1];
309  state.row[2] ^= k1.row[2] ^ tk.row[2];
310  state.row[3] ^= k1.row[3] ^ tk.row[3];
311 
312  /* Add the round constant */
313  r -= 4;
314  state.row[0] ^= r[0];
315  state.row[1] ^= r[1];
316  state.row[2] ^= r[2];
317  state.row[3] ^= r[3];
318 
319  /* Apply the inverse S-box (which is the same as the forward S-box) */
320  state.row[0] = mantis_sbox(state.row[0]);
321  state.row[1] = mantis_sbox(state.row[1]);
322  state.row[2] = mantis_sbox(state.row[2]);
323  state.row[3] = mantis_sbox(state.row[3]);
324 
325  /* Update the tweak with the reverse h function */
326  mantis_update_tweak_inverse(&tk);
327  }
328 
329  /* XOR the final whitening key k0prime with the state,
330  together with k1alpha and the final tweak value */
331  state.row[0] ^= ks->k0prime.row[0] ^ k1.row[0];
332  state.row[0] ^= tk.row[0];
333  state.row[1] ^= ks->k0prime.row[1] ^ k1.row[1];
334  state.row[1] ^= tk.row[1];
335  state.row[2] ^= ks->k0prime.row[2] ^ k1.row[2];
336  state.row[2] ^= tk.row[2];
337  state.row[3] ^= ks->k0prime.row[3] ^ k1.row[3];
338  state.row[3] ^= tk.row[3];
339 
340  /* Write the rows of all eight blocks back to memory */
341  WRITE_WORD16(output, 0, state.row[0][0]);
342  WRITE_WORD16(output, 2, state.row[1][0]);
343  WRITE_WORD16(output, 4, state.row[2][0]);
344  WRITE_WORD16(output, 6, state.row[3][0]);
345  WRITE_WORD16(output, 8, state.row[0][1]);
346  WRITE_WORD16(output, 10, state.row[1][1]);
347  WRITE_WORD16(output, 12, state.row[2][1]);
348  WRITE_WORD16(output, 14, state.row[3][1]);
349  WRITE_WORD16(output, 16, state.row[0][2]);
350  WRITE_WORD16(output, 18, state.row[1][2]);
351  WRITE_WORD16(output, 20, state.row[2][2]);
352  WRITE_WORD16(output, 22, state.row[3][2]);
353  WRITE_WORD16(output, 24, state.row[0][3]);
354  WRITE_WORD16(output, 26, state.row[1][3]);
355  WRITE_WORD16(output, 28, state.row[2][3]);
356  WRITE_WORD16(output, 30, state.row[3][3]);
357  WRITE_WORD16(output, 32, state.row[0][4]);
358  WRITE_WORD16(output, 34, state.row[1][4]);
359  WRITE_WORD16(output, 36, state.row[2][4]);
360  WRITE_WORD16(output, 38, state.row[3][4]);
361  WRITE_WORD16(output, 40, state.row[0][5]);
362  WRITE_WORD16(output, 42, state.row[1][5]);
363  WRITE_WORD16(output, 44, state.row[2][5]);
364  WRITE_WORD16(output, 46, state.row[3][5]);
365  WRITE_WORD16(output, 48, state.row[0][6]);
366  WRITE_WORD16(output, 50, state.row[1][6]);
367  WRITE_WORD16(output, 52, state.row[2][6]);
368  WRITE_WORD16(output, 54, state.row[3][6]);
369  WRITE_WORD16(output, 56, state.row[0][7]);
370  WRITE_WORD16(output, 58, state.row[1][7]);
371  WRITE_WORD16(output, 60, state.row[2][7]);
372  WRITE_WORD16(output, 62, state.row[3][7]);
373 }
374 
375 #else /* !SKINNY_VEC128_MATH */
376 
377 /* Stubbed out */
378 void _mantis_parallel_crypt_vec128
379  (void *output, const void *input, const void *tweak, const MantisKey_t *ks)
380 {
381  (void)output;
382  (void)input;
383  (void)tweak;
384  (void)ks;
385 }
386 
387 #endif /* SKINNY_VEC128_MATH */
MantisCells_t k1
#define MANTIS_MAX_ROUNDS
Maximum number of rounds for Mantis block ciphers.
Definition: mantis-cipher.h:82
unsigned rounds
MantisCells_t k0prime
Key schedule for Mantis block ciphers.
MantisCells_t k0
Union that describes a 64-bit 4x4 array of cells.
Definition: mantis-cipher.h:97
uint16_t row[4]
Definition: mantis-cipher.h:99