23 #include "mantis-parallel.h"
24 #include "skinny-internal.h"
26 #if SKINNY_VEC128_MATH
/*
 * Four row vectors of type SkinnyVector8x16_t.  The crypt routine in this
 * file fills each row with eight 16-bit words, one word per input block.
 * NOTE(review): the opening line of this typedef is not present in this
 * listing; confirm whether it is a struct or a union before editing.
 */
33 SkinnyVector8x16_t row[4];
35 } MantisVectorCells_t;
37 STATIC_INLINE SkinnyVector8x16_t mantis_sbox(SkinnyVector8x16_t d)
49 SkinnyVector8x16_t a = (d >> 3);
50 SkinnyVector8x16_t b = (d >> 2);
51 SkinnyVector8x16_t c = (d >> 1);
52 SkinnyVector8x16_t not_a = ~a;
53 SkinnyVector8x16_t ab = not_a | (~b);
54 SkinnyVector8x16_t ad = not_a & (~d);
55 SkinnyVector8x16_t aout = (((~c) & ab) | ad);
56 SkinnyVector8x16_t bout = ad | (b & c) | (a & c & d);
57 SkinnyVector8x16_t cout = (b & d) | ((b | d) & not_a);
58 SkinnyVector8x16_t dout = (a | b | c) & ab & (c | d);
59 return ((aout & 0x1111U) << 3) | ((bout & 0x1111U) << 2) |
60 ((cout & 0x1111U) << 1) | (dout & 0x1111U);
/*
 * Advances the tweak in place.
 *
 * The old rows 0 and 2 move to rows 1 and 3 unchanged.  The new rows 0 and 2
 * are assembled nibble by nibble from the old rows 1 and 3 using shifts and
 * masks; row 2 takes two nibbles from each of those rows.
 *
 * NOTE(review): in this listing the row[0] expression ends with a dangling
 * '|' after its first term, so the remaining terms are missing.  Restore
 * them from the upstream source before compiling.
 */
63 STATIC_INLINE
void mantis_update_tweak(MantisVectorCells_t *tweak)
/* Save the rows that are about to be overwritten */
66 SkinnyVector8x16_t row1 = tweak->row[1];
67 SkinnyVector8x16_t row3 = tweak->row[3];
68 tweak->row[1] = tweak->row[0];
69 tweak->row[3] = tweak->row[2];
/* Incomplete in this listing: only the term for bits 4-7 is visible */
70 tweak->row[0] = ((row1 >> 8) & 0x00F0U) |
73 tweak->row[2] = ((row1 << 4) & 0x0F00U) |
74 ((row1 >> 4) & 0x00F0U) |
75 ((row3 >> 4) & 0x000FU) |
76 ((row3 << 12) & 0xF000U);
/*
 * Steps the tweak in the opposite direction to mantis_update_tweak
 * (judging by its name and by the mirrored row moves), in place.
 *
 * The old rows 1 and 3 move to rows 0 and 2 unchanged.  The new rows 1 and 3
 * are assembled from the nibbles of the old rows 0 and 2.
 *
 * NOTE(review): no visible term of the row[1] expression produces bits 0-3,
 * whereas the row[3] expression covers all sixteen bits.  A term was probably
 * lost from this listing; confirm against the upstream source.
 */
79 STATIC_INLINE
void mantis_update_tweak_inverse(MantisVectorCells_t *tweak)
/* Save the rows that are about to be overwritten */
82 SkinnyVector8x16_t row0 = tweak->row[0];
83 SkinnyVector8x16_t row2 = tweak->row[2];
84 tweak->row[0] = tweak->row[1];
85 tweak->row[2] = tweak->row[3];
86 tweak->row[1] = ((row2 >> 4) & 0x00F0U) |
87 ((row2 << 4) & 0x0F00U) |
89 ((row0 << 8) & 0xF000U);
/* The top byte of old row 0 is kept in place; the low byte comes from row 2 */
90 tweak->row[3] = (row0 & 0xFF00U) |
91 ((row2 << 4) & 0x00F0U) |
92 ((row2 >> 12) & 0x000FU);
/*
 * Rearranges the 4-bit cells of the state in place.
 *
 * All four rows are copied first, then every output row is rebuilt by
 * picking nibbles out of the saved rows with shifts and masks.  Rows 2 and 3
 * take exactly one nibble from each of the four input rows.
 *
 * NOTE(review): in this listing rows 0 and 1 have no term taken from row1,
 * and no visible term fills bits 12-15 of row[0] or bits 8-11 of row[1].
 * Those terms were probably lost; confirm against the upstream source.
 */
95 STATIC_INLINE
void mantis_shift_rows(MantisVectorCells_t *state)
/* Snapshot the state so every output row reads the original cells */
98 SkinnyVector8x16_t row0 = state->row[0];
99 SkinnyVector8x16_t row1 = state->row[1];
100 SkinnyVector8x16_t row2 = state->row[2];
101 SkinnyVector8x16_t row3 = state->row[3];
102 state->row[0] = (row0 & 0x00F0U) |
104 ((row2 >> 8) & 0x000FU) |
105 ((row3 << 8) & 0x0F00U);
106 state->row[1] = (row0 & 0x000FU) |
108 ((row2 >> 8) & 0x00F0U) |
109 ((row3 << 8) & 0xF000U);
110 state->row[2] = ((row0 << 4) & 0xF000U) |
111 ((row1 << 4) & 0x00F0U) |
112 ((row2 << 4) & 0x0F00U) |
113 ((row3 >> 12) & 0x000FU);
114 state->row[3] = ((row0 >> 4) & 0x0F00U) |
115 ((row1 >> 4) & 0x000FU) |
116 ((row2 << 12) & 0xF000U) |
117 ((row3 >> 4) & 0x00F0U);
/*
 * Rearranges the 4-bit cells of the state in place; by its name this is
 * the inverse of mantis_shift_rows.
 *
 * All four rows are copied first, then every output row is rebuilt by
 * picking nibbles out of the saved rows with shifts and masks.  Rows 2 and 3
 * take exactly one nibble from each of the four input rows.
 *
 * NOTE(review): in this listing rows 0 and 1 have no term taken from row1,
 * and no visible term fills bits 0-3 of row[0] or bits 8-11 of row[1].
 * Those terms were probably lost; confirm against the upstream source.
 */
120 STATIC_INLINE
void mantis_shift_rows_inverse(MantisVectorCells_t *state)
/* Snapshot the state so every output row reads the original cells */
123 SkinnyVector8x16_t row0 = state->row[0];
124 SkinnyVector8x16_t row1 = state->row[1];
125 SkinnyVector8x16_t row2 = state->row[2];
126 SkinnyVector8x16_t row3 = state->row[3];
127 state->row[0] = (row0 & 0x00F0U) |
129 ((row2 >> 4) & 0x0F00U) |
130 ((row3 << 4) & 0xF000U);
131 state->row[1] = (row0 & 0xF000U) |
133 ((row2 >> 4) & 0x000FU) |
134 ((row3 << 4) & 0x00F0U);
135 state->row[2] = ((row0 << 8) & 0x0F00U) |
136 ((row1 << 8) & 0xF000U) |
137 ((row2 >> 4) & 0x00F0U) |
138 ((row3 >> 12) & 0x000FU);
139 state->row[3] = ((row0 >> 8) & 0x000FU) |
140 ((row1 >> 8) & 0x00F0U) |
141 ((row2 << 12) & 0xF000U) |
142 ((row3 << 4) & 0x0F00U);
145 STATIC_INLINE
void mantis_mix_columns(MantisVectorCells_t *state)
147 SkinnyVector8x16_t t0 = state->row[0];
148 SkinnyVector8x16_t t1 = state->row[1];
149 SkinnyVector8x16_t t2 = state->row[2];
150 SkinnyVector8x16_t t3 = state->row[3];
151 state->row[0] = t1 ^ t2 ^ t3;
152 state->row[1] = t0 ^ t2 ^ t3;
153 state->row[2] = t0 ^ t1 ^ t3;
154 state->row[3] = t0 ^ t1 ^ t2;
/*
 * RC_EXTRACT_ROW(x, shift) takes the 16-bit field of x that starts at bit
 * `shift` and returns it with its two bytes exchanged: bits shift+8 to
 * shift+15 end up in the low byte and bits shift to shift+7 in the high byte.
 */
158 #define RC_EXTRACT_ROW(x,shift) \
159 (((((uint16_t)((x) >> ((shift) + 8))) & 0xFF)) | \
160 ((((uint16_t)((x) >> ((shift)))) & 0xFF) << 8))
/*
 * Brace initializer that splits a 64-bit value into four 16-bit rows, taken
 * from bit offsets 48, 32, 16 and 0 in that order.
 * NOTE(review): the "#define" line that introduces this macro body is not
 * present in this listing; presumably it is RC(x), as used by the table
 * entries that follow.
 */
164 {RC_EXTRACT_ROW((x), 48), RC_EXTRACT_ROW((x), 32), \
165 RC_EXTRACT_ROW((x), 16), RC_EXTRACT_ROW((x), 0)}
/*
 * 64-bit ALPHA constant and its four 16-bit rows.  The crypt routine XORs
 * these rows into k1 between its two round loops.
 */
168 #define ALPHA 0x243F6A8885A308D3ULL
169 #define ALPHA_ROW0 (RC_EXTRACT_ROW(ALPHA, 48))
170 #define ALPHA_ROW1 (RC_EXTRACT_ROW(ALPHA, 32))
171 #define ALPHA_ROW2 (RC_EXTRACT_ROW(ALPHA, 16))
172 #define ALPHA_ROW3 (RC_EXTRACT_ROW(ALPHA, 0))
/*
 * Eight 64-bit constants, each expanded into four 16-bit rows.
 * NOTE(review): the declaration line of the enclosing table is not present
 * in this listing; presumably these are the entries of the rc table that
 * the crypt routine reads through its pointer r.
 */
176 RC(0x13198A2E03707344ULL),
177 RC(0xA4093822299F31D0ULL),
178 RC(0x082EFA98EC4E6C89ULL),
179 RC(0x452821E638D01377ULL),
180 RC(0xBE5466CF34E90C6CULL),
181 RC(0xC0AC29B7C97C50DDULL),
182 RC(0x3F84D5B5B5470917ULL),
183 RC(0x9216D5D98979FB1BULL)
/*
 * Processes eight blocks at once, one block per vector lane.
 *
 * output - receives 64 bytes, written with WRITE_WORD16 at offsets 0 to 62.
 * input  - 64 bytes read with READ_WORD16 at offsets 0 to 62.  Lane n of
 *          row i is read from offset 8*n + 2*i, so each block is four
 *          consecutive 16-bit words, one per row.
 * tweak  - 64 bytes laid out and read exactly like input.
 * ks     - key schedule; the fields k0 and rounds are read here.
 *
 * Visible flow: load state and tweak, XOR in the key and tweak material,
 * run ks->rounds forward rounds, a middle section (S-box, mix columns,
 * S-box), XOR ALPHA into k1, run ks->rounds backward rounds, XOR in the
 * tweak, then store the state.
 *
 * NOTE(review): this listing is incomplete.  The declarations and
 * initialization of the locals k1 and index, any statement that steps the
 * round-constant pointer r, and the braces that close the two loops are not
 * visible.  Restore them from the upstream source before compiling.
 */
186 void _mantis_parallel_crypt_vec128
187 (
void *output,
const void *input,
const void *tweak,
const MantisKey_t *ks)
/* r points at the first row of the round-constant table */
189 const uint16_t *r = rc[0];
190 MantisVectorCells_t tk;
192 MantisVectorCells_t state;
/* Load the state: row i, lane n comes from input offset 8*n + 2*i */
196 state.
row[0] = (SkinnyVector8x16_t)
197 {READ_WORD16(input, 0), READ_WORD16(input, 8),
198 READ_WORD16(input, 16), READ_WORD16(input, 24),
199 READ_WORD16(input, 32), READ_WORD16(input, 40),
200 READ_WORD16(input, 48), READ_WORD16(input, 56)};
201 state.row[1] = (SkinnyVector8x16_t)
202 {READ_WORD16(input, 2), READ_WORD16(input, 10),
203 READ_WORD16(input, 18), READ_WORD16(input, 26),
204 READ_WORD16(input, 34), READ_WORD16(input, 42),
205 READ_WORD16(input, 50), READ_WORD16(input, 58)};
206 state.row[2] = (SkinnyVector8x16_t)
207 {READ_WORD16(input, 4), READ_WORD16(input, 12),
208 READ_WORD16(input, 20), READ_WORD16(input, 28),
209 READ_WORD16(input, 36), READ_WORD16(input, 44),
210 READ_WORD16(input, 52), READ_WORD16(input, 60)};
211 state.row[3] = (SkinnyVector8x16_t)
212 {READ_WORD16(input, 6), READ_WORD16(input, 14),
213 READ_WORD16(input, 22), READ_WORD16(input, 30),
214 READ_WORD16(input, 38), READ_WORD16(input, 46),
215 READ_WORD16(input, 54), READ_WORD16(input, 62)};
/* Load the tweak with the same layout as the state */
218 tk.row[0] = (SkinnyVector8x16_t)
219 {READ_WORD16(tweak, 0), READ_WORD16(tweak, 8),
220 READ_WORD16(tweak, 16), READ_WORD16(tweak, 24),
221 READ_WORD16(tweak, 32), READ_WORD16(tweak, 40),
222 READ_WORD16(tweak, 48), READ_WORD16(tweak, 56)};
223 tk.row[1] = (SkinnyVector8x16_t)
224 {READ_WORD16(tweak, 2), READ_WORD16(tweak, 10),
225 READ_WORD16(tweak, 18), READ_WORD16(tweak, 26),
226 READ_WORD16(tweak, 34), READ_WORD16(tweak, 42),
227 READ_WORD16(tweak, 50), READ_WORD16(tweak, 58)};
228 tk.row[2] = (SkinnyVector8x16_t)
229 {READ_WORD16(tweak, 4), READ_WORD16(tweak, 12),
230 READ_WORD16(tweak, 20), READ_WORD16(tweak, 28),
231 READ_WORD16(tweak, 36), READ_WORD16(tweak, 44),
232 READ_WORD16(tweak, 52), READ_WORD16(tweak, 60)};
233 tk.row[3] = (SkinnyVector8x16_t)
234 {READ_WORD16(tweak, 6), READ_WORD16(tweak, 14),
235 READ_WORD16(tweak, 22), READ_WORD16(tweak, 30),
236 READ_WORD16(tweak, 38), READ_WORD16(tweak, 46),
237 READ_WORD16(tweak, 54), READ_WORD16(tweak, 62)};
/* Initial whitening: XOR k0, k1 and the tweak into every row */
241 state.row[0] ^= ks->
k0.
row[0] ^ k1.
row[0];
242 state.row[0] ^= tk.row[0];
243 state.row[1] ^= ks->
k0.
row[1] ^ k1.
row[1];
244 state.row[1] ^= tk.row[1];
245 state.row[2] ^= ks->
k0.
row[2] ^ k1.
row[2];
246 state.row[2] ^= tk.row[2];
247 state.row[3] ^= ks->
k0.
row[3] ^ k1.
row[3];
248 state.row[3] ^= tk.row[3];
/* Forward rounds: one iteration per round, counting down from ks->rounds */
251 for (index = ks->
rounds; index > 0; --index) {
/* Step the tweak before it is used in this round */
253 mantis_update_tweak(&tk);
/* Substitute every nibble of the state */
256 state.row[0] = mantis_sbox(state.row[0]);
257 state.row[1] = mantis_sbox(state.row[1]);
258 state.row[2] = mantis_sbox(state.row[2]);
259 state.row[3] = mantis_sbox(state.row[3]);
/*
 * XOR in four round-constant words; each scalar is applied to all lanes.
 * NOTE(review): no statement that advances r is visible in this loop;
 * presumably it was lost from the listing.  Confirm against upstream.
 */
262 state.row[0] ^= r[0];
263 state.row[1] ^= r[1];
264 state.row[2] ^= r[2];
265 state.row[3] ^= r[3];
/* XOR in k1 and the current tweak */
269 state.row[0] ^= k1.
row[0] ^ tk.row[0];
270 state.row[1] ^= k1.
row[1] ^ tk.row[1];
271 state.row[2] ^= k1.
row[2] ^ tk.row[2];
272 state.row[3] ^= k1.
row[3] ^ tk.row[3];
/* Permute the cells, then mix the rows */
275 mantis_shift_rows(&state);
278 mantis_mix_columns(&state);
/* Middle section: S-box, mix columns, S-box */
282 state.row[0] = mantis_sbox(state.row[0]);
283 state.row[1] = mantis_sbox(state.row[1]);
284 state.row[2] = mantis_sbox(state.row[2]);
285 state.row[3] = mantis_sbox(state.row[3]);
286 mantis_mix_columns(&state);
287 state.row[0] = mantis_sbox(state.row[0]);
288 state.row[1] = mantis_sbox(state.row[1]);
289 state.row[2] = mantis_sbox(state.row[2]);
290 state.row[3] = mantis_sbox(state.row[3]);
/* Change k1 by ALPHA before the backward rounds */
293 k1.
row[0] ^= ALPHA_ROW0;
294 k1.
row[1] ^= ALPHA_ROW1;
295 k1.
row[2] ^= ALPHA_ROW2;
296 k1.
row[3] ^= ALPHA_ROW3;
/* Backward rounds: the forward round steps in reverse order */
299 for (index = ks->
rounds; index > 0; --index) {
301 mantis_mix_columns(&state);
304 mantis_shift_rows_inverse(&state);
/* XOR in the ALPHA-adjusted k1 and the current tweak */
307 state.row[0] ^= k1.
row[0] ^ tk.row[0];
308 state.row[1] ^= k1.
row[1] ^ tk.row[1];
309 state.row[2] ^= k1.
row[2] ^ tk.row[2];
310 state.row[3] ^= k1.
row[3] ^ tk.row[3];
/*
 * XOR in four round-constant words.
 * NOTE(review): no statement that steps r backwards is visible before
 * these lines; presumably it was lost from the listing.  Confirm.
 */
314 state.row[0] ^= r[0];
315 state.row[1] ^= r[1];
316 state.row[2] ^= r[2];
317 state.row[3] ^= r[3];
/* Substitute every nibble of the state */
320 state.row[0] = mantis_sbox(state.row[0]);
321 state.row[1] = mantis_sbox(state.row[1]);
322 state.row[2] = mantis_sbox(state.row[2]);
323 state.row[3] = mantis_sbox(state.row[3]);
/* Step the tweak back after it has been used in this round */
326 mantis_update_tweak_inverse(&tk);
/*
 * Final XOR with the tweak.
 * NOTE(review): only the tweak XOR is visible here, while the initial
 * whitening also used key material.  Matching key XOR lines may have been
 * lost from the listing; confirm against upstream.
 */
332 state.row[0] ^= tk.row[0];
334 state.row[1] ^= tk.row[1];
336 state.row[2] ^= tk.row[2];
338 state.row[3] ^= tk.row[3];
/* Store the state: lane n of row i goes to output offset 8*n + 2*i */
341 WRITE_WORD16(output, 0, state.row[0][0]);
342 WRITE_WORD16(output, 2, state.row[1][0]);
343 WRITE_WORD16(output, 4, state.row[2][0]);
344 WRITE_WORD16(output, 6, state.row[3][0]);
345 WRITE_WORD16(output, 8, state.row[0][1]);
346 WRITE_WORD16(output, 10, state.row[1][1]);
347 WRITE_WORD16(output, 12, state.row[2][1]);
348 WRITE_WORD16(output, 14, state.row[3][1]);
349 WRITE_WORD16(output, 16, state.row[0][2]);
350 WRITE_WORD16(output, 18, state.row[1][2]);
351 WRITE_WORD16(output, 20, state.row[2][2]);
352 WRITE_WORD16(output, 22, state.row[3][2]);
353 WRITE_WORD16(output, 24, state.row[0][3]);
354 WRITE_WORD16(output, 26, state.row[1][3]);
355 WRITE_WORD16(output, 28, state.row[2][3]);
356 WRITE_WORD16(output, 30, state.row[3][3]);
357 WRITE_WORD16(output, 32, state.row[0][4]);
358 WRITE_WORD16(output, 34, state.row[1][4]);
359 WRITE_WORD16(output, 36, state.row[2][4]);
360 WRITE_WORD16(output, 38, state.row[3][4]);
361 WRITE_WORD16(output, 40, state.row[0][5]);
362 WRITE_WORD16(output, 42, state.row[1][5]);
363 WRITE_WORD16(output, 44, state.row[2][5]);
364 WRITE_WORD16(output, 46, state.row[3][5]);
365 WRITE_WORD16(output, 48, state.row[0][6]);
366 WRITE_WORD16(output, 50, state.row[1][6]);
367 WRITE_WORD16(output, 52, state.row[2][6]);
368 WRITE_WORD16(output, 54, state.row[3][6]);
369 WRITE_WORD16(output, 56, state.row[0][7]);
370 WRITE_WORD16(output, 58, state.row[1][7]);
371 WRITE_WORD16(output, 60, state.row[2][7]);
372 WRITE_WORD16(output, 62, state.row[3][7]);
378 void _mantis_parallel_crypt_vec128
379 (
void *output,
const void *input,
const void *tweak,
const MantisKey_t *ks)
#define MANTIS_MAX_ROUNDS
Maximum number of rounds for Mantis block ciphers.
Key schedule for Mantis block ciphers.
Union that describes a 64-bit 4x4 array of cells.