Skinny-C
skinny128-ctr-vec256.c
/*
 * Copyright (C) 2017 Southern Storm Software, Pty Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "skinny128-cipher.h"
#include "skinny128-ctr-internal.h"
#include "skinny-internal.h"
#include <stdlib.h>

#if SKINNY_VEC256_MATH

/* This implementation encrypts eight blocks at a time */
#define SKINNY128_CTR_BLOCK_SIZE (SKINNY128_BLOCK_SIZE * 8)
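/* With 16-byte Skinny-128 blocks, this works out to a 128-byte keystream chunk
   per call to skinny128_ecb_encrypt_eight() below. */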

typedef struct
{
    /* Key schedule for Skinny-128, with an optional tweak */
    Skinny128TweakedKey_t kt;

    /* Counter values for the next batch of blocks, held as row vectors */
    SkinnyVector8x32_t counter[4];

    /* Encrypted counter values forming the current keystream block */
    unsigned char ecounter[SKINNY128_CTR_BLOCK_SIZE];

    /* Offset into ecounter where the previous request left off */
    unsigned offset;

    /* Base pointer for freeing the allocated context */
    void *base_ptr;

} Skinny128CTRVec256Ctx_t;

static int skinny128_ctr_vec256_init(Skinny128CTR_t *ctr)
{
    Skinny128CTRVec256Ctx_t *ctx;
    void *base_ptr;
    if ((ctx = skinny_calloc(sizeof(Skinny128CTRVec256Ctx_t), &base_ptr)) == NULL)
        return 0;
    ctx->base_ptr = base_ptr;
    ctx->offset = SKINNY128_CTR_BLOCK_SIZE;
    ctr->ctx = ctx;
    return 1;
}

static void skinny128_ctr_vec256_cleanup(Skinny128CTR_t *ctr)
{
    if (ctr->ctx) {
        Skinny128CTRVec256Ctx_t *ctx = ctr->ctx;
        void *base_ptr = ctx->base_ptr;
        skinny_cleanse(ctx, sizeof(Skinny128CTRVec256Ctx_t));
        free(base_ptr);
        ctr->ctx = 0;
    }
}

static int skinny128_ctr_vec256_set_key
    (Skinny128CTR_t *ctr, const void *key, unsigned size)
{
    Skinny128CTRVec256Ctx_t *ctx;

    /* Validate the parameters */
    if (!key)
        return 0;
    ctx = ctr->ctx;
    if (!ctx)
        return 0;

    /* Populate the underlying key schedule */
    if (!skinny128_set_key(&(ctx->kt.ks), key, size))
        return 0;

    /* Reset the keystream */
    ctx->offset = SKINNY128_CTR_BLOCK_SIZE;
    return 1;
}

static int skinny128_ctr_vec256_set_tweaked_key
    (Skinny128CTR_t *ctr, const void *key, unsigned key_size)
{
    Skinny128CTRVec256Ctx_t *ctx;

    /* Validate the parameters */
    if (!key)
        return 0;
    ctx = ctr->ctx;
    if (!ctx)
        return 0;

    /* Populate the underlying key schedule */
    if (!skinny128_set_tweaked_key(&(ctx->kt), key, key_size))
        return 0;

    /* Reset the keystream */
    ctx->offset = SKINNY128_CTR_BLOCK_SIZE;
    return 1;
}

static int skinny128_ctr_vec256_set_tweak
    (Skinny128CTR_t *ctr, const void *tweak, unsigned tweak_size)
{
    Skinny128CTRVec256Ctx_t *ctx;

    /* Validate the parameters */
    ctx = ctr->ctx;
    if (!ctx)
        return 0;

    /* Populate the underlying tweak */
    if (!skinny128_set_tweak(&(ctx->kt), tweak, tweak_size))
        return 0;

    /* Reset the keystream */
    ctx->offset = SKINNY128_CTR_BLOCK_SIZE;
    return 1;
}

/* Increment a specific column in an array of row vectors */
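/* The counter[] array holds the four state rows for all eight blocks,
   one 32-bit lane per block, so block "column" of the batch occupies
   byte offset column * 4 within each 32-byte row vector.  The loop
   below walks that block's 16 counter bytes from the least significant
   byte upwards, adding "inc" and propagating the carry. */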
STATIC_INLINE void skinny128_ctr_increment
    (SkinnyVector8x32_t *counter, unsigned column, unsigned inc)
{
    uint8_t *ctr = ((uint8_t *)counter) + column * 4;
    uint8_t *ptr;
    unsigned index;
    for (index = 16; index > 0; ) {
        --index;
        ptr = ctr + (index & 0x0C) * 8;
#if SKINNY_LITTLE_ENDIAN
        ptr += index & 0x03;
#else
        ptr += 3 - (index & 0x03);
#endif
        inc += ptr[0];
        ptr[0] = (uint8_t)inc;
        inc >>= 8;
    }
}

static int skinny128_ctr_vec256_set_counter
    (Skinny128CTR_t *ctr, const void *counter, unsigned size)
{
    Skinny128CTRVec256Ctx_t *ctx;
    unsigned char block[SKINNY128_BLOCK_SIZE];

    /* Validate the parameters */
    if (size > SKINNY128_BLOCK_SIZE)
        return 0;
    ctx = ctr->ctx;
    if (!ctx)
        return 0;

    /* Set the counter and reset the keystream to a block boundary */
    if (counter) {
        memset(block, 0, SKINNY128_BLOCK_SIZE - size);
        memcpy(block + SKINNY128_BLOCK_SIZE - size, counter, size);
    } else {
        memset(block, 0, SKINNY128_BLOCK_SIZE);
    }
    ctx->offset = SKINNY128_CTR_BLOCK_SIZE;

    /* Load the counter block and convert into row vectors */
    ctx->counter[0] = skinny_to_vec8x32(READ_WORD32(block, 0));
    ctx->counter[1] = skinny_to_vec8x32(READ_WORD32(block, 4));
    ctx->counter[2] = skinny_to_vec8x32(READ_WORD32(block, 8));
    ctx->counter[3] = skinny_to_vec8x32(READ_WORD32(block, 12));

    /* Increment the second through eighth columns of each row vector */
    skinny128_ctr_increment(ctx->counter, 1, 1);
    skinny128_ctr_increment(ctx->counter, 2, 2);
    skinny128_ctr_increment(ctx->counter, 3, 3);
    skinny128_ctr_increment(ctx->counter, 4, 4);
    skinny128_ctr_increment(ctx->counter, 5, 5);
    skinny128_ctr_increment(ctx->counter, 6, 6);
    skinny128_ctr_increment(ctx->counter, 7, 7);

    /* Clean up and exit */
    skinny_cleanse(block, sizeof(block));
    return 1;
}

STATIC_INLINE SkinnyVector8x32_t skinny128_rotate_right
    (SkinnyVector8x32_t x, unsigned count)
{
    /* Note: we are rotating the cells right, which actually moves
       the values up closer to the MSB. That is, we do a left shift
       on the word to rotate the cells in the word right. */
    return (x << count) | (x >> (32 - count));
}
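/* For example, the single-cell right rotation of row 1 in ShiftRows is
   skinny128_rotate_right(row1, 8), which is (row1 << 8) | (row1 >> 24)
   within each 32-bit lane. */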

/* This function evaluates the S-box on four 256-bit vectors in parallel
   by interleaving the operations. This tends to make better use of YMM
   registers on x86-64 CPUs with AVX2 support or better, as the CPU can
   schedule unrelated operations to execute in parallel. */
STATIC_INLINE void skinny128_sbox_four
    (SkinnyVector8x32_t *u, SkinnyVector8x32_t *v,
     SkinnyVector8x32_t *s, SkinnyVector8x32_t *t)
{
    SkinnyVector8x32_t x1 = *u;
    SkinnyVector8x32_t y1;
    SkinnyVector8x32_t x2 = *v;
    SkinnyVector8x32_t y2;
    SkinnyVector8x32_t x3 = *s;
    SkinnyVector8x32_t y3;
    SkinnyVector8x32_t x4 = *t;
    SkinnyVector8x32_t y4;

    x1 ^= ((~((x1 >> 2) | (x1 >> 3))) & 0x11111111U);
    x2 ^= ((~((x2 >> 2) | (x2 >> 3))) & 0x11111111U);
    x3 ^= ((~((x3 >> 2) | (x3 >> 3))) & 0x11111111U);
    x4 ^= ((~((x4 >> 2) | (x4 >> 3))) & 0x11111111U);

    y1 = ((~((x1 << 5) | (x1 << 1))) & 0x20202020U);
    y2 = ((~((x2 << 5) | (x2 << 1))) & 0x20202020U);
    y3 = ((~((x3 << 5) | (x3 << 1))) & 0x20202020U);
    y4 = ((~((x4 << 5) | (x4 << 1))) & 0x20202020U);

    x1 ^= ((~((x1 << 5) | (x1 << 4))) & 0x40404040U) ^ y1;
    x2 ^= ((~((x2 << 5) | (x2 << 4))) & 0x40404040U) ^ y2;
    x3 ^= ((~((x3 << 5) | (x3 << 4))) & 0x40404040U) ^ y3;
    x4 ^= ((~((x4 << 5) | (x4 << 4))) & 0x40404040U) ^ y4;

    y1 = ((~((x1 << 2) | (x1 << 1))) & 0x80808080U);
    y2 = ((~((x2 << 2) | (x2 << 1))) & 0x80808080U);
    y3 = ((~((x3 << 2) | (x3 << 1))) & 0x80808080U);
    y4 = ((~((x4 << 2) | (x4 << 1))) & 0x80808080U);

    x1 ^= ((~((x1 >> 2) | (x1 << 1))) & 0x02020202U) ^ y1;
    x2 ^= ((~((x2 >> 2) | (x2 << 1))) & 0x02020202U) ^ y2;
    x3 ^= ((~((x3 >> 2) | (x3 << 1))) & 0x02020202U) ^ y3;
    x4 ^= ((~((x4 >> 2) | (x4 << 1))) & 0x02020202U) ^ y4;

    y1 = ((~((x1 >> 5) | (x1 << 1))) & 0x04040404U);
    y2 = ((~((x2 >> 5) | (x2 << 1))) & 0x04040404U);
    y3 = ((~((x3 >> 5) | (x3 << 1))) & 0x04040404U);
    y4 = ((~((x4 >> 5) | (x4 << 1))) & 0x04040404U);

    x1 ^= ((~((x1 >> 1) | (x1 >> 2))) & 0x08080808U) ^ y1;
    x2 ^= ((~((x2 >> 1) | (x2 >> 2))) & 0x08080808U) ^ y2;
    x3 ^= ((~((x3 >> 1) | (x3 >> 2))) & 0x08080808U) ^ y3;
    x4 ^= ((~((x4 >> 1) | (x4 >> 2))) & 0x08080808U) ^ y4;

    *u = ((x1 & 0x08080808U) << 1) |
         ((x1 & 0x32323232U) << 2) |
         ((x1 & 0x01010101U) << 5) |
         ((x1 & 0x80808080U) >> 6) |
         ((x1 & 0x40404040U) >> 4) |
         ((x1 & 0x04040404U) >> 2);

    *v = ((x2 & 0x08080808U) << 1) |
         ((x2 & 0x32323232U) << 2) |
         ((x2 & 0x01010101U) << 5) |
         ((x2 & 0x80808080U) >> 6) |
         ((x2 & 0x40404040U) >> 4) |
         ((x2 & 0x04040404U) >> 2);

    *s = ((x3 & 0x08080808U) << 1) |
         ((x3 & 0x32323232U) << 2) |
         ((x3 & 0x01010101U) << 5) |
         ((x3 & 0x80808080U) >> 6) |
         ((x3 & 0x40404040U) >> 4) |
         ((x3 & 0x04040404U) >> 2);

    *t = ((x4 & 0x08080808U) << 1) |
         ((x4 & 0x32323232U) << 2) |
         ((x4 & 0x01010101U) << 5) |
         ((x4 & 0x80808080U) >> 6) |
         ((x4 & 0x40404040U) >> 4) |
         ((x4 & 0x04040404U) >> 2);
}

static void skinny128_ecb_encrypt_eight
    (void *output, const SkinnyVector8x32_t *input, const Skinny128Key_t *ks)
{
    SkinnyVector8x32_t row0;
    SkinnyVector8x32_t row1;
    SkinnyVector8x32_t row2;
    SkinnyVector8x32_t row3;
    const Skinny128HalfCells_t *schedule;
    unsigned index;
    SkinnyVector8x32_t temp;

    /* Read the rows of all eight counter blocks into memory */
    row0 = input[0];
    row1 = input[1];
    row2 = input[2];
    row3 = input[3];

    /* Perform all encryption rounds on the eight blocks in parallel */
    schedule = ks->schedule;
    for (index = ks->rounds; index > 0; --index, ++schedule) {
        /* Apply the S-box to all bytes in the state */
        skinny128_sbox_four(&row0, &row1, &row2, &row3);

        /* Apply the subkey for this round */
        row0 ^= schedule->row[0];
        row1 ^= schedule->row[1];
        row2 ^= 0x02;

        /* Shift the rows */
        row1 = skinny128_rotate_right(row1, 8);
        row2 = skinny128_rotate_right(row2, 16);
        row3 = skinny128_rotate_right(row3, 24);

        /* Mix the columns */
        row1 ^= row2;
        row2 ^= row0;
        temp = row3 ^ row2;
        row3 = row2;
        row2 = row1;
        row1 = row0;
        row0 = temp;
    }

    /* Write the rows of all eight blocks back to memory */
#if SKINNY_LITTLE_ENDIAN && SKINNY_UNALIGNED
    *((SkinnyVector8x32U_t *)output) =
        (SkinnyVector8x32_t){row0[0], row1[0], row2[0], row3[0],
                             row0[1], row1[1], row2[1], row3[1]};
    *((SkinnyVector8x32U_t *)(output + 32)) =
        (SkinnyVector8x32_t){row0[2], row1[2], row2[2], row3[2],
                             row0[3], row1[3], row2[3], row3[3]};
    *((SkinnyVector8x32U_t *)(output + 64)) =
        (SkinnyVector8x32_t){row0[4], row1[4], row2[4], row3[4],
                             row0[5], row1[5], row2[5], row3[5]};
    *((SkinnyVector8x32U_t *)(output + 96)) =
        (SkinnyVector8x32_t){row0[6], row1[6], row2[6], row3[6],
                             row0[7], row1[7], row2[7], row3[7]};
#else
    WRITE_WORD32(output, 0, row0[0]);
    WRITE_WORD32(output, 4, row1[0]);
    WRITE_WORD32(output, 8, row2[0]);
    WRITE_WORD32(output, 12, row3[0]);
    WRITE_WORD32(output, 16, row0[1]);
    WRITE_WORD32(output, 20, row1[1]);
    WRITE_WORD32(output, 24, row2[1]);
    WRITE_WORD32(output, 28, row3[1]);
    WRITE_WORD32(output, 32, row0[2]);
    WRITE_WORD32(output, 36, row1[2]);
    WRITE_WORD32(output, 40, row2[2]);
    WRITE_WORD32(output, 44, row3[2]);
    WRITE_WORD32(output, 48, row0[3]);
    WRITE_WORD32(output, 52, row1[3]);
    WRITE_WORD32(output, 56, row2[3]);
    WRITE_WORD32(output, 60, row3[3]);
    WRITE_WORD32(output, 64, row0[4]);
    WRITE_WORD32(output, 68, row1[4]);
    WRITE_WORD32(output, 72, row2[4]);
    WRITE_WORD32(output, 76, row3[4]);
    WRITE_WORD32(output, 80, row0[5]);
    WRITE_WORD32(output, 84, row1[5]);
    WRITE_WORD32(output, 88, row2[5]);
    WRITE_WORD32(output, 92, row3[5]);
    WRITE_WORD32(output, 96, row0[6]);
    WRITE_WORD32(output, 100, row1[6]);
    WRITE_WORD32(output, 104, row2[6]);
    WRITE_WORD32(output, 108, row3[6]);
    WRITE_WORD32(output, 112, row0[7]);
    WRITE_WORD32(output, 116, row1[7]);
    WRITE_WORD32(output, 120, row2[7]);
    WRITE_WORD32(output, 124, row3[7]);
#endif
}

static int skinny128_ctr_vec256_encrypt
    (void *output, const void *input, size_t size, Skinny128CTR_t *ctr)
{
    Skinny128CTRVec256Ctx_t *ctx;
    uint8_t *out = (uint8_t *)output;
    const uint8_t *in = (const uint8_t *)input;

    /* Validate the parameters */
    if (!output || !input)
        return 0;
    ctx = ctr->ctx;
    if (!ctx)
        return 0;

    /* Encrypt the input in CTR mode to create the output */
    while (size > 0) {
        if (ctx->offset >= SKINNY128_CTR_BLOCK_SIZE) {
            /* We need a new keystream block */
            skinny128_ecb_encrypt_eight
                (ctx->ecounter, ctx->counter, &(ctx->kt.ks));
            skinny128_ctr_increment(ctx->counter, 0, 8);
            skinny128_ctr_increment(ctx->counter, 1, 8);
            skinny128_ctr_increment(ctx->counter, 2, 8);
            skinny128_ctr_increment(ctx->counter, 3, 8);
            skinny128_ctr_increment(ctx->counter, 4, 8);
            skinny128_ctr_increment(ctx->counter, 5, 8);
            skinny128_ctr_increment(ctx->counter, 6, 8);
            skinny128_ctr_increment(ctx->counter, 7, 8);

            /* XOR an entire keystream block in one go if possible */
            if (size >= SKINNY128_CTR_BLOCK_SIZE) {
                skinny128_xor(out, in, ctx->ecounter);
                skinny128_xor(out + SKINNY128_BLOCK_SIZE,
                              in + SKINNY128_BLOCK_SIZE,
                              ctx->ecounter + SKINNY128_BLOCK_SIZE);
                skinny128_xor(out + SKINNY128_BLOCK_SIZE * 2,
                              in + SKINNY128_BLOCK_SIZE * 2,
                              ctx->ecounter + SKINNY128_BLOCK_SIZE * 2);
                skinny128_xor(out + SKINNY128_BLOCK_SIZE * 3,
                              in + SKINNY128_BLOCK_SIZE * 3,
                              ctx->ecounter + SKINNY128_BLOCK_SIZE * 3);
                skinny128_xor(out + SKINNY128_BLOCK_SIZE * 4,
                              in + SKINNY128_BLOCK_SIZE * 4,
                              ctx->ecounter + SKINNY128_BLOCK_SIZE * 4);
                skinny128_xor(out + SKINNY128_BLOCK_SIZE * 5,
                              in + SKINNY128_BLOCK_SIZE * 5,
                              ctx->ecounter + SKINNY128_BLOCK_SIZE * 5);
                skinny128_xor(out + SKINNY128_BLOCK_SIZE * 6,
                              in + SKINNY128_BLOCK_SIZE * 6,
                              ctx->ecounter + SKINNY128_BLOCK_SIZE * 6);
                skinny128_xor(out + SKINNY128_BLOCK_SIZE * 7,
                              in + SKINNY128_BLOCK_SIZE * 7,
                              ctx->ecounter + SKINNY128_BLOCK_SIZE * 7);
                out += SKINNY128_CTR_BLOCK_SIZE;
                in += SKINNY128_CTR_BLOCK_SIZE;
                size -= SKINNY128_CTR_BLOCK_SIZE;
            } else {
                /* Last partial block in the request */
                skinny_xor(out, in, ctx->ecounter, size);
                ctx->offset = size;
                break;
            }
        } else {
            /* Left-over keystream data from the last request */
            size_t temp = SKINNY128_CTR_BLOCK_SIZE - ctx->offset;
            if (temp > size)
                temp = size;
            skinny_xor(out, in, ctx->ecounter + ctx->offset, temp);
            ctx->offset += temp;
            out += temp;
            in += temp;
            size -= temp;
        }
    }
    return 1;
}

/* Vtable for the 256-bit vectorized implementation of Skinny-128 in CTR mode */
Skinny128CTRVtable_t const _skinny128_ctr_vec256 = {
    skinny128_ctr_vec256_init,
    skinny128_ctr_vec256_cleanup,
    skinny128_ctr_vec256_set_key,
    skinny128_ctr_vec256_set_tweaked_key,
    skinny128_ctr_vec256_set_tweak,
    skinny128_ctr_vec256_set_counter,
    skinny128_ctr_vec256_encrypt
};

#else /* !SKINNY_VEC256_MATH */

/* Stubbed out */
Skinny128CTRVtable_t const _skinny128_ctr_vec256;

#endif /* !SKINNY_VEC256_MATH */
Referenced declarations:

    Skinny128HalfCells_t schedule[SKINNY128_MAX_ROUNDS]
        Union that describes a 64-bit 2x4 array of cells.
    Skinny128CTR_t
        State information for Skinny-128 in CTR mode.
    Skinny128Key_t
        Key schedule for Skinny128 block ciphers.
    Skinny128TweakedKey_t
        Key schedule for Skinny128 block ciphers when a tweak is in use.
    #define SKINNY128_BLOCK_SIZE
        Size of a block for Skinny128 block ciphers.
    int skinny128_set_key(Skinny128Key_t *ks, const void *key, unsigned size)
        Sets the key schedule for a Skinny128 block cipher.
    int skinny128_set_tweaked_key(Skinny128TweakedKey_t *ks, const void *key, unsigned key_size)
        Sets the key schedule for a Skinny128 block cipher and prepares for tweaked encryption.
    int skinny128_set_tweak(Skinny128TweakedKey_t *ks, const void *tweak, unsigned tweak_size)
        Changes the tweak value for a previously-initialized key schedule.
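Applications do not call the vtable above directly; the generic CTR front end selects a backend such as this one when 256-bit vector math is compiled in. A minimal usage sketch follows, assuming the public skinny128_ctr_* wrapper functions and the Skinny128CTR_t type from skinny128-cipher.h mirror the vtable signatures in this file; the helper name ctr_crypt is purely illustrative.

    #include "skinny128-cipher.h"
    #include <stddef.h>

    /* Encrypt (or decrypt) a buffer with Skinny-128 in CTR mode using a
       16-byte key and a 16-byte initial counter block.  The
       skinny128_ctr_* names are assumed from the public API; they are
       not defined in this file. */
    static int ctr_crypt(void *out, const void *in, size_t len,
                         const unsigned char key[16],
                         const unsigned char counter[16])
    {
        Skinny128CTR_t ctr;
        int ok = 0;
        if (!skinny128_ctr_init(&ctr))
            return 0;
        /* Shorter counter values would be right-aligned into the 128-bit
           block and zero-padded on the left (see set_counter above). */
        if (skinny128_ctr_set_key(&ctr, key, 16) &&
            skinny128_ctr_set_counter(&ctr, counter, 16) &&
            skinny128_ctr_encrypt(out, in, len, &ctr))
            ok = 1;
        skinny128_ctr_cleanup(&ctr);
        return ok;
    }

Because CTR mode XORs the same keystream for both directions, the same call serves for encryption and decryption.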