Skinny-C
skinny128-ctr-vec128.c
/*
 * Copyright (C) 2017 Southern Storm Software, Pty Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "skinny128-cipher.h"
#include "skinny128-ctr-internal.h"
#include "skinny-internal.h"
#include <stdlib.h>

#if SKINNY_VEC128_MATH

/* This implementation encrypts four blocks at a time */
#define SKINNY128_CTR_BLOCK_SIZE (SKINNY128_BLOCK_SIZE * 4)
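
/* The four blocks of a batch are kept transposed: counter[i] below holds
   row i of all four counter blocks, and lane j of counter[0..3] forms the
   complete 128-bit counter for block j. One vector operation therefore
   advances the same row of all four blocks at once. */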

typedef struct
{
    /** Key schedule for Skinny-128, including the optional tweak */
    Skinny128TweakedKey_t kt;

    /** Counter values for the next four blocks, as row vectors */
    SkinnyVector4x32_t counter[4];

    /** Encrypted counter values for the current keystream block */
    unsigned char ecounter[SKINNY128_CTR_BLOCK_SIZE];

    /** Offset into ecounter where the previous request left off */
    unsigned offset;

    /** Base pointer for freeing the context's allocation */
    void *base_ptr;

} Skinny128CTRVec128Ctx_t;

static int skinny128_ctr_vec128_init(Skinny128CTR_t *ctr)
{
    Skinny128CTRVec128Ctx_t *ctx;
    void *base_ptr;
    if ((ctx = skinny_calloc(sizeof(Skinny128CTRVec128Ctx_t), &base_ptr)) == NULL)
        return 0;
    ctx->base_ptr = base_ptr;
    ctx->offset = SKINNY128_CTR_BLOCK_SIZE;
    ctr->ctx = ctx;
    return 1;
}

static void skinny128_ctr_vec128_cleanup(Skinny128CTR_t *ctr)
{
    if (ctr->ctx) {
        Skinny128CTRVec128Ctx_t *ctx = ctr->ctx;
        void *base_ptr = ctx->base_ptr;
        skinny_cleanse(ctx, sizeof(Skinny128CTRVec128Ctx_t));
        free(base_ptr);
        ctr->ctx = 0;
    }
}
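
/* Note that the context is wiped with skinny_cleanse() before the
   underlying allocation is freed, so that key schedule and keystream
   material do not linger in heap memory after the CTR object is
   destroyed. */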

static int skinny128_ctr_vec128_set_key
    (Skinny128CTR_t *ctr, const void *key, unsigned size)
{
    Skinny128CTRVec128Ctx_t *ctx;

    /* Validate the parameters */
    if (!key)
        return 0;
    ctx = ctr->ctx;
    if (!ctx)
        return 0;

    /* Populate the underlying key schedule */
    if (!skinny128_set_key(&(ctx->kt.ks), key, size))
        return 0;

    /* Reset the keystream */
    ctx->offset = SKINNY128_CTR_BLOCK_SIZE;
    return 1;
}

static int skinny128_ctr_vec128_set_tweaked_key
    (Skinny128CTR_t *ctr, const void *key, unsigned key_size)
{
    Skinny128CTRVec128Ctx_t *ctx;

    /* Validate the parameters */
    if (!key)
        return 0;
    ctx = ctr->ctx;
    if (!ctx)
        return 0;

    /* Populate the underlying key schedule */
    if (!skinny128_set_tweaked_key(&(ctx->kt), key, key_size))
        return 0;

    /* Reset the keystream */
    ctx->offset = SKINNY128_CTR_BLOCK_SIZE;
    return 1;
}

static int skinny128_ctr_vec128_set_tweak
    (Skinny128CTR_t *ctr, const void *tweak, unsigned tweak_size)
{
    Skinny128CTRVec128Ctx_t *ctx;

    /* Validate the parameters */
    ctx = ctr->ctx;
    if (!ctx)
        return 0;

    /* Populate the underlying tweak */
    if (!skinny128_set_tweak(&(ctx->kt), tweak, tweak_size))
        return 0;

    /* Reset the keystream */
    ctx->offset = SKINNY128_CTR_BLOCK_SIZE;
    return 1;
}

/* Increment a specific column in an array of row vectors */
STATIC_INLINE void skinny128_ctr_increment
    (SkinnyVector4x32_t *counter, unsigned column, unsigned inc)
{
    uint8_t *ctr = ((uint8_t *)counter) + column * 4;
    uint8_t *ptr;
    unsigned index;
    for (index = 16; index > 0; ) {
        --index;
        ptr = ctr + (index & 0x0C) * 4;
#if SKINNY_LITTLE_ENDIAN
        ptr += index & 0x03;
#else
        ptr += 3 - (index & 0x03);
#endif
        inc += ptr[0];
        ptr[0] = (uint8_t)inc;
        inc >>= 8;
    }
}
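
/* A worked example of the addressing above: the row vectors are 16 bytes
   apart in memory, so (index & 0x0C) * 4 selects the vector that holds
   row index/4, while column * 4 plus the endian adjustment selects the
   byte of lane `column` that corresponds to logical block byte `index`.
   Walking index from 15 down to 0 therefore performs a big-endian
   add-with-carry of `inc` into the 128-bit counter of block `column`. */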

static int skinny128_ctr_vec128_set_counter
    (Skinny128CTR_t *ctr, const void *counter, unsigned size)
{
    Skinny128CTRVec128Ctx_t *ctx;
    unsigned char block[SKINNY128_BLOCK_SIZE];

    /* Validate the parameters */
    if (size > SKINNY128_BLOCK_SIZE)
        return 0;
    ctx = ctr->ctx;
    if (!ctx)
        return 0;

    /* Set the counter and reset the keystream to a block boundary */
    if (counter) {
        memset(block, 0, SKINNY128_BLOCK_SIZE - size);
        memcpy(block + SKINNY128_BLOCK_SIZE - size, counter, size);
    } else {
        memset(block, 0, SKINNY128_BLOCK_SIZE);
    }
    ctx->offset = SKINNY128_CTR_BLOCK_SIZE;

    /* Load the counter block and convert into row vectors */
    ctx->counter[0] = skinny_to_vec4x32(READ_WORD32(block, 0));
    ctx->counter[1] = skinny_to_vec4x32(READ_WORD32(block, 4));
    ctx->counter[2] = skinny_to_vec4x32(READ_WORD32(block, 8));
    ctx->counter[3] = skinny_to_vec4x32(READ_WORD32(block, 12));

    /* Increment the second, third, and fourth columns of each row vector */
    skinny128_ctr_increment(ctx->counter, 1, 1);
    skinny128_ctr_increment(ctx->counter, 2, 2);
    skinny128_ctr_increment(ctx->counter, 3, 3);

    /* Clean up and exit */
    skinny_cleanse(block, sizeof(block));
    return 1;
}
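
/* After the three increments above, lane 0 of the row vectors holds the
   caller's counter value N while lanes 1-3 hold N+1, N+2, and N+3, so
   each keystream batch encrypts four consecutive counter blocks. */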

STATIC_INLINE SkinnyVector4x32_t skinny128_rotate_right
    (SkinnyVector4x32_t x, unsigned count)
{
    /* Note: we are rotating the cells right, which actually moves
       the values up closer to the MSB. That is, we do a left shift
       on the word to rotate the cells in the word right. */
    return (x << count) | (x >> (32 - count));
}
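
/* Example: for a row whose cells are packed little-endian as 0x03020100
   (cell 0 in the low byte), skinny128_rotate_right(x, 8) yields
   0x02010003; every cell has moved one column to the right, with the
   last cell wrapping around to column 0. */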

#if SKINNY_64BIT

/* This function evaluates the S-box on four 128-bit vectors in parallel
   by interleaving the operations. This tends to make better use of XMM
   registers on x86-64 CPUs with SSE2 support or better, as the CPU
   can schedule the unrelated operations to run in parallel. */
STATIC_INLINE void skinny128_sbox_four
    (SkinnyVector4x32_t *u, SkinnyVector4x32_t *v,
     SkinnyVector4x32_t *s, SkinnyVector4x32_t *t)
{
    SkinnyVector4x32_t x1 = *u;
    SkinnyVector4x32_t y1;
    SkinnyVector4x32_t x2 = *v;
    SkinnyVector4x32_t y2;
    SkinnyVector4x32_t x3 = *s;
    SkinnyVector4x32_t y3;
    SkinnyVector4x32_t x4 = *t;
    SkinnyVector4x32_t y4;

    x1 ^= ((~((x1 >> 2) | (x1 >> 3))) & 0x11111111U);
    x2 ^= ((~((x2 >> 2) | (x2 >> 3))) & 0x11111111U);
    x3 ^= ((~((x3 >> 2) | (x3 >> 3))) & 0x11111111U);
    x4 ^= ((~((x4 >> 2) | (x4 >> 3))) & 0x11111111U);

    y1 = ((~((x1 << 5) | (x1 << 1))) & 0x20202020U);
    y2 = ((~((x2 << 5) | (x2 << 1))) & 0x20202020U);
    y3 = ((~((x3 << 5) | (x3 << 1))) & 0x20202020U);
    y4 = ((~((x4 << 5) | (x4 << 1))) & 0x20202020U);

    x1 ^= ((~((x1 << 5) | (x1 << 4))) & 0x40404040U) ^ y1;
    x2 ^= ((~((x2 << 5) | (x2 << 4))) & 0x40404040U) ^ y2;
    x3 ^= ((~((x3 << 5) | (x3 << 4))) & 0x40404040U) ^ y3;
    x4 ^= ((~((x4 << 5) | (x4 << 4))) & 0x40404040U) ^ y4;

    y1 = ((~((x1 << 2) | (x1 << 1))) & 0x80808080U);
    y2 = ((~((x2 << 2) | (x2 << 1))) & 0x80808080U);
    y3 = ((~((x3 << 2) | (x3 << 1))) & 0x80808080U);
    y4 = ((~((x4 << 2) | (x4 << 1))) & 0x80808080U);

    x1 ^= ((~((x1 >> 2) | (x1 << 1))) & 0x02020202U) ^ y1;
    x2 ^= ((~((x2 >> 2) | (x2 << 1))) & 0x02020202U) ^ y2;
    x3 ^= ((~((x3 >> 2) | (x3 << 1))) & 0x02020202U) ^ y3;
    x4 ^= ((~((x4 >> 2) | (x4 << 1))) & 0x02020202U) ^ y4;

    y1 = ((~((x1 >> 5) | (x1 << 1))) & 0x04040404U);
    y2 = ((~((x2 >> 5) | (x2 << 1))) & 0x04040404U);
    y3 = ((~((x3 >> 5) | (x3 << 1))) & 0x04040404U);
    y4 = ((~((x4 >> 5) | (x4 << 1))) & 0x04040404U);

    x1 ^= ((~((x1 >> 1) | (x1 >> 2))) & 0x08080808U) ^ y1;
    x2 ^= ((~((x2 >> 1) | (x2 >> 2))) & 0x08080808U) ^ y2;
    x3 ^= ((~((x3 >> 1) | (x3 >> 2))) & 0x08080808U) ^ y3;
    x4 ^= ((~((x4 >> 1) | (x4 >> 2))) & 0x08080808U) ^ y4;

    *u = ((x1 & 0x08080808U) << 1) |
         ((x1 & 0x32323232U) << 2) |
         ((x1 & 0x01010101U) << 5) |
         ((x1 & 0x80808080U) >> 6) |
         ((x1 & 0x40404040U) >> 4) |
         ((x1 & 0x04040404U) >> 2);

    *v = ((x2 & 0x08080808U) << 1) |
         ((x2 & 0x32323232U) << 2) |
         ((x2 & 0x01010101U) << 5) |
         ((x2 & 0x80808080U) >> 6) |
         ((x2 & 0x40404040U) >> 4) |
         ((x2 & 0x04040404U) >> 2);

    *s = ((x3 & 0x08080808U) << 1) |
         ((x3 & 0x32323232U) << 2) |
         ((x3 & 0x01010101U) << 5) |
         ((x3 & 0x80808080U) >> 6) |
         ((x3 & 0x40404040U) >> 4) |
         ((x3 & 0x04040404U) >> 2);

    *t = ((x4 & 0x08080808U) << 1) |
         ((x4 & 0x32323232U) << 2) |
         ((x4 & 0x01010101U) << 5) |
         ((x4 & 0x80808080U) >> 6) |
         ((x4 & 0x40404040U) >> 4) |
         ((x4 & 0x04040404U) >> 2);
}
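
/* The masked NOR/XOR steps above, followed by the final bit permutation,
   evaluate the 8-bit S-box of SKINNY-128 on every byte of the four
   vectors at once: 64 table lookups are replaced by straight-line word
   operations with no tables and no data-dependent memory accesses. */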

#else

/* 32-bit x86 CPUs have eight 128-bit registers instead of the
   16 registers on x86-64 CPUs. Since we need some intermediate
   temporary values below, we perform the operations two at a time
   instead of four at a time. This alleviates register pressure. */
STATIC_INLINE void skinny128_sbox_two
    (SkinnyVector4x32_t *u, SkinnyVector4x32_t *v)
{
    SkinnyVector4x32_t x1 = *u;
    SkinnyVector4x32_t y1;
    SkinnyVector4x32_t x2 = *v;
    SkinnyVector4x32_t y2;

    x1 ^= ((~((x1 >> 2) | (x1 >> 3))) & 0x11111111U);
    x2 ^= ((~((x2 >> 2) | (x2 >> 3))) & 0x11111111U);

    y1 = ((~((x1 << 5) | (x1 << 1))) & 0x20202020U);
    y2 = ((~((x2 << 5) | (x2 << 1))) & 0x20202020U);

    x1 ^= ((~((x1 << 5) | (x1 << 4))) & 0x40404040U) ^ y1;
    x2 ^= ((~((x2 << 5) | (x2 << 4))) & 0x40404040U) ^ y2;

    y1 = ((~((x1 << 2) | (x1 << 1))) & 0x80808080U);
    y2 = ((~((x2 << 2) | (x2 << 1))) & 0x80808080U);

    x1 ^= ((~((x1 >> 2) | (x1 << 1))) & 0x02020202U) ^ y1;
    x2 ^= ((~((x2 >> 2) | (x2 << 1))) & 0x02020202U) ^ y2;

    y1 = ((~((x1 >> 5) | (x1 << 1))) & 0x04040404U);
    y2 = ((~((x2 >> 5) | (x2 << 1))) & 0x04040404U);

    x1 ^= ((~((x1 >> 1) | (x1 >> 2))) & 0x08080808U) ^ y1;
    x2 ^= ((~((x2 >> 1) | (x2 >> 2))) & 0x08080808U) ^ y2;

    *u = ((x1 & 0x08080808U) << 1) |
         ((x1 & 0x32323232U) << 2) |
         ((x1 & 0x01010101U) << 5) |
         ((x1 & 0x80808080U) >> 6) |
         ((x1 & 0x40404040U) >> 4) |
         ((x1 & 0x04040404U) >> 2);

    *v = ((x2 & 0x08080808U) << 1) |
         ((x2 & 0x32323232U) << 2) |
         ((x2 & 0x01010101U) << 5) |
         ((x2 & 0x80808080U) >> 6) |
         ((x2 & 0x40404040U) >> 4) |
         ((x2 & 0x04040404U) >> 2);
}

#endif

static void skinny128_ecb_encrypt_four
    (void *output, const SkinnyVector4x32_t *input, const Skinny128Key_t *ks)
{
    SkinnyVector4x32_t row0;
    SkinnyVector4x32_t row1;
    SkinnyVector4x32_t row2;
    SkinnyVector4x32_t row3;
    const Skinny128HalfCells_t *schedule;
    unsigned index;
    SkinnyVector4x32_t temp;

    /* Load the rows of all four counter blocks into local variables */
    row0 = input[0];
    row1 = input[1];
    row2 = input[2];
    row3 = input[3];

    /* Perform all encryption rounds on the four blocks in parallel */
    schedule = ks->schedule;
    for (index = ks->rounds; index > 0; --index, ++schedule) {
        /* Apply the S-box to all bytes in the state */
#if SKINNY_64BIT
        skinny128_sbox_four(&row0, &row1, &row2, &row3);
#else
        skinny128_sbox_two(&row0, &row1);
        skinny128_sbox_two(&row2, &row3);
#endif

        /* Apply the subkey for this round, plus the 0x02 constant
           that SKINNY adds to the third row in every round */
        row0 ^= schedule->row[0];
        row1 ^= schedule->row[1];
        row2 ^= 0x02;

        /* Shift the rows */
        row1 = skinny128_rotate_right(row1, 8);
        row2 = skinny128_rotate_right(row2, 16);
        row3 = skinny128_rotate_right(row3, 24);

        /* Mix the columns */
        row1 ^= row2;
        row2 ^= row0;
        temp = row3 ^ row2;
        row3 = row2;
        row2 = row1;
        row1 = row0;
        row0 = temp;
    }

    /* Write the rows of all four blocks back to memory */
#if SKINNY_LITTLE_ENDIAN && SKINNY_UNALIGNED
    *((SkinnyVector4x32U_t *)output) =
        (SkinnyVector4x32_t){row0[0], row1[0], row2[0], row3[0]};
    *((SkinnyVector4x32U_t *)(output + 16)) =
        (SkinnyVector4x32_t){row0[1], row1[1], row2[1], row3[1]};
    *((SkinnyVector4x32U_t *)(output + 32)) =
        (SkinnyVector4x32_t){row0[2], row1[2], row2[2], row3[2]};
    *((SkinnyVector4x32U_t *)(output + 48)) =
        (SkinnyVector4x32_t){row0[3], row1[3], row2[3], row3[3]};
#else
    WRITE_WORD32(output, 0, row0[0]);
    WRITE_WORD32(output, 4, row1[0]);
    WRITE_WORD32(output, 8, row2[0]);
    WRITE_WORD32(output, 12, row3[0]);
    WRITE_WORD32(output, 16, row0[1]);
    WRITE_WORD32(output, 20, row1[1]);
    WRITE_WORD32(output, 24, row2[1]);
    WRITE_WORD32(output, 28, row3[1]);
    WRITE_WORD32(output, 32, row0[2]);
    WRITE_WORD32(output, 36, row1[2]);
    WRITE_WORD32(output, 40, row2[2]);
    WRITE_WORD32(output, 44, row3[2]);
    WRITE_WORD32(output, 48, row0[3]);
    WRITE_WORD32(output, 52, row1[3]);
    WRITE_WORD32(output, 56, row2[3]);
    WRITE_WORD32(output, 60, row3[3]);
#endif
}
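
/* The stores above transpose the state back from row-vector form into
   four consecutive 16-byte output blocks: lane j of row0..row3 becomes
   output block j. The fast path uses the unaligned vector type
   (SkinnyVector4x32U_t) because `output` carries no alignment
   guarantee. */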

static int skinny128_ctr_vec128_encrypt
    (void *output, const void *input, size_t size, Skinny128CTR_t *ctr)
{
    Skinny128CTRVec128Ctx_t *ctx;
    uint8_t *out = (uint8_t *)output;
    const uint8_t *in = (const uint8_t *)input;

    /* Validate the parameters */
    if (!output || !input)
        return 0;
    ctx = ctr->ctx;
    if (!ctx)
        return 0;

    /* Encrypt the input in CTR mode to create the output */
    while (size > 0) {
        if (ctx->offset >= SKINNY128_CTR_BLOCK_SIZE) {
            /* We need a new keystream block */
            skinny128_ecb_encrypt_four
                (ctx->ecounter, ctx->counter, &(ctx->kt.ks));
            skinny128_ctr_increment(ctx->counter, 0, 4);
            skinny128_ctr_increment(ctx->counter, 1, 4);
            skinny128_ctr_increment(ctx->counter, 2, 4);
            skinny128_ctr_increment(ctx->counter, 3, 4);

            /* XOR an entire keystream block in one go if possible */
            if (size >= SKINNY128_CTR_BLOCK_SIZE) {
                skinny128_xor(out, in, ctx->ecounter);
                skinny128_xor(out + SKINNY128_BLOCK_SIZE,
                              in + SKINNY128_BLOCK_SIZE,
                              ctx->ecounter + SKINNY128_BLOCK_SIZE);
                skinny128_xor(out + SKINNY128_BLOCK_SIZE * 2,
                              in + SKINNY128_BLOCK_SIZE * 2,
                              ctx->ecounter + SKINNY128_BLOCK_SIZE * 2);
                skinny128_xor(out + SKINNY128_BLOCK_SIZE * 3,
                              in + SKINNY128_BLOCK_SIZE * 3,
                              ctx->ecounter + SKINNY128_BLOCK_SIZE * 3);
                out += SKINNY128_CTR_BLOCK_SIZE;
                in += SKINNY128_CTR_BLOCK_SIZE;
                size -= SKINNY128_CTR_BLOCK_SIZE;
            } else {
                /* Last partial block in the request */
                skinny_xor(out, in, ctx->ecounter, size);
                ctx->offset = size;
                break;
            }
        } else {
            /* Left-over keystream data from the last request */
            size_t temp = SKINNY128_CTR_BLOCK_SIZE - ctx->offset;
            if (temp > size)
                temp = size;
            skinny_xor(out, in, ctx->ecounter + ctx->offset, temp);
            ctx->offset += temp;
            out += temp;
            in += temp;
            size -= temp;
        }
    }
    return 1;
}

/* Vtable for the 128-bit vector implementation of Skinny-128 in CTR mode */
Skinny128CTRVtable_t const _skinny128_ctr_vec128 = {
    skinny128_ctr_vec128_init,
    skinny128_ctr_vec128_cleanup,
    skinny128_ctr_vec128_set_key,
    skinny128_ctr_vec128_set_tweaked_key,
    skinny128_ctr_vec128_set_tweak,
    skinny128_ctr_vec128_set_counter,
    skinny128_ctr_vec128_encrypt
};

#else /* !SKINNY_VEC128_MATH */

/* Stubbed out */
Skinny128CTRVtable_t const _skinny128_ctr_vec128;

#endif /* !SKINNY_VEC128_MATH */
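
/* Example usage (a sketch, not part of the original file): callers do not
   use this vtable directly. Assuming the public skinny128_ctr_* wrappers
   declared in skinny128-cipher.h, which mirror the vtable signatures above
   and select this backend when 128-bit vector math is available, a CTR
   encryption would look like:

       Skinny128CTR_t ctr;
       uint8_t key[32];    // 256-bit Skinny-128 key
       uint8_t nonce[16];  // initial counter block
       uint8_t buf[256];   // encrypted in place

       skinny128_ctr_init(&ctr);
       skinny128_ctr_set_key(&ctr, key, sizeof(key));
       skinny128_ctr_set_counter(&ctr, nonce, sizeof(nonce));
       skinny128_ctr_encrypt(buf, buf, sizeof(buf), &ctr);
       skinny128_ctr_cleanup(&ctr);

   Decryption is identical, since CTR mode XORs the same keystream in
   both directions. */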