Lightweight Cryptography Primitives
internal-skinnyutil.h
/*
 * Copyright (C) 2020 Southern Storm Software, Pty Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef LW_INTERNAL_SKINNYUTIL_H
#define LW_INTERNAL_SKINNYUTIL_H

#include "internal-util.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Utilities for implementing SKINNY-128 */

#define skinny128_LFSR2(x) \
    do { \
        uint32_t _x = (x); \
        (x) = ((_x << 1) & 0xFEFEFEFEU) ^ \
              (((_x >> 7) ^ (_x >> 5)) & 0x01010101U); \
    } while (0)

#define skinny128_LFSR3(x) \
    do { \
        uint32_t _x = (x); \
        (x) = ((_x >> 1) & 0x7F7F7F7FU) ^ \
              (((_x << 7) ^ (_x << 1)) & 0x80808080U); \
    } while (0)

/* LFSR2 and LFSR3 are inverses of each other */
#define skinny128_inv_LFSR2(x) skinny128_LFSR3(x)
#define skinny128_inv_LFSR3(x) skinny128_LFSR2(x)
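
/*
 * Illustrative sketch (not part of the original header): because
 * skinny128_inv_LFSR2 expands to skinny128_LFSR3, clocking a tweakey
 * word forward and then backward restores its original value. The
 * helper name below is hypothetical and exists only to demonstrate how
 * the macros are invoked on a 32-bit word holding four TK2 cells.
 */
static inline int skinny128_lfsr2_roundtrip_demo(uint32_t word)
{
    uint32_t temp = word;
    skinny128_LFSR2(temp);      /* clock all four byte-wide LFSRs forward */
    skinny128_inv_LFSR2(temp);  /* skinny128_LFSR3 undoes skinny128_LFSR2 */
    return temp == word;        /* always 1 */
}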

#define skinny128_permute_tk(tk) \
    do { \
        /* PT = [9, 15, 8, 13, 10, 14, 12, 11, 0, 1, 2, 3, 4, 5, 6, 7] */ \
        uint32_t row2 = tk[2]; \
        uint32_t row3 = tk[3]; \
        tk[2] = tk[0]; \
        tk[3] = tk[1]; \
        row3 = (row3 << 16) | (row3 >> 16); \
        tk[0] = ((row2 >> 8) & 0x000000FFU) | \
                ((row2 << 16) & 0x00FF0000U) | \
                ( row3 & 0xFF00FF00U); \
        tk[1] = ((row2 >> 16) & 0x000000FFU) | \
                 (row2 & 0xFF000000U) | \
                ((row3 << 8) & 0x0000FF00U) | \
                ( row3 & 0x00FF0000U); \
    } while (0)

#define skinny128_permute_tk_half(tk2, tk3) \
    do { \
        /* Permute the bottom half of the tweakey state in place, no swap */ \
        uint32_t row2 = tk2; \
        uint32_t row3 = tk3; \
        row3 = (row3 << 16) | (row3 >> 16); \
        tk2 = ((row2 >> 8) & 0x000000FFU) | \
              ((row2 << 16) & 0x00FF0000U) | \
              ( row3 & 0xFF00FF00U); \
        tk3 = ((row2 >> 16) & 0x000000FFU) | \
               (row2 & 0xFF000000U) | \
              ((row3 << 8) & 0x0000FF00U) | \
              ( row3 & 0x00FF0000U); \
    } while (0)

#define skinny128_inv_permute_tk(tk) \
    do { \
        /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \
        uint32_t row0 = tk[0]; \
        uint32_t row1 = tk[1]; \
        tk[0] = tk[2]; \
        tk[1] = tk[3]; \
        tk[2] = ((row0 >> 16) & 0x000000FFU) | \
                ((row0 << 8) & 0x0000FF00U) | \
                ((row1 << 16) & 0x00FF0000U) | \
                ( row1 & 0xFF000000U); \
        tk[3] = ((row0 >> 16) & 0x0000FF00U) | \
                ((row0 << 16) & 0xFF000000U) | \
                ((row1 >> 16) & 0x000000FFU) | \
                ((row1 << 8) & 0x00FF0000U); \
    } while (0)

#define skinny128_inv_permute_tk_half(tk0, tk1) \
    do { \
        /* Permute the top half of the tweakey state in place, no swap */ \
        uint32_t row0 = tk0; \
        uint32_t row1 = tk1; \
        tk0 = ((row0 >> 16) & 0x000000FFU) | \
              ((row0 << 8) & 0x0000FF00U) | \
              ((row1 << 16) & 0x00FF0000U) | \
              ( row1 & 0xFF000000U); \
        tk1 = ((row0 >> 16) & 0x0000FF00U) | \
              ((row0 << 16) & 0xFF000000U) | \
              ((row1 >> 16) & 0x000000FFU) | \
              ((row1 << 8) & 0x00FF0000U); \
    } while (0)
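
/*
 * Illustrative sketch (not part of the original header): the forward and
 * inverse tweakey permutations cancel each other, so a four-word tweakey
 * array survives a round trip unchanged. The helper name and test values
 * below are hypothetical and only show how the full-state macros are
 * applied to an array of four row words.
 */
static inline int skinny128_permute_tk_roundtrip_demo(void)
{
    uint32_t tk[4] = { 0x03020100U, 0x07060504U, 0x0B0A0908U, 0x0F0E0D0CU };
    skinny128_permute_tk(tk);     /* apply the cell permutation PT */
    skinny128_inv_permute_tk(tk); /* apply PT' to undo it */
    return tk[0] == 0x03020100U && tk[1] == 0x07060504U &&
           tk[2] == 0x0B0A0908U && tk[3] == 0x0F0E0D0CU;  /* always 1 */
}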

/*
 * Apply the SKINNY sbox. The original version from the specification is
 * equivalent to:
 *
 *     #define SBOX_MIX(x)
 *         (((~((((x) >> 1) | (x)) >> 2)) & 0x11111111U) ^ (x))
 *     #define SBOX_SWAP(x)
 *         (((x) & 0xF9F9F9F9U) |
 *         (((x) >> 1) & 0x02020202U) |
 *         (((x) << 1) & 0x04040404U))
 *     #define SBOX_PERMUTE(x)
 *         ((((x) & 0x01010101U) << 2) |
 *          (((x) & 0x06060606U) << 5) |
 *          (((x) & 0x20202020U) >> 5) |
 *          (((x) & 0xC8C8C8C8U) >> 2) |
 *          (((x) & 0x10101010U) >> 1))
 *
 *     x = SBOX_MIX(x);
 *     x = SBOX_PERMUTE(x);
 *     x = SBOX_MIX(x);
 *     x = SBOX_PERMUTE(x);
 *     x = SBOX_MIX(x);
 *     x = SBOX_PERMUTE(x);
 *     x = SBOX_MIX(x);
 *     return SBOX_SWAP(x);
 *
 * However, we can mix the bits in their original positions and then
 * delay the SBOX_PERMUTE and SBOX_SWAP steps to be performed with one
 * final permutation. This reduces the number of shift operations.
 */
#define skinny128_sbox(x) \
do { \
    uint32_t y; \
    \
    /* Mix the bits */ \
    x = ~x; \
    x ^= (((x >> 2) & (x >> 3)) & 0x11111111U); \
    y = (((x << 5) & (x << 1)) & 0x20202020U); \
    x ^= (((x << 5) & (x << 4)) & 0x40404040U) ^ y; \
    y = (((x << 2) & (x << 1)) & 0x80808080U); \
    x ^= (((x >> 2) & (x << 1)) & 0x02020202U) ^ y; \
    y = (((x >> 5) & (x << 1)) & 0x04040404U); \
    x ^= (((x >> 1) & (x >> 2)) & 0x08080808U) ^ y; \
    x = ~x; \
    \
    /* Permutation generated by http://programming.sirrida.de/calcperm.php */ \
    /* The final permutation for each byte is [2 7 6 1 3 0 4 5] */ \
    x = ((x & 0x08080808U) << 1) | \
        ((x & 0x32323232U) << 2) | \
        ((x & 0x01010101U) << 5) | \
        ((x & 0x80808080U) >> 6) | \
        ((x & 0x40404040U) >> 4) | \
        ((x & 0x04040404U) >> 2); \
} while (0)

/*
 * Apply the inverse of the SKINNY sbox. The original version from the
 * specification is equivalent to:
 *
 *     #define SBOX_MIX(x)
 *         (((~((((x) >> 1) | (x)) >> 2)) & 0x11111111U) ^ (x))
 *     #define SBOX_SWAP(x)
 *         (((x) & 0xF9F9F9F9U) |
 *         (((x) >> 1) & 0x02020202U) |
 *         (((x) << 1) & 0x04040404U))
 *     #define SBOX_PERMUTE_INV(x)
 *         ((((x) & 0x08080808U) << 1) |
 *          (((x) & 0x32323232U) << 2) |
 *          (((x) & 0x01010101U) << 5) |
 *          (((x) & 0xC0C0C0C0U) >> 5) |
 *          (((x) & 0x04040404U) >> 2))
 *
 *     x = SBOX_SWAP(x);
 *     x = SBOX_MIX(x);
 *     x = SBOX_PERMUTE_INV(x);
 *     x = SBOX_MIX(x);
 *     x = SBOX_PERMUTE_INV(x);
 *     x = SBOX_MIX(x);
 *     x = SBOX_PERMUTE_INV(x);
 *     return SBOX_MIX(x);
 *
 * However, we can mix the bits in their original positions and then
 * delay the SBOX_PERMUTE_INV and SBOX_SWAP steps to be performed with one
 * final permutation. This reduces the number of shift operations.
 */
#define skinny128_inv_sbox(x) \
do { \
    uint32_t y; \
    \
    /* Mix the bits */ \
    x = ~x; \
    y = (((x >> 1) & (x >> 3)) & 0x01010101U); \
    x ^= (((x >> 2) & (x >> 3)) & 0x10101010U) ^ y; \
    y = (((x >> 6) & (x >> 1)) & 0x02020202U); \
    x ^= (((x >> 1) & (x >> 2)) & 0x08080808U) ^ y; \
    y = (((x << 2) & (x << 1)) & 0x80808080U); \
    x ^= (((x >> 1) & (x << 2)) & 0x04040404U) ^ y; \
    y = (((x << 5) & (x << 1)) & 0x20202020U); \
    x ^= (((x << 4) & (x << 5)) & 0x40404040U) ^ y; \
    x = ~x; \
    \
    /* Permutation generated by http://programming.sirrida.de/calcperm.php */ \
    /* The final permutation for each byte is [5 3 0 4 6 7 2 1] */ \
    x = ((x & 0x01010101U) << 2) | \
        ((x & 0x04040404U) << 4) | \
        ((x & 0x02020202U) << 6) | \
        ((x & 0x20202020U) >> 5) | \
        ((x & 0xC8C8C8C8U) >> 2) | \
        ((x & 0x10101010U) >> 1); \
} while (0)
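
/*
 * Illustrative sketch (not part of the original header): skinny128_sbox
 * substitutes the four bytes of a word in bit-sliced fashion and
 * skinny128_inv_sbox reverses it. The helper name is hypothetical and
 * only shows that the macros modify their argument in place.
 */
static inline int skinny128_sbox_roundtrip_demo(uint32_t word)
{
    uint32_t temp = word;
    skinny128_sbox(temp);      /* substitute all four bytes at once */
    skinny128_inv_sbox(temp);  /* the inverse S-box restores them */
    return temp == word;       /* always 1 */
}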

/* Utilities for implementing SKINNY-64 */

#define skinny64_LFSR2(x) \
    do { \
        uint16_t _x = (x); \
        (x) = ((_x << 1) & 0xEEEEU) ^ (((_x >> 3) ^ (_x >> 2)) & 0x1111U); \
    } while (0)

#define skinny64_LFSR3(x) \
    do { \
        uint16_t _x = (x); \
        (x) = ((_x >> 1) & 0x7777U) ^ ((_x ^ (_x << 3)) & 0x8888U); \
    } while (0)

/* LFSR2 and LFSR3 are inverses of each other */
#define skinny64_inv_LFSR2(x) skinny64_LFSR3(x)
#define skinny64_inv_LFSR3(x) skinny64_LFSR2(x)

#define skinny64_permute_tk(tk) \
    do { \
        /* PT = [9, 15, 8, 13, 10, 14, 12, 11, 0, 1, 2, 3, 4, 5, 6, 7] */ \
        uint16_t row2 = tk[2]; \
        uint16_t row3 = tk[3]; \
        tk[2] = tk[0]; \
        tk[3] = tk[1]; \
        row3 = (row3 << 8) | (row3 >> 8); \
        tk[0] = ((row2 << 4) & 0xF000U) | \
                ((row2 >> 8) & 0x00F0U) | \
                ( row3 & 0x0F0FU); \
        tk[1] = ((row2 << 8) & 0xF000U) | \
                ((row3 >> 4) & 0x0F00U) | \
                ( row3 & 0x00F0U) | \
                ( row2 & 0x000FU); \
    } while (0)

#define skinny64_inv_permute_tk(tk) \
    do { \
        /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \
        uint16_t row0 = tk[0]; \
        uint16_t row1 = tk[1]; \
        tk[0] = tk[2]; \
        tk[1] = tk[3]; \
        tk[2] = ((row0 << 8) & 0xF000U) | \
                ((row0 >> 4) & 0x0F00U) | \
                ((row1 >> 8) & 0x00F0U) | \
                ( row1 & 0x000FU); \
        tk[3] = ((row1 << 8) & 0xF000U) | \
                ((row0 << 8) & 0x0F00U) | \
                ((row1 >> 4) & 0x00F0U) | \
                ((row0 >> 8) & 0x000FU); \
    } while (0)

/*
 * Apply the SKINNY-64 sbox. The original version from the
 * specification is equivalent to:
 *
 *     #define SBOX_MIX(x)
 *         (((~((((x) >> 1) | (x)) >> 2)) & 0x1111U) ^ (x))
 *     #define SBOX_SHIFT(x)
 *         ((((x) << 1) & 0xEEEEU) | (((x) >> 3) & 0x1111U))
 *
 *     x = SBOX_MIX(x);
 *     x = SBOX_SHIFT(x);
 *     x = SBOX_MIX(x);
 *     x = SBOX_SHIFT(x);
 *     x = SBOX_MIX(x);
 *     x = SBOX_SHIFT(x);
 *     return SBOX_MIX(x);
 *
 * However, we can mix the bits in their original positions and then
 * delay the SBOX_SHIFT steps to be performed with one final rotation.
 * This reduces the number of required shift operations from 14 to 10.
 *
 * We can further reduce the number of NOT operations from 4 to 2
 * using the technique from https://github.com/kste/skinny_avx to
 * convert NOR-XOR operations into AND-XOR operations by converting
 * the S-box into its NOT-inverse.
 */
#define skinny64_sbox(x) \
do { \
    x = ~x; \
    x = (((x >> 3) & (x >> 2)) & 0x1111U) ^ x; \
    x = (((x << 1) & (x << 2)) & 0x8888U) ^ x; \
    x = (((x << 1) & (x << 2)) & 0x4444U) ^ x; \
    x = (((x >> 2) & (x << 1)) & 0x2222U) ^ x; \
    x = ~x; \
    x = ((x >> 1) & 0x7777U) | ((x << 3) & 0x8888U); \
} while (0)

/*
 * Apply the inverse of the SKINNY-64 sbox. The original version
 * from the specification is equivalent to:
 *
 *     #define SBOX_MIX(x)
 *         (((~((((x) >> 1) | (x)) >> 2)) & 0x1111U) ^ (x))
 *     #define SBOX_SHIFT_INV(x)
 *         ((((x) >> 1) & 0x7777U) | (((x) << 3) & 0x8888U))
 *
 *     x = SBOX_MIX(x);
 *     x = SBOX_SHIFT_INV(x);
 *     x = SBOX_MIX(x);
 *     x = SBOX_SHIFT_INV(x);
 *     x = SBOX_MIX(x);
 *     x = SBOX_SHIFT_INV(x);
 *     return SBOX_MIX(x);
 */
#define skinny64_inv_sbox(x) \
do { \
    x = ~x; \
    x = (((x >> 3) & (x >> 2)) & 0x1111U) ^ x; \
    x = (((x << 1) & (x >> 2)) & 0x2222U) ^ x; \
    x = (((x << 1) & (x << 2)) & 0x4444U) ^ x; \
    x = (((x << 1) & (x << 2)) & 0x8888U) ^ x; \
    x = ~x; \
    x = ((x << 1) & 0xEEEEU) | ((x >> 3) & 0x1111U); \
} while (0)
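
/*
 * Illustrative sketch (not part of the original header): the SKINNY-64
 * macros operate on 16-bit words holding four 4-bit cells, and the
 * inverse S-box undoes the forward S-box just as in the 128-bit case
 * above. The helper name is hypothetical.
 */
static inline int skinny64_sbox_roundtrip_demo(uint16_t row)
{
    uint16_t temp = row;
    skinny64_sbox(temp);      /* substitute all four nibbles at once */
    skinny64_inv_sbox(temp);  /* the inverse S-box restores them */
    return temp == row;       /* always 1 */
}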

#ifdef __cplusplus
}
#endif

#endif