Skinny-C
 All Data Structures Files Functions Variables Groups Pages
skinny64-parallel-vec128.c
1 /*
2  * Copyright (C) 2017 Southern Storm Software, Pty Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "skinny64-parallel.h"
24 #include "skinny-internal.h"
25 
26 #if SKINNY_VEC128_MATH
27 
28 STATIC_INLINE SkinnyVector8x16_t skinny64_rotate_right
29  (SkinnyVector8x16_t x, unsigned count)
30 {
31  return (x >> count) | (x << (16 - count));
32 }
33 
34 STATIC_INLINE SkinnyVector8x16_t skinny64_sbox(SkinnyVector8x16_t x)
35 {
36  SkinnyVector8x16_t bit0 = ~x;
37  SkinnyVector8x16_t bit1 = bit0 >> 1;
38  SkinnyVector8x16_t bit2 = bit0 >> 2;
39  SkinnyVector8x16_t bit3 = bit0 >> 3;
40  bit0 ^= bit3 & bit2;
41  bit3 ^= bit1 & bit2;
42  bit2 ^= bit1 & bit0;
43  bit1 ^= bit0 & bit3;
44  x = ((bit0 << 3) & 0x8888U) |
45  ( bit1 & 0x1111U) |
46  ((bit2 << 1) & 0x2222U) |
47  ((bit3 << 2) & 0x4444U);
48  return ~x;
49 }
50 
51 STATIC_INLINE SkinnyVector8x16_t skinny64_inv_sbox(SkinnyVector8x16_t x)
52 {
53  SkinnyVector8x16_t bit0 = ~x;
54  SkinnyVector8x16_t bit1 = bit0 >> 1;
55  SkinnyVector8x16_t bit2 = bit0 >> 2;
56  SkinnyVector8x16_t bit3 = bit0 >> 3;
57  bit0 ^= bit3 & bit2;
58  bit1 ^= bit3 & bit0;
59  bit2 ^= bit1 & bit0;
60  bit3 ^= bit1 & bit2;
61  x = ((bit0 << 1) & 0x2222U) |
62  ((bit1 << 2) & 0x4444U) |
63  ((bit2 << 3) & 0x8888U) |
64  ( bit3 & 0x1111U);
65  return ~x;
66 }
67 
68 void _skinny64_parallel_encrypt_vec128
69  (void *output, const void *input, const Skinny64Key_t *ks)
70 {
71  SkinnyVector8x16_t row0;
72  SkinnyVector8x16_t row1;
73  SkinnyVector8x16_t row2;
74  SkinnyVector8x16_t row3;
75  const Skinny64HalfCells_t *schedule;
76  unsigned index;
77  SkinnyVector8x16_t temp;
78 
79  /* Read the rows of all eight blocks into memory */
80  row0 = (SkinnyVector8x16_t)
81  {READ_WORD16(input, 0), READ_WORD16(input, 8),
82  READ_WORD16(input, 16), READ_WORD16(input, 24),
83  READ_WORD16(input, 32), READ_WORD16(input, 40),
84  READ_WORD16(input, 48), READ_WORD16(input, 56)};
85  row1 = (SkinnyVector8x16_t)
86  {READ_WORD16(input, 2), READ_WORD16(input, 10),
87  READ_WORD16(input, 18), READ_WORD16(input, 26),
88  READ_WORD16(input, 34), READ_WORD16(input, 42),
89  READ_WORD16(input, 50), READ_WORD16(input, 58)};
90  row2 = (SkinnyVector8x16_t)
91  {READ_WORD16(input, 4), READ_WORD16(input, 12),
92  READ_WORD16(input, 20), READ_WORD16(input, 28),
93  READ_WORD16(input, 36), READ_WORD16(input, 44),
94  READ_WORD16(input, 52), READ_WORD16(input, 60)};
95  row3 = (SkinnyVector8x16_t)
96  {READ_WORD16(input, 6), READ_WORD16(input, 14),
97  READ_WORD16(input, 22), READ_WORD16(input, 30),
98  READ_WORD16(input, 38), READ_WORD16(input, 46),
99  READ_WORD16(input, 54), READ_WORD16(input, 62)};
100 
101  /* Perform all encryption rounds */
102  schedule = ks->schedule;
103  for (index = ks->rounds; index > 0; --index, ++schedule) {
104  /* Apply the S-box to all bytes in the state */
105  row0 = skinny64_sbox(row0);
106  row1 = skinny64_sbox(row1);
107  row2 = skinny64_sbox(row2);
108  row3 = skinny64_sbox(row3);
109 
110  /* Apply the subkey for this round */
111  row0 ^= schedule->row[0];
112  row1 ^= schedule->row[1];
113  row2 ^= 0x20;
114 
115  /* Shift the rows */
116  row1 = skinny64_rotate_right(row1, 4);
117  row2 = skinny64_rotate_right(row2, 8);
118  row3 = skinny64_rotate_right(row3, 12);
119 
120  /* Mix the columns */
121  row1 ^= row2;
122  row2 ^= row0;
123  temp = row3 ^ row2;
124  row3 = row2;
125  row2 = row1;
126  row1 = row0;
127  row0 = temp;
128  }
129 
130  /* Write the rows of all eight blocks back to memory */
131  WRITE_WORD16(output, 0, row0[0]);
132  WRITE_WORD16(output, 2, row1[0]);
133  WRITE_WORD16(output, 4, row2[0]);
134  WRITE_WORD16(output, 6, row3[0]);
135  WRITE_WORD16(output, 8, row0[1]);
136  WRITE_WORD16(output, 10, row1[1]);
137  WRITE_WORD16(output, 12, row2[1]);
138  WRITE_WORD16(output, 14, row3[1]);
139  WRITE_WORD16(output, 16, row0[2]);
140  WRITE_WORD16(output, 18, row1[2]);
141  WRITE_WORD16(output, 20, row2[2]);
142  WRITE_WORD16(output, 22, row3[2]);
143  WRITE_WORD16(output, 24, row0[3]);
144  WRITE_WORD16(output, 26, row1[3]);
145  WRITE_WORD16(output, 28, row2[3]);
146  WRITE_WORD16(output, 30, row3[3]);
147  WRITE_WORD16(output, 32, row0[4]);
148  WRITE_WORD16(output, 34, row1[4]);
149  WRITE_WORD16(output, 36, row2[4]);
150  WRITE_WORD16(output, 38, row3[4]);
151  WRITE_WORD16(output, 40, row0[5]);
152  WRITE_WORD16(output, 42, row1[5]);
153  WRITE_WORD16(output, 44, row2[5]);
154  WRITE_WORD16(output, 46, row3[5]);
155  WRITE_WORD16(output, 48, row0[6]);
156  WRITE_WORD16(output, 50, row1[6]);
157  WRITE_WORD16(output, 52, row2[6]);
158  WRITE_WORD16(output, 54, row3[6]);
159  WRITE_WORD16(output, 56, row0[7]);
160  WRITE_WORD16(output, 58, row1[7]);
161  WRITE_WORD16(output, 60, row2[7]);
162  WRITE_WORD16(output, 62, row3[7]);
163 }
164 
165 void _skinny64_parallel_decrypt_vec128
166  (void *output, const void *input, const Skinny64Key_t *ks)
167 {
168  SkinnyVector8x16_t row0;
169  SkinnyVector8x16_t row1;
170  SkinnyVector8x16_t row2;
171  SkinnyVector8x16_t row3;
172  const Skinny64HalfCells_t *schedule;
173  unsigned index;
174  SkinnyVector8x16_t temp;
175 
176  /* Read the rows of all eight blocks into memory */
177  row0 = (SkinnyVector8x16_t)
178  {READ_WORD16(input, 0), READ_WORD16(input, 8),
179  READ_WORD16(input, 16), READ_WORD16(input, 24),
180  READ_WORD16(input, 32), READ_WORD16(input, 40),
181  READ_WORD16(input, 48), READ_WORD16(input, 56)};
182  row1 = (SkinnyVector8x16_t)
183  {READ_WORD16(input, 2), READ_WORD16(input, 10),
184  READ_WORD16(input, 18), READ_WORD16(input, 26),
185  READ_WORD16(input, 34), READ_WORD16(input, 42),
186  READ_WORD16(input, 50), READ_WORD16(input, 58)};
187  row2 = (SkinnyVector8x16_t)
188  {READ_WORD16(input, 4), READ_WORD16(input, 12),
189  READ_WORD16(input, 20), READ_WORD16(input, 28),
190  READ_WORD16(input, 36), READ_WORD16(input, 44),
191  READ_WORD16(input, 52), READ_WORD16(input, 60)};
192  row3 = (SkinnyVector8x16_t)
193  {READ_WORD16(input, 6), READ_WORD16(input, 14),
194  READ_WORD16(input, 22), READ_WORD16(input, 30),
195  READ_WORD16(input, 38), READ_WORD16(input, 46),
196  READ_WORD16(input, 54), READ_WORD16(input, 62)};
197 
198  /* Perform all decryption rounds */
199  schedule = &(ks->schedule[ks->rounds - 1]);
200  for (index = ks->rounds; index > 0; --index, --schedule) {
201  /* Inverse mix of the columns */
202  temp = row3;
203  row3 = row0;
204  row0 = row1;
205  row1 = row2;
206  row3 ^= temp;
207  row2 = temp ^ row0;
208  row1 ^= row2;
209 
210  /* Inverse shift of the rows */
211  row1 = skinny64_rotate_right(row1, 12);
212  row2 = skinny64_rotate_right(row2, 8);
213  row3 = skinny64_rotate_right(row3, 4);
214 
215  /* Apply the subkey for this round */
216  row0 ^= schedule->row[0];
217  row1 ^= schedule->row[1];
218  row2 ^= 0x20;
219 
220  /* Apply the inverse S-box to all bytes in the state */
221  row0 = skinny64_inv_sbox(row0);
222  row1 = skinny64_inv_sbox(row1);
223  row2 = skinny64_inv_sbox(row2);
224  row3 = skinny64_inv_sbox(row3);
225  }
226 
227  /* Write the rows of all eight blocks back to memory */
228  WRITE_WORD16(output, 0, row0[0]);
229  WRITE_WORD16(output, 2, row1[0]);
230  WRITE_WORD16(output, 4, row2[0]);
231  WRITE_WORD16(output, 6, row3[0]);
232  WRITE_WORD16(output, 8, row0[1]);
233  WRITE_WORD16(output, 10, row1[1]);
234  WRITE_WORD16(output, 12, row2[1]);
235  WRITE_WORD16(output, 14, row3[1]);
236  WRITE_WORD16(output, 16, row0[2]);
237  WRITE_WORD16(output, 18, row1[2]);
238  WRITE_WORD16(output, 20, row2[2]);
239  WRITE_WORD16(output, 22, row3[2]);
240  WRITE_WORD16(output, 24, row0[3]);
241  WRITE_WORD16(output, 26, row1[3]);
242  WRITE_WORD16(output, 28, row2[3]);
243  WRITE_WORD16(output, 30, row3[3]);
244  WRITE_WORD16(output, 32, row0[4]);
245  WRITE_WORD16(output, 34, row1[4]);
246  WRITE_WORD16(output, 36, row2[4]);
247  WRITE_WORD16(output, 38, row3[4]);
248  WRITE_WORD16(output, 40, row0[5]);
249  WRITE_WORD16(output, 42, row1[5]);
250  WRITE_WORD16(output, 44, row2[5]);
251  WRITE_WORD16(output, 46, row3[5]);
252  WRITE_WORD16(output, 48, row0[6]);
253  WRITE_WORD16(output, 50, row1[6]);
254  WRITE_WORD16(output, 52, row2[6]);
255  WRITE_WORD16(output, 54, row3[6]);
256  WRITE_WORD16(output, 56, row0[7]);
257  WRITE_WORD16(output, 58, row1[7]);
258  WRITE_WORD16(output, 60, row2[7]);
259  WRITE_WORD16(output, 62, row3[7]);
260 }
261 
262 #else /* !SKINNY_VEC128_MATH */
263 
264 /* Stubbed out */
265 
266 void _skinny64_parallel_encrypt_vec128
267  (void *output, const void *input, const Skinny64Key_t *ks)
268 {
269  (void)output;
270  (void)input;
271  (void)ks;
272 }
273 
274 void _skinny64_parallel_decrypt_vec128
275  (void *output, const void *input, const Skinny64Key_t *ks)
276 {
277  (void)output;
278  (void)input;
279  (void)ks;
280 }
281 
282 #endif /* !SKINNY_VEC128_MATH */
Skinny64HalfCells_t schedule[SKINNY64_MAX_ROUNDS]
Key schedule for Skinny64 block ciphers.
Union that describes a 32-bit 2x4 array of cells.