24 #include "utility/EndianUtil.h"
67 #if defined(CRYPTO_LITTLE_ENDIAN)
93 uint32_t Z[4] = {0, 0, 0, 0};
102 for (uint8_t posn = 0; posn < 16; ++posn) {
103 uint8_t value = ((
const uint8_t *)Y)[posn];
104 for (uint8_t bit = 0; bit < 8; ++bit) {
105 __asm__ __volatile__ (
110 "mov __tmp_reg__,__zero_reg__\n"
111 "sbc __tmp_reg__,__zero_reg__\n"
115 "and r24,__tmp_reg__\n"
120 "and r24,__tmp_reg__\n"
125 "and r24,__tmp_reg__\n"
130 "and r24,__tmp_reg__\n"
135 "and r24,__tmp_reg__\n"
140 "and r24,__tmp_reg__\n"
145 "and r24,__tmp_reg__\n"
150 "and r24,__tmp_reg__\n"
155 "and r24,__tmp_reg__\n"
160 "and r24,__tmp_reg__\n"
165 "and r24,__tmp_reg__\n"
170 "and r24,__tmp_reg__\n"
175 "and r24,__tmp_reg__\n"
180 "and r24,__tmp_reg__\n"
185 "and r24,__tmp_reg__\n"
190 "and r24,__tmp_reg__\n"
212 "mov r24,__zero_reg__\n"
213 "sbc r24,__zero_reg__\n"
216 :
"+r"(V0),
"+r"(V1),
"+r"(V2),
"+r"(V3)
217 :
"Q"(Z[0]),
"Q"(Z[1]),
"Q"(Z[2]),
"Q"(Z[3]),
"Q"(value)
224 __asm__ __volatile__ (
225 "ldd __tmp_reg__,%A0\n"
226 "st X+,__tmp_reg__\n"
227 "ldd __tmp_reg__,%B0\n"
228 "st X+,__tmp_reg__\n"
229 "ldd __tmp_reg__,%C0\n"
230 "st X+,__tmp_reg__\n"
231 "ldd __tmp_reg__,%D0\n"
232 "st X+,__tmp_reg__\n"
233 "ldd __tmp_reg__,%A1\n"
234 "st X+,__tmp_reg__\n"
235 "ldd __tmp_reg__,%B1\n"
236 "st X+,__tmp_reg__\n"
237 "ldd __tmp_reg__,%C1\n"
238 "st X+,__tmp_reg__\n"
239 "ldd __tmp_reg__,%D1\n"
240 "st X+,__tmp_reg__\n"
241 "ldd __tmp_reg__,%A2\n"
242 "st X+,__tmp_reg__\n"
243 "ldd __tmp_reg__,%B2\n"
244 "st X+,__tmp_reg__\n"
245 "ldd __tmp_reg__,%C2\n"
246 "st X+,__tmp_reg__\n"
247 "ldd __tmp_reg__,%D2\n"
248 "st X+,__tmp_reg__\n"
249 "ldd __tmp_reg__,%A3\n"
250 "st X+,__tmp_reg__\n"
251 "ldd __tmp_reg__,%B3\n"
252 "st X+,__tmp_reg__\n"
253 "ldd __tmp_reg__,%C3\n"
254 "st X+,__tmp_reg__\n"
255 "ldd __tmp_reg__,%D3\n"
257 : :
"Q"(Z[0]),
"Q"(Z[1]),
"Q"(Z[2]),
"Q"(Z[3]),
"x"(Y)
272 for (uint8_t posn = 0; posn < 16; ++posn) {
273 uint8_t value = ((
const uint8_t *)Y)[posn];
274 for (uint8_t bit = 0; bit < 8; ++bit, value <<= 1) {
276 uint32_t mask = (~((uint32_t)(value >> 7))) + 1;
285 mask = ((~(V3 & 0x01)) + 1) & 0xE1000000;
286 V3 = (V3 >> 1) | (V2 << 31);
287 V2 = (V2 >> 1) | (V1 << 31);
288 V1 = (V1 >> 1) | (V0 << 31);
289 V0 = (V0 >> 1) ^ mask;
317 __asm__ __volatile__ (
365 "mov r17,__zero_reg__\n"
366 "sbc r17,__zero_reg__\n"
371 :
"r16",
"r17",
"r18",
"r19",
"r20"
374 uint32_t V0 = be32toh(V[0]);
375 uint32_t V1 = be32toh(V[1]);
376 uint32_t V2 = be32toh(V[2]);
377 uint32_t V3 = be32toh(V[3]);
378 uint32_t mask = ((~(V3 & 0x01)) + 1) & 0xE1000000;
379 V3 = (V3 >> 1) | (V2 << 31);
380 V2 = (V2 >> 1) | (V1 << 31);
381 V1 = (V1 >> 1) | (V0 << 31);
382 V0 = (V0 >> 1) ^ mask;
409 __asm__ __volatile__ (
457 "mov r17,__zero_reg__\n"
458 "sbc r17,__zero_reg__\n"
463 :
"r16",
"r17",
"r18",
"r19",
"r20"
466 uint32_t V0 = be32toh(V[0]);
467 uint32_t V1 = be32toh(V[1]);
468 uint32_t V2 = be32toh(V[2]);
469 uint32_t V3 = be32toh(V[3]);
470 uint32_t mask = ((~(V0 >> 31)) + 1) & 0x00000087;
471 V0 = (V0 << 1) | (V1 >> 31);
472 V1 = (V1 << 1) | (V2 >> 31);
473 V2 = (V2 << 1) | (V3 >> 31);
474 V3 = (V3 << 1) ^ mask;
500 __asm__ __volatile__ (
548 "mov r17,__zero_reg__\n"
549 "sbc r17,__zero_reg__\n"
554 :
"r16",
"r17",
"r18",
"r19",
"r20"
557 uint32_t V0 = le32toh(V[0]);
558 uint32_t V1 = le32toh(V[1]);
559 uint32_t V2 = le32toh(V[2]);
560 uint32_t V3 = le32toh(V[3]);
561 uint32_t mask = ((~(V3 >> 31)) + 1) & 0x00000087;
562 V3 = (V3 << 1) | (V2 >> 31);
563 V2 = (V2 << 1) | (V1 >> 31);
564 V1 = (V1 << 1) | (V0 >> 31);
565 V0 = (V0 << 1) ^ mask;
static void dblXTS(uint32_t V[4])
Doubles a value in the GF(2^128) field using XTS conventions.
static void mulInit(uint32_t H[4], const void *key)
Initializes multiplication in the GF(2^128) field.
static void dblEAX(uint32_t V[4])
Doubles a value in the GF(2^128) field using EAX conventions.
static void mul(uint32_t Y[4], const uint32_t H[4])
Performs a multiplication in the GF(2^128) field.
static void dbl(uint32_t V[4])
Doubles a value in the GF(2^128) field.