23 #ifndef SKINNY_INTERNAL_H
24 #define SKINNY_INTERNAL_H
/*
 * STATIC_INLINE: specifier placed in front of the helper functions that
 * are defined in this header.  Selects C99 "static inline" when the
 * compiler reports C99 or later, the GNU "__inline__" keyword for
 * GCC/clang in older language modes, and plain "static" otherwise.
 */
#if defined(__STDC__) && defined(__STDC_VERSION__) && \
    __STDC_VERSION__ >= 199901L
#define STATIC_INLINE static inline
#elif defined(__GNUC__) || defined(__clang__)
#define STATIC_INLINE static __inline__
#else
#define STATIC_INLINE static
#endif
/*
 * SKINNY_64BIT: 1 when the platform headers define __WORDSIZE as 64,
 * 0 otherwise (including when __WORDSIZE is not defined at all).
 */
#if defined(__WORDSIZE) && __WORDSIZE == 64
#define SKINNY_64BIT 1
#else
#define SKINNY_64BIT 0
#endif
/*
 * SKINNY_UNALIGNED: 1 on x86 and x86-64 targets, 0 everywhere else.
 * The fixed-size XOR helpers in this header use it to choose between
 * word-wide accesses through cast pointers and a byte-by-byte loop.
 */
#if defined(__x86_64) || defined(__x86_64__) || \
    defined(__i386) || defined(__i386__)
#define SKINNY_UNALIGNED 1
#else
#define SKINNY_UNALIGNED 0
#endif
/*
 * SKINNY_LITTLE_ENDIAN: 1 when the target is known to be little-endian,
 * 0 when it is big-endian or unknown.
 *
 * x86, x86-64 and ARM are recognised by their architecture macros.  ARM
 * can also be built big-endian, so when the compiler provides
 * __BYTE_ORDER__ a big-endian report overrides the architecture test.
 */
#if (defined(__x86_64) || defined(__x86_64__) || \
     defined(__i386) || defined(__i386__) || \
     defined(__arm) || defined(__arm__)) && \
    !(defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
      __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define SKINNY_LITTLE_ENDIAN 1
#else
#define SKINNY_LITTLE_ENDIAN 0
#endif
/*
 * SKINNY_VEC128_MATH: 1 when building with GCC or clang for a target
 * that advertises SSE2 or NEON, 0 otherwise.  Guards the 128-bit vector
 * typedefs and helpers further down in this header.
 */
#if (defined(__GNUC__) || defined(__clang__)) && \
    (defined(__SSE2__) || defined(__ARM_NEON) || \
     defined(__ARM_NEON__) || defined(__ARM_NEON_FP))
#define SKINNY_VEC128_MATH 1
#else
#define SKINNY_VEC128_MATH 0
#endif
/*
 * SKINNY_VEC256_MATH: 1 when building with GCC or clang for a target
 * with 256-bit integer vector support, 0 otherwise.  Guards the 256-bit
 * vector typedefs and helper further down in this header.
 *
 * NOTE(review): the inner feature test was missing from the text this
 * block was restored from; __AVX2__ is assumed here - confirm.
 */
#if (defined(__GNUC__) || defined(__clang__)) && defined(__AVX2__)
#define SKINNY_VEC256_MATH 1
#else
#define SKINNY_VEC256_MATH 0
#endif
/*
 * Attributes that turn a scalar typedef into a SIMD vector typedef.
 *
 *   words - number of lanes (used by clang's ext_vector_type)
 *   bytes - total vector size in bytes (used by GCC's vector_size)
 *
 * SKINNY_VECTOR_ATTR yields the naturally aligned vector type;
 * SKINNY_VECTORU_ATTR additionally requests 1-byte alignment.
 */
#if defined(__clang__)
#define SKINNY_VECTOR_ATTR(words, bytes) \
    __attribute__((ext_vector_type(words)))
#define SKINNY_VECTORU_ATTR(words, bytes) \
    __attribute__((ext_vector_type(words), aligned(1)))
#else
#define SKINNY_VECTOR_ATTR(words, bytes) \
    __attribute__((vector_size(bytes)))
#define SKINNY_VECTORU_ATTR(words, bytes) \
    __attribute__((vector_size(bytes), aligned(1)))
#endif
95 STATIC_INLINE
void skinny_xor
96 (
void *output,
const void *input1,
const void *input2,
size_t size)
100 ((uint8_t *)output)[size] = ((
const uint8_t *)input1)[size] ^
101 ((
const uint8_t *)input2)[size];
106 STATIC_INLINE
void skinny128_xor
107 (
void *output,
const void *input1,
const void *input2)
109 #if SKINNY_UNALIGNED && SKINNY_64BIT
110 ((uint64_t *)output)[0] = ((
const uint64_t *)input1)[0] ^
111 ((
const uint64_t *)input2)[0];
112 ((uint64_t *)output)[1] = ((
const uint64_t *)input1)[1] ^
113 ((
const uint64_t *)input2)[1];
114 #elif SKINNY_UNALIGNED
115 ((uint32_t *)output)[0] = ((
const uint32_t *)input1)[0] ^
116 ((
const uint32_t *)input2)[0];
117 ((uint32_t *)output)[1] = ((
const uint32_t *)input1)[1] ^
118 ((
const uint32_t *)input2)[1];
119 ((uint32_t *)output)[2] = ((
const uint32_t *)input1)[2] ^
120 ((
const uint32_t *)input2)[2];
121 ((uint32_t *)output)[3] = ((
const uint32_t *)input1)[3] ^
122 ((
const uint32_t *)input2)[3];
125 for (posn = 0; posn < 16; ++posn) {
126 ((uint8_t *)output)[posn] = ((
const uint8_t *)input1)[posn] ^
127 ((
const uint8_t *)input2)[posn];
133 STATIC_INLINE
void skinny64_xor
134 (
void *output,
const void *input1,
const void *input2)
136 #if SKINNY_UNALIGNED && SKINNY_64BIT
137 ((uint64_t *)output)[0] = ((
const uint64_t *)input1)[0] ^
138 ((
const uint64_t *)input2)[0];
139 #elif SKINNY_UNALIGNED
140 ((uint32_t *)output)[0] = ((
const uint32_t *)input1)[0] ^
141 ((
const uint32_t *)input2)[0];
142 ((uint32_t *)output)[1] = ((
const uint32_t *)input1)[1] ^
143 ((
const uint32_t *)input2)[1];
146 for (posn = 0; posn < 8; ++posn) {
147 ((uint8_t *)output)[posn] = ((
const uint8_t *)input1)[posn] ^
148 ((
const uint8_t *)input2)[posn];
154 STATIC_INLINE
void skinny128_inc_counter(uint8_t *counter, uint16_t inc)
157 for (posn = 16; posn > 0; ) {
159 inc += counter[posn];
160 counter[posn] = (uint8_t)inc;
166 STATIC_INLINE
void skinny64_inc_counter(uint8_t *counter, uint16_t inc)
169 for (posn = 8; posn > 0; ) {
171 inc += counter[posn];
172 counter[posn] = (uint8_t)inc;
/* Reads the byte at ptr[offset] and widens it to uint32_t. */
#define READ_BYTE(ptr,offset) \
    ((uint32_t)(((const uint8_t *)(ptr))[(offset)]))
/*
 * Reads a 16-bit little-endian value from ptr[offset] and ptr[offset+1]
 * one byte at a time, so ptr needs no particular alignment.
 */
#define READ_WORD16(ptr,offset) \
    (((uint16_t)(((const uint8_t *)(ptr))[(offset)])) | \
    (((uint16_t)(((const uint8_t *)(ptr))[(offset) + 1])) << 8))
/*
 * Reads a 32-bit little-endian value from ptr[offset .. offset+3]
 * one byte at a time, so ptr needs no particular alignment.
 */
#define READ_WORD32(ptr,offset) \
    (((uint32_t)(((const uint8_t *)(ptr))[(offset)])) | \
    (((uint32_t)(((const uint8_t *)(ptr))[(offset) + 1])) << 8) | \
    (((uint32_t)(((const uint8_t *)(ptr))[(offset) + 2])) << 16) | \
    (((uint32_t)(((const uint8_t *)(ptr))[(offset) + 3])) << 24))
/*
 * Reads a 64-bit little-endian value from ptr[offset .. offset+7]
 * one byte at a time, so ptr needs no particular alignment.
 */
#define READ_WORD64(ptr,offset) \
    (((uint64_t)(((const uint8_t *)(ptr))[(offset)])) | \
    (((uint64_t)(((const uint8_t *)(ptr))[(offset) + 1])) << 8) | \
    (((uint64_t)(((const uint8_t *)(ptr))[(offset) + 2])) << 16) | \
    (((uint64_t)(((const uint8_t *)(ptr))[(offset) + 3])) << 24) | \
    (((uint64_t)(((const uint8_t *)(ptr))[(offset) + 4])) << 32) | \
    (((uint64_t)(((const uint8_t *)(ptr))[(offset) + 5])) << 40) | \
    (((uint64_t)(((const uint8_t *)(ptr))[(offset) + 6])) << 48) | \
    (((uint64_t)(((const uint8_t *)(ptr))[(offset) + 7])) << 56))
/*
 * Stores the low 16 bits of value at ptr[offset] and ptr[offset+1] in
 * little-endian order, one byte at a time.
 *
 * value is converted to uint16_t before shifting so the result does not
 * depend on how the compiler shifts negative signed values.  Note that
 * value is evaluated more than once.
 */
#define WRITE_WORD16(ptr,offset,value) \
    ((((uint8_t *)(ptr))[(offset)] = (uint8_t)((uint16_t)(value))), \
     (((uint8_t *)(ptr))[(offset) + 1] = \
          (uint8_t)(((uint16_t)(value)) >> 8)))
/*
 * Stores the low 32 bits of value at ptr[offset .. offset+3] in
 * little-endian order, one byte at a time.
 *
 * value is converted to uint32_t before shifting so the result does not
 * depend on how the compiler shifts negative signed values.  Note that
 * value is evaluated more than once.
 */
#define WRITE_WORD32(ptr,offset,value) \
    ((((uint8_t *)(ptr))[(offset)] = (uint8_t)((uint32_t)(value))), \
     (((uint8_t *)(ptr))[(offset) + 1] = \
          (uint8_t)(((uint32_t)(value)) >> 8)), \
     (((uint8_t *)(ptr))[(offset) + 2] = \
          (uint8_t)(((uint32_t)(value)) >> 16)), \
     (((uint8_t *)(ptr))[(offset) + 3] = \
          (uint8_t)(((uint32_t)(value)) >> 24)))
/*
 * Stores value at ptr[offset .. offset+7] in little-endian order, one
 * byte at a time.
 *
 * value is converted to uint64_t before shifting: shifting a 32-bit
 * argument by 32 or more bits in its own type would be undefined.
 * Note that value is evaluated more than once.
 */
#define WRITE_WORD64(ptr,offset,value) \
    ((((uint8_t *)(ptr))[(offset)] = (uint8_t)((uint64_t)(value))), \
     (((uint8_t *)(ptr))[(offset) + 1] = \
          (uint8_t)(((uint64_t)(value)) >> 8)), \
     (((uint8_t *)(ptr))[(offset) + 2] = \
          (uint8_t)(((uint64_t)(value)) >> 16)), \
     (((uint8_t *)(ptr))[(offset) + 3] = \
          (uint8_t)(((uint64_t)(value)) >> 24)), \
     (((uint8_t *)(ptr))[(offset) + 4] = \
          (uint8_t)(((uint64_t)(value)) >> 32)), \
     (((uint8_t *)(ptr))[(offset) + 5] = \
          (uint8_t)(((uint64_t)(value)) >> 40)), \
     (((uint8_t *)(ptr))[(offset) + 6] = \
          (uint8_t)(((uint64_t)(value)) >> 48)), \
     (((uint8_t *)(ptr))[(offset) + 7] = \
          (uint8_t)(((uint64_t)(value)) >> 56)))
220 STATIC_INLINE
void skinny_cleanse(
void *ptr,
size_t size)
222 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
224 memset_s(ptr, 0, size);
227 uint8_t
volatile *p = (uint8_t
volatile *)ptr;
#if SKINNY_VEC128_MATH

/* 128-bit vector types: four 32-bit lanes and eight 16-bit lanes. */
typedef uint32_t SkinnyVector4x32_t SKINNY_VECTOR_ATTR(4, 16);
typedef uint16_t SkinnyVector8x16_t SKINNY_VECTOR_ATTR(8, 16);

/* The same lane layouts declared with SKINNY_VECTORU_ATTR. */
typedef uint32_t SkinnyVector4x32U_t SKINNY_VECTORU_ATTR(4, 16);
typedef uint16_t SkinnyVector8x16U_t SKINNY_VECTORU_ATTR(8, 16);

/* Returns a 4 x 32-bit vector with every lane set to x. */
STATIC_INLINE SkinnyVector4x32_t skinny_to_vec4x32(uint32_t x)
{
    return (SkinnyVector4x32_t){x, x, x, x};
}

/* Returns an 8 x 16-bit vector with every lane set to x. */
STATIC_INLINE SkinnyVector8x16_t skinny_to_vec8x16(uint16_t x)
{
    return (SkinnyVector8x16_t){x, x, x, x, x, x, x, x};
}

#endif /* SKINNY_VEC128_MATH */
#if SKINNY_VEC256_MATH

/* 256-bit vector type: eight 32-bit lanes. */
typedef uint32_t SkinnyVector8x32_t SKINNY_VECTOR_ATTR(8, 32);

/* The same lane layout declared with SKINNY_VECTORU_ATTR. */
typedef uint32_t SkinnyVector8x32U_t SKINNY_VECTORU_ATTR(8, 32);

/* Returns an 8 x 32-bit vector with every lane set to x. */
STATIC_INLINE SkinnyVector8x32_t skinny_to_vec8x32(uint32_t x)
{
    return (SkinnyVector8x32_t){x, x, x, x, x, x, x, x};
}

#endif /* SKINNY_VEC256_MATH */
/*
 * Declared here; the definition lives outside this header.
 * NOTE(review): presumably reports whether 128-bit vector operations
 * can be used at run time (non-zero = yes) - confirm against the
 * definition.
 */
int _skinny_has_vec128(void);
/*
 * Declared here; the definition lives outside this header.
 * NOTE(review): presumably reports whether 256-bit vector operations
 * can be used at run time (non-zero = yes) - confirm against the
 * definition.
 */
int _skinny_has_vec256(void);
/*
 * Declared here; the definition lives outside this header.
 * NOTE(review): presumably allocates size bytes of zeroed memory and
 * stores the pointer that must later be released in *base_ptr - confirm
 * the ownership and failure behaviour against the definition.
 */
void *skinny_calloc(size_t size, void **base_ptr);