Skinny-C
 All Data Structures Files Functions Variables Groups Pages
skinny-internal.h
1 /*
2  * Copyright (C) 2017 Southern Storm Software, Pty Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #ifndef SKINNY_INTERNAL_H
24 #define SKINNY_INTERNAL_H
25 
26 #include <stdint.h>
27 #include <string.h>
28 
/*
 * STATIC_INLINE: specifier placed in front of every helper function that
 * is defined in this header.
 *
 *  - C99 or later, and C++, use the standard "static inline".
 *  - GCC and Clang in pre-C99 modes use the "__inline__" keyword extension.
 *  - Microsoft C in pre-C99 modes uses its "__inline" keyword.
 *  - Any other compiler falls back to a plain "static" function.
 *
 * __STDC_VERSION__ is tested with defined() first so that the check does
 * not depend on an undefined macro silently evaluating to zero, and so
 * that compilers which leave __STDC__ undefined still get "inline".
 */
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
    defined(__cplusplus)
#define STATIC_INLINE static inline
#elif defined(__GNUC__) || defined(__clang__)
#define STATIC_INLINE static __inline__
#elif defined(_MSC_VER)
#define STATIC_INLINE static __inline
#else
#define STATIC_INLINE static
#endif
37 
/*
 * Define SKINNY_64BIT to 1 if the CPU is natively 64-bit, otherwise 0.
 *
 * __WORDSIZE is not a standard macro and many C libraries do not define
 * it, so the LP64 / Win64 model macros and the width of uintptr_t (from
 * <stdint.h>, included above) are also accepted as evidence of a 64-bit
 * target.
 */
#if (defined(__WORDSIZE) && __WORDSIZE == 64) || \
    defined(__LP64__) || defined(_LP64) || defined(_WIN64) || \
    (defined(UINTPTR_MAX) && UINTPTR_MAX > 0xFFFFFFFF)
#define SKINNY_64BIT 1
#else
#define SKINNY_64BIT 0
#endif
44 
/*
 * SKINNY_UNALIGNED is 1 when the target CPU can access multi-byte words
 * at any byte address, and 0 otherwise.  Only 32-bit and 64-bit x86 are
 * recognised; every other target gets 0.
 */
#if !defined(__x86_64) && !defined(__x86_64__) && \
    !defined(__i386) && !defined(__i386__)
#define SKINNY_UNALIGNED 0
#else
#define SKINNY_UNALIGNED 1
#endif
52 
/*
 * Define SKINNY_LITTLE_ENDIAN to 1 if the CPU is known to be
 * little-endian, otherwise 0.
 *
 * x86 is always little-endian.  ARM can be configured either way, so an
 * ARM target only counts as little-endian when the compiler does not
 * report big-endian byte order: __ARMEB__ is the ARM big-endian marker
 * and __BYTE_ORDER__ is the generic GCC/Clang one.
 */
#if defined(__ARMEB__) || \
    (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
     __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define SKINNY_LITTLE_ENDIAN 0
#elif defined(__x86_64) || defined(__x86_64__) || \
      defined(__i386) || defined(__i386__) || \
      defined(__arm) || defined(__arm__)
#define SKINNY_LITTLE_ENDIAN 1
#else
#define SKINNY_LITTLE_ENDIAN 0
#endif
61 
/*
 * SKINNY_VEC128_MATH is 1 when 128-bit SIMD vector extensions can be
 * used: the compiler must be GCC or Clang, and the target must advertise
 * SSE2 or ARM NEON.  In every other case it is 0.
 */
#if (defined(__GNUC__) || defined(__clang__)) && \
    (defined(__SSE2__) || defined(__ARM_NEON) || \
     defined(__ARM_NEON__) || defined(__ARM_NEON_FP))
#define SKINNY_VEC128_MATH 1
#else
#define SKINNY_VEC128_MATH 0
#endif
73 
/*
 * SKINNY_VEC256_MATH is 1 when 256-bit SIMD vector extensions can be
 * used: the compiler must be GCC or Clang, and the target must advertise
 * AVX2.  In every other case it is 0.
 */
#if (defined(__GNUC__) || defined(__clang__)) && defined(__AVX2__)
#define SKINNY_VEC256_MATH 1
#else
#define SKINNY_VEC256_MATH 0
#endif
84 
/*
 * Attributes that turn a scalar typedef into a SIMD vector typedef.
 *
 *   words - number of elements in the vector (used by Clang)
 *   bytes - total size of the vector in bytes (used by other compilers)
 *
 * SKINNY_VECTOR_ATTR declares a vector with its default alignment;
 * SKINNY_VECTORU_ATTR additionally requests 1-byte alignment.
 */
#if !defined(__clang__)
#define SKINNY_VECTOR_ATTR(words, bytes) __attribute__((vector_size(bytes)))
#define SKINNY_VECTORU_ATTR(words, bytes) __attribute__((vector_size(bytes), aligned(1)))
#else
#define SKINNY_VECTOR_ATTR(words, bytes) __attribute__((ext_vector_type(words)))
#define SKINNY_VECTORU_ATTR(words, bytes) __attribute__((ext_vector_type(words), aligned(1)))
#endif
93 
/*
 * XOR two byte buffers of arbitrary size and alignment.
 *
 *   output - receives size bytes: input1[i] ^ input2[i]
 *   input1 - first operand, size bytes
 *   input2 - second operand, size bytes
 *   size   - number of bytes to process; zero is a no-op
 *
 * Bytes are processed one at a time from the highest index down to
 * index zero.
 */
STATIC_INLINE void skinny_xor
    (void *output, const void *input1, const void *input2, size_t size)
{
    uint8_t *dst = (uint8_t *)output;
    const uint8_t *lhs = (const uint8_t *)input1;
    const uint8_t *rhs = (const uint8_t *)input2;
    size_t index;

    for (index = size; index-- > 0; ) {
        dst[index] = lhs[index] ^ rhs[index];
    }
}
104 
/*
 * XOR two 128-bit (16-byte) blocks together.
 *
 *   output - receives the 16 result bytes
 *   input1 - first 16-byte operand
 *   input2 - second 16-byte operand
 *
 * The buffers may have any alignment.  On targets with SKINNY_UNALIGNED
 * set, the data is moved through local word variables with memcpy()
 * rather than by dereferencing casted pointers: dereferencing a
 * uint64_t or uint32_t pointer that aliases a byte buffer, or that is
 * misaligned, is undefined behaviour in C even when the CPU tolerates
 * it.  Both inputs are read completely before output is written, so
 * output may be the same buffer as either input.
 */
STATIC_INLINE void skinny128_xor
    (void *output, const void *input1, const void *input2)
{
#if SKINNY_UNALIGNED && SKINNY_64BIT
    uint64_t lhs[2];
    uint64_t rhs[2];
    memcpy(lhs, input1, sizeof(lhs));
    memcpy(rhs, input2, sizeof(rhs));
    lhs[0] ^= rhs[0];
    lhs[1] ^= rhs[1];
    memcpy(output, lhs, sizeof(lhs));
#elif SKINNY_UNALIGNED
    uint32_t lhs[4];
    uint32_t rhs[4];
    memcpy(lhs, input1, sizeof(lhs));
    memcpy(rhs, input2, sizeof(rhs));
    lhs[0] ^= rhs[0];
    lhs[1] ^= rhs[1];
    lhs[2] ^= rhs[2];
    lhs[3] ^= rhs[3];
    memcpy(output, lhs, sizeof(lhs));
#else
    /* Portable fallback: one byte at a time */
    unsigned posn;
    for (posn = 0; posn < 16; ++posn) {
        ((uint8_t *)output)[posn] = ((const uint8_t *)input1)[posn] ^
                                    ((const uint8_t *)input2)[posn];
    }
#endif
}
131 
/*
 * XOR two 64-bit (8-byte) blocks together.
 *
 *   output - receives the 8 result bytes
 *   input1 - first 8-byte operand
 *   input2 - second 8-byte operand
 *
 * The buffers may have any alignment.  On targets with SKINNY_UNALIGNED
 * set, the data is moved through local word variables with memcpy()
 * rather than by dereferencing casted pointers: dereferencing a
 * uint64_t or uint32_t pointer that aliases a byte buffer, or that is
 * misaligned, is undefined behaviour in C even when the CPU tolerates
 * it.  Both inputs are read completely before output is written, so
 * output may be the same buffer as either input.
 */
STATIC_INLINE void skinny64_xor
    (void *output, const void *input1, const void *input2)
{
#if SKINNY_UNALIGNED && SKINNY_64BIT
    uint64_t lhs;
    uint64_t rhs;
    memcpy(&lhs, input1, sizeof(lhs));
    memcpy(&rhs, input2, sizeof(rhs));
    lhs ^= rhs;
    memcpy(output, &lhs, sizeof(lhs));
#elif SKINNY_UNALIGNED
    uint32_t lhs[2];
    uint32_t rhs[2];
    memcpy(lhs, input1, sizeof(lhs));
    memcpy(rhs, input2, sizeof(rhs));
    lhs[0] ^= rhs[0];
    lhs[1] ^= rhs[1];
    memcpy(output, lhs, sizeof(lhs));
#else
    /* Portable fallback: one byte at a time */
    unsigned posn;
    for (posn = 0; posn < 8; ++posn) {
        ((uint8_t *)output)[posn] = ((const uint8_t *)input1)[posn] ^
                                    ((const uint8_t *)input2)[posn];
    }
#endif
}
152 
/*
 * Add inc to a 128-bit counter block stored in big-endian order.
 *
 *   counter - 16-byte counter; counter[15] is the least significant byte
 *   inc     - amount to add (0..65535)
 *
 * The addition wraps around modulo 2^128.
 *
 * The running sum is kept in a 32-bit accumulator.  Accumulating in the
 * uint16_t parameter itself would truncate whenever inc plus the current
 * byte exceeded 0xFFFF, silently dropping the carry into the next byte.
 */
STATIC_INLINE void skinny128_inc_counter(uint8_t *counter, uint16_t inc)
{
    uint32_t carry = inc;
    unsigned posn;
    for (posn = 16; posn > 0; ) {
        --posn;
        carry += counter[posn];
        counter[posn] = (uint8_t)carry;
        carry >>= 8;
    }
}
164 
/*
 * Add inc to a 64-bit counter block stored in big-endian order.
 *
 *   counter - 8-byte counter; counter[7] is the least significant byte
 *   inc     - amount to add (0..65535)
 *
 * The addition wraps around modulo 2^64.
 *
 * The running sum is kept in a 32-bit accumulator.  Accumulating in the
 * uint16_t parameter itself would truncate whenever inc plus the current
 * byte exceeded 0xFFFF, silently dropping the carry into the next byte.
 */
STATIC_INLINE void skinny64_inc_counter(uint8_t *counter, uint16_t inc)
{
    uint32_t carry = inc;
    unsigned posn;
    for (posn = 8; posn > 0; ) {
        --posn;
        carry += counter[posn];
        counter[posn] = (uint8_t)carry;
        carry >>= 8;
    }
}
176 
/*
 * Little-endian load helpers.  Each macro reads from the byte buffer
 * "ptr" starting at byte index "offset"; the buffer may have any
 * alignment because only single bytes are ever accessed.
 *
 * The arguments are expanded more than once, so they must not have
 * side effects.
 */

/* Read a single byte, widened to uint32_t */
#define READ_BYTE(ptr,offset) \
    ((uint32_t)(((const uint8_t *)(ptr))[(offset)]))

/* Read a 16-bit word, least significant byte first */
#define READ_WORD16(ptr,offset) \
    (((uint16_t)READ_BYTE((ptr), (offset))) | \
     (((uint16_t)READ_BYTE((ptr), (offset) + 1)) << 8))

/* Read a 32-bit word, least significant byte first */
#define READ_WORD32(ptr,offset) \
    ((READ_BYTE((ptr), (offset))) | \
     (READ_BYTE((ptr), (offset) + 1) << 8) | \
     (READ_BYTE((ptr), (offset) + 2) << 16) | \
     (READ_BYTE((ptr), (offset) + 3) << 24))

/* Read a 64-bit word, least significant byte first */
#define READ_WORD64(ptr,offset) \
    (((uint64_t)READ_BYTE((ptr), (offset))) | \
     (((uint64_t)READ_BYTE((ptr), (offset) + 1)) << 8) | \
     (((uint64_t)READ_BYTE((ptr), (offset) + 2)) << 16) | \
     (((uint64_t)READ_BYTE((ptr), (offset) + 3)) << 24) | \
     (((uint64_t)READ_BYTE((ptr), (offset) + 4)) << 32) | \
     (((uint64_t)READ_BYTE((ptr), (offset) + 5)) << 40) | \
     (((uint64_t)READ_BYTE((ptr), (offset) + 6)) << 48) | \
     (((uint64_t)READ_BYTE((ptr), (offset) + 7)) << 56))
199 
/*
 * Little-endian store helpers.  Each macro writes "value" into the byte
 * buffer "ptr" starting at byte index "offset", least significant byte
 * first; the buffer may have any alignment because only single bytes
 * are ever accessed.
 *
 * "value" is converted to the unsigned type of the word being written
 * before it is shifted.  Without that conversion a narrower argument
 * (for example a uint32_t handed to WRITE_WORD64) would be shifted by
 * at least its own width, which is undefined behaviour.
 *
 * The arguments are expanded more than once, so they must not have
 * side effects.
 */

/* Store the low 16 bits of value as two bytes */
#define WRITE_WORD16(ptr,offset,value) \
    ((((uint8_t *)(ptr))[(offset)] = (uint8_t)(value)), \
     (((uint8_t *)(ptr))[(offset) + 1] = (uint8_t)(((uint16_t)(value)) >> 8)))

/* Store the low 32 bits of value as four bytes */
#define WRITE_WORD32(ptr,offset,value) \
    ((((uint8_t *)(ptr))[(offset)] = (uint8_t)(value)), \
     (((uint8_t *)(ptr))[(offset) + 1] = (uint8_t)(((uint32_t)(value)) >> 8)), \
     (((uint8_t *)(ptr))[(offset) + 2] = (uint8_t)(((uint32_t)(value)) >> 16)), \
     (((uint8_t *)(ptr))[(offset) + 3] = (uint8_t)(((uint32_t)(value)) >> 24)))

/* Store the low 64 bits of value as eight bytes */
#define WRITE_WORD64(ptr,offset,value) \
    ((((uint8_t *)(ptr))[(offset)] = (uint8_t)(value)), \
     (((uint8_t *)(ptr))[(offset) + 1] = (uint8_t)(((uint64_t)(value)) >> 8)), \
     (((uint8_t *)(ptr))[(offset) + 2] = (uint8_t)(((uint64_t)(value)) >> 16)), \
     (((uint8_t *)(ptr))[(offset) + 3] = (uint8_t)(((uint64_t)(value)) >> 24)), \
     (((uint8_t *)(ptr))[(offset) + 4] = (uint8_t)(((uint64_t)(value)) >> 32)), \
     (((uint8_t *)(ptr))[(offset) + 5] = (uint8_t)(((uint64_t)(value)) >> 40)), \
     (((uint8_t *)(ptr))[(offset) + 6] = (uint8_t)(((uint64_t)(value)) >> 48)), \
     (((uint8_t *)(ptr))[(offset) + 7] = (uint8_t)(((uint64_t)(value)) >> 56)))
219 
/*
 * Overwrite size bytes at ptr with zeroes, for wiping sensitive data.
 *
 *   ptr  - start of the region to clear
 *   size - number of bytes to clear; zero is a no-op
 *
 * memset_s() belongs to the optional Annex K of C11, so it is only used
 * when the library advertises Annex K (__STDC_LIB_EXT1__) and the
 * including file asked for it (__STDC_WANT_LIB_EXT1__) before pulling
 * in <string.h>.  Its argument order is (dest, destsz, ch, count).
 *
 * Everywhere else the region is cleared through a volatile pointer so
 * that the stores count as observable side effects and are not removed
 * as dead writes.
 */
STATIC_INLINE void skinny_cleanse(void *ptr, size_t size)
{
#if defined(__STDC_LIB_EXT1__) && defined(__STDC_WANT_LIB_EXT1__) && \
    __STDC_WANT_LIB_EXT1__ >= 1
    (void)memset_s(ptr, size, 0, size);
#else
    volatile uint8_t *p = (volatile uint8_t *)ptr;
    while (size > 0) {
        *p++ = 0;
        --size;
    }
#endif
}
234 
#if SKINNY_VEC128_MATH

/*
 * 128-bit SIMD vector types: four 32-bit lanes or eight 16-bit lanes.
 * The "U" variants carry the 1-byte-aligned attribute and are declared
 * only when SKINNY_UNALIGNED is set.
 */
typedef uint32_t SkinnyVector4x32_t SKINNY_VECTOR_ATTR(4, 16);
typedef uint16_t SkinnyVector8x16_t SKINNY_VECTOR_ATTR(8, 16);
#if SKINNY_UNALIGNED
typedef uint32_t SkinnyVector4x32U_t SKINNY_VECTORU_ATTR(4, 16);
typedef uint16_t SkinnyVector8x16U_t SKINNY_VECTORU_ATTR(8, 16);
#endif

/* Broadcast a 32-bit scalar into all four lanes of a 4x32 vector */
STATIC_INLINE SkinnyVector4x32_t skinny_to_vec4x32(uint32_t x)
{
    SkinnyVector4x32_t lanes = {x, x, x, x};
    return lanes;
}

/* Broadcast a 16-bit scalar into all eight lanes of an 8x16 vector */
STATIC_INLINE SkinnyVector8x16_t skinny_to_vec8x16(uint16_t x)
{
    SkinnyVector8x16_t lanes = {x, x, x, x, x, x, x, x};
    return lanes;
}

#endif /* SKINNY_VEC128_MATH */
258 
#if SKINNY_VEC256_MATH

/*
 * 256-bit SIMD vector type with eight 32-bit lanes.  The "U" variant
 * carries the 1-byte-aligned attribute and is declared only when
 * SKINNY_UNALIGNED is set.
 */
typedef uint32_t SkinnyVector8x32_t SKINNY_VECTOR_ATTR(8, 32);
#if SKINNY_UNALIGNED
typedef uint32_t SkinnyVector8x32U_t SKINNY_VECTORU_ATTR(8, 32);
#endif

/* Broadcast a 32-bit scalar into all eight lanes of an 8x32 vector */
STATIC_INLINE SkinnyVector8x32_t skinny_to_vec8x32(uint32_t x)
{
    SkinnyVector8x32_t lanes = {x, x, x, x, x, x, x, x};
    return lanes;
}

#endif /* SKINNY_VEC256_MATH */
274 
275 /* Determine if this platform supports 128-bit SIMD vector operations.
 * Only the declaration is visible in this header; the definition lives
 * elsewhere.
 * NOTE(review): presumably returns non-zero when supported and zero
 * otherwise -- confirm against the implementation. */
276 int _skinny_has_vec128(void);
277 
278 /* Determine if this platform supports 256-bit SIMD vector operations.
 * Only the declaration is visible in this header; the definition lives
 * elsewhere.
 * NOTE(review): presumably returns non-zero when supported and zero
 * otherwise -- confirm against the implementation. */
279 int _skinny_has_vec256(void);
280 
281 /* Allocate cleared memory and guarantee SIMD-compatible alignment.
 * Takes the requested size in bytes and an output pointer, base_ptr.
 * NOTE(review): base_ptr presumably receives the address of the
 * underlying allocation (the one to release later), while the return
 * value is the aligned pointer -- TODO confirm against the
 * implementation, including what is returned on allocation failure. */
282 void *skinny_calloc(size_t size, void **base_ptr);
283 
284 #endif /* SKINNY_INTERNAL_H */