Arduino Cryptography Library
Poly1305.cpp
1 /*
2  * Copyright (C) 2015 Southern Storm Software, Pty Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "Poly1305.h"
24 #include "Crypto.h"
25 #include "utility/EndianUtil.h"
26 #include "utility/LimbUtil.h"
27 #include <string.h>
28 
62 // Limb array with enough space for 130 bits.
63 #define NUM_LIMBS_130BIT (NUM_LIMBS_128BIT + 1)
64 
65 // Endian helper macros for limbs and arrays of limbs.
66 #if BIGNUMBER_LIMB_8BIT
67 #define lelimbtoh(x) (x)
68 #define htolelimb(x) (x)
69 #elif BIGNUMBER_LIMB_16BIT
70 #define lelimbtoh(x) (le16toh((x)))
71 #define htolelimb(x) (htole16((x)))
72 #elif BIGNUMBER_LIMB_32BIT
73 #define lelimbtoh(x) (le32toh((x)))
74 #define htolelimb(x) (htole32((x)))
75 #elif BIGNUMBER_LIMB_64BIT
76 #define lelimbtoh(x) (le64toh((x)))
77 #define htolelimb(x) (htole64((x)))
78 #endif
79 #if defined(CRYPTO_LITTLE_ENDIAN)
80 #define littleToHost(r,size) do { ; } while (0)
81 #else
82 #define littleToHost(r,size) \
83  do { \
84  for (uint8_t i = 0; i < (size); ++i) \
85  (r)[i] = lelimbtoh((r)[i]); \
86  } while (0)
87 #endif
88 
93 {
94  state.chunkSize = 0;
95 }
96 
102 {
103  clean(state);
104 }
105 
113 void Poly1305::reset(const void *key)
114 {
115  // Copy the key into place and clear the bits we don't need.
116  uint8_t *r = (uint8_t *)state.r;
117  memcpy(r, key, 16);
118  r[3] &= 0x0F;
119  r[4] &= 0xFC;
120  r[7] &= 0x0F;
121  r[8] &= 0xFC;
122  r[11] &= 0x0F;
123  r[12] &= 0xFC;
124  r[15] &= 0x0F;
125 
126  // Convert into little-endian if necessary.
127  littleToHost(state.r, NUM_LIMBS_128BIT);
128 
129  // Reset the hashing process.
130  state.chunkSize = 0;
131  memset(state.h, 0, sizeof(state.h));
132 }
133 
145 void Poly1305::update(const void *data, size_t len)
146 {
147  // Break the input up into 128-bit chunks and process each in turn.
148  const uint8_t *d = (const uint8_t *)data;
149  while (len > 0) {
150  uint8_t size = 16 - state.chunkSize;
151  if (size > len)
152  size = len;
153  memcpy(((uint8_t *)state.c) + state.chunkSize, d, size);
154  state.chunkSize += size;
155  len -= size;
156  d += size;
157  if (state.chunkSize == 16) {
158  littleToHost(state.c, NUM_LIMBS_128BIT);
159  state.c[NUM_LIMBS_128BIT] = 1;
160  processChunk();
161  state.chunkSize = 0;
162  }
163  }
164 }
165 
182 void Poly1305::finalize(const void *nonce, void *token, size_t len)
183 {
184  dlimb_t carry;
185  uint8_t i;
186  limb_t t[NUM_LIMBS_256BIT + 1];
187 
188  // Pad and flush the final chunk.
189  if (state.chunkSize > 0) {
190  uint8_t *c = (uint8_t *)state.c;
191  c[state.chunkSize] = 1;
192  memset(c + state.chunkSize + 1, 0, 16 - state.chunkSize - 1);
193  littleToHost(state.c, NUM_LIMBS_128BIT);
194  state.c[NUM_LIMBS_128BIT] = 0;
195  processChunk();
196  }
197 
198  // At this point, processChunk() has left h as a partially reduced
199  // result that is less than (2^130 - 5) * 6. Perform one more
200  // reduction and a trial subtraction to produce the final result.
201 
202  // Multiply the high bits of h by 5 and add them to the 130 low bits.
203  carry = (dlimb_t)((state.h[NUM_LIMBS_128BIT] >> 2) +
204  (state.h[NUM_LIMBS_128BIT] & ~((limb_t)3)));
205  state.h[NUM_LIMBS_128BIT] &= 0x0003;
206  for (i = 0; i < NUM_LIMBS_128BIT; ++i) {
207  carry += state.h[i];
208  state.h[i] = (limb_t)carry;
209  carry >>= LIMB_BITS;
210  }
211  state.h[i] += (limb_t)carry;
212 
213  // Subtract (2^130 - 5) from h by computing t = h + 5 - 2^130.
214  // The "minus 2^130" step is implicit.
215  carry = 5;
216  for (i = 0; i < NUM_LIMBS_130BIT; ++i) {
217  carry += state.h[i];
218  t[i] = (limb_t)carry;
219  carry >>= LIMB_BITS;
220  }
221 
222  // Borrow occurs if bit 2^130 of the previous t result is zero.
223  // Carefully turn this into a selection mask so we can select either
224  // h or t as the final result. We don't care about the highest word
225  // of the result because we are about to drop it in the next step.
226  // We have to do it this way to avoid giving away any information
227  // about the value of h in the instruction timing.
228  limb_t mask = (~((t[NUM_LIMBS_128BIT] >> 2) & 1)) + 1;
229  limb_t nmask = ~mask;
230  for (i = 0; i < NUM_LIMBS_128BIT; ++i) {
231  state.h[i] = (state.h[i] & nmask) | (t[i] & mask);
232  }
233 
234  // Add the encrypted nonce and format the final hash.
235  memcpy(state.c, nonce, 16);
236  littleToHost(state.c, NUM_LIMBS_128BIT);
237  carry = 0;
238  for (i = 0; i < NUM_LIMBS_128BIT; ++i) {
239  carry += state.h[i];
240  carry += state.c[i];
241  state.h[i] = htolelimb((limb_t)carry);
242  carry >>= LIMB_BITS;
243  }
244  if (len > 16)
245  len = 16;
246  memcpy(token, state.h, len);
247 }
248 
255 {
256  if (state.chunkSize != 0) {
257  memset(((uint8_t *)state.c) + state.chunkSize, 0, 16 - state.chunkSize);
258  littleToHost(state.c, NUM_LIMBS_128BIT);
259  state.c[NUM_LIMBS_128BIT] = 1;
260  processChunk();
261  state.chunkSize = 0;
262  }
263 }
264 
269 {
270  clean(state);
271 }
272 
276 void Poly1305::processChunk()
277 {
278  limb_t t[NUM_LIMBS_256BIT + 1];
279 
280  // Compute h = ((h + c) * r) mod (2^130 - 5).
281 
282  // Start with h += c. We assume that h is less than (2^130 - 5) * 6
283  // and that c is less than 2^129, so the result will be less than 2^133.
284  dlimb_t carry = 0;
285  uint8_t i, j;
286  for (i = 0; i < NUM_LIMBS_130BIT; ++i) {
287  carry += state.h[i];
288  carry += state.c[i];
289  state.h[i] = (limb_t)carry;
290  carry >>= LIMB_BITS;
291  }
292 
293  // Multiply h by r. We know that r is less than 2^124 because the
294  // top 4 bits were AND-ed off by reset(). That makes h * r less
295  // than 2^257. Which is less than the (2^130 - 6)^2 we want for
296  // the modulo reduction step that follows.
297  carry = 0;
298  limb_t word = state.r[0];
299  for (i = 0; i < NUM_LIMBS_130BIT; ++i) {
300  carry += ((dlimb_t)(state.h[i])) * word;
301  t[i] = (limb_t)carry;
302  carry >>= LIMB_BITS;
303  }
304  t[NUM_LIMBS_130BIT] = (limb_t)carry;
305  for (i = 1; i < NUM_LIMBS_128BIT; ++i) {
306  word = state.r[i];
307  carry = 0;
308  for (j = 0; j < NUM_LIMBS_130BIT; ++j) {
309  carry += ((dlimb_t)(state.h[j])) * word;
310  carry += t[i + j];
311  t[i + j] = (limb_t)carry;
312  carry >>= LIMB_BITS;
313  }
314  t[i + NUM_LIMBS_130BIT] = (limb_t)carry;
315  }
316 
317  // Reduce h * r modulo (2^130 - 5) by multiplying the high 130 bits by 5
318  // and adding them to the low 130 bits. See the explaination in the
319  // comments for Curve25519::reduce() for a description of how this works.
320  carry = ((dlimb_t)(t[NUM_LIMBS_128BIT] >> 2)) +
321  (t[NUM_LIMBS_128BIT] & ~((limb_t)3));
322  t[NUM_LIMBS_128BIT] &= 0x0003;
323  for (i = 0; i < NUM_LIMBS_128BIT; ++i) {
324  // Shift the next word of t up by (LIMB_BITS - 2) bits and then
325  // multiply it by 5. Breaking it down, we can add the results
326  // of shifting up by LIMB_BITS and shifting up by (LIMB_BITS - 2).
327  // The main wrinkle here is that this can result in an intermediate
328  // carry that is (LIMB_BITS * 2 + 1) bits in size which doesn't
329  // fit within a dlimb_t variable. However, we can defer adding
330  // (word << LIMB_BITS) until after the "carry >>= LIMB_BITS" step
331  // because it won't affect the low bits of the carry.
332  word = t[i + NUM_LIMBS_130BIT];
333  carry += ((dlimb_t)word) << (LIMB_BITS - 2);
334  carry += t[i];
335  state.h[i] = (limb_t)carry;
336  carry >>= LIMB_BITS;
337  carry += word;
338  }
339  state.h[i] = (limb_t)(carry + t[NUM_LIMBS_128BIT]);
340 
341  // At this point, h is either the answer of reducing modulo (2^130 - 5)
342  // or it is at most 5 subtractions away from the answer we want.
343  // Leave it as-is for now with h less than (2^130 - 5) * 6. It is
344  // still within a range where the next h * r step will not overflow.
345 }
void reset(const void *key)
Resets the Poly1305 message authenticator for a new session.
Definition: Poly1305.cpp:113
Poly1305()
Constructs a new Poly1305 message authenticator.
Definition: Poly1305.cpp:92
void finalize(const void *nonce, void *token, size_t len)
Finalizes the authentication process and returns the token.
Definition: Poly1305.cpp:182
void pad()
Pads the input stream with zero bytes to a multiple of 16.
Definition: Poly1305.cpp:254
~Poly1305()
Destroys this Poly1305 message authenticator after clearing all sensitive information.
Definition: Poly1305.cpp:101
void clear()
Clears the authenticator's state, removing all sensitive data.
Definition: Poly1305.cpp:268
void update(const void *data, size_t len)
Updates the message authenticator with more data.
Definition: Poly1305.cpp:145