Arduino Cryptography Library
P521.cpp
1 /*
2  * Copyright (C) 2016 Southern Storm Software, Pty Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "P521.h"
24 #include "Crypto.h"
25 #include "RNG.h"
26 #include "SHA512.h"
27 #include "utility/LimbUtil.h"
28 #include <string.h>
29 
48 // Number of limbs that are needed to represent a 521-bit number.
49 #define NUM_LIMBS_521BIT NUM_LIMBS_BITS(521)
50 
51 // Number of limbs that are needed to represent a 1042-bit number.
52 // To simply things we also require that this be twice the size of
53 // NUM_LIMB_521BIT which involves a little wastage at the high end
54 // of one extra limb for 8-bit and 32-bit limbs. There is no
55 // wastage for 16-bit limbs.
56 #define NUM_LIMBS_1042BIT (NUM_LIMBS_BITS(521) * 2)
57 
58 // The overhead of clean() calls in mul(), etc can add up to a lot of
59 // processing time. Only do such cleanups if strict mode has been enabled.
60 #if defined(P521_STRICT_CLEAN)
61 #define strict_clean(x) clean(x)
62 #else
63 #define strict_clean(x) do { ; } while (0)
64 #endif
65 
66 // Expand the partial 9-bit left over limb at the top of a 521-bit number.
67 #if BIGNUMBER_LIMB_8BIT
68 #define LIMB_PARTIAL(value) ((uint8_t)(value)), \
69  ((uint8_t)((value) >> 8))
70 #else
71 #define LIMB_PARTIAL(value) (value)
72 #endif
73 
76 // The group order "q" value from RFC 4754 and RFC 5903. This is the
77 // same as the "n" value from Appendix D.1.2.5 of NIST FIPS 186-4.
78 static limb_t const P521_q[NUM_LIMBS_521BIT] PROGMEM = {
79  LIMB_PAIR(0x91386409, 0xbb6fb71e), LIMB_PAIR(0x899c47ae, 0x3bb5c9b8),
80  LIMB_PAIR(0xf709a5d0, 0x7fcc0148), LIMB_PAIR(0xbf2f966b, 0x51868783),
81  LIMB_PAIR(0xfffffffa, 0xffffffff), LIMB_PAIR(0xffffffff, 0xffffffff),
82  LIMB_PAIR(0xffffffff, 0xffffffff), LIMB_PAIR(0xffffffff, 0xffffffff),
83  LIMB_PARTIAL(0x1ff)
84 };
85 
86 // The "b" value from Appendix D.1.2.5 of NIST FIPS 186-4.
87 static limb_t const P521_b[NUM_LIMBS_521BIT] PROGMEM = {
88  LIMB_PAIR(0x6b503f00, 0xef451fd4), LIMB_PAIR(0x3d2c34f1, 0x3573df88),
89  LIMB_PAIR(0x3bb1bf07, 0x1652c0bd), LIMB_PAIR(0xec7e937b, 0x56193951),
90  LIMB_PAIR(0x8ef109e1, 0xb8b48991), LIMB_PAIR(0x99b315f3, 0xa2da725b),
91  LIMB_PAIR(0xb68540ee, 0x929a21a0), LIMB_PAIR(0x8e1c9a1f, 0x953eb961),
92  LIMB_PARTIAL(0x051)
93 };
94 
95 // The "Gx" value from Appendix D.1.2.5 of NIST FIPS 186-4.
96 static limb_t const P521_Gx[NUM_LIMBS_521BIT] PROGMEM = {
97  LIMB_PAIR(0xc2e5bd66, 0xf97e7e31), LIMB_PAIR(0x856a429b, 0x3348b3c1),
98  LIMB_PAIR(0xa2ffa8de, 0xfe1dc127), LIMB_PAIR(0xefe75928, 0xa14b5e77),
99  LIMB_PAIR(0x6b4d3dba, 0xf828af60), LIMB_PAIR(0x053fb521, 0x9c648139),
100  LIMB_PAIR(0x2395b442, 0x9e3ecb66), LIMB_PAIR(0x0404e9cd, 0x858e06b7),
101  LIMB_PARTIAL(0x0c6)
102 };
103 
104 // The "Gy" value from Appendix D.1.2.5 of NIST FIPS 186-4.
105 static limb_t const P521_Gy[NUM_LIMBS_521BIT] PROGMEM = {
106  LIMB_PAIR(0x9fd16650, 0x88be9476), LIMB_PAIR(0xa272c240, 0x353c7086),
107  LIMB_PAIR(0x3fad0761, 0xc550b901), LIMB_PAIR(0x5ef42640, 0x97ee7299),
108  LIMB_PAIR(0x273e662c, 0x17afbd17), LIMB_PAIR(0x579b4468, 0x98f54449),
109  LIMB_PAIR(0x2c7d1bd9, 0x5c8a5fb4), LIMB_PAIR(0x9a3bc004, 0x39296a78),
110  LIMB_PARTIAL(0x118)
111 };
112 
135 bool P521::eval(uint8_t result[132], const uint8_t f[66], const uint8_t point[132])
136 {
137  limb_t x[NUM_LIMBS_521BIT];
138  limb_t y[NUM_LIMBS_521BIT];
139  bool ok;
140 
141  // Unpack the curve point from the parameters and validate it.
142  if (point) {
143  BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, point, 66);
144  BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, point + 66, 66);
145  ok = validate(x, y);
146  } else {
147  memcpy_P(x, P521_Gx, sizeof(x));
148  memcpy_P(y, P521_Gy, sizeof(y));
149  ok = true;
150  }
151 
152  // Evaluate the curve function.
153  evaluate(x, y, f);
154 
155  // Pack the answer into the result array.
156  BigNumberUtil::packBE(result, 66, x, NUM_LIMBS_521BIT);
157  BigNumberUtil::packBE(result + 66, 66, y, NUM_LIMBS_521BIT);
158 
159  // Clean up.
160  clean(x);
161  clean(y);
162  return ok;
163 }
164 
208 void P521::dh1(uint8_t k[132], uint8_t f[66])
209 {
211  derivePublicKey(k, f);
212 }
213 
229 bool P521::dh2(const uint8_t k[132], uint8_t f[66])
230 {
231  // Unpack the (x, y) point from k.
232  limb_t x[NUM_LIMBS_521BIT];
233  limb_t y[NUM_LIMBS_521BIT];
234  BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, k, 66);
235  BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, k + 66, 66);
236 
237  // Validate the curve point. We keep going to preserve the timing.
238  bool ok = validate(x, y);
239 
240  // Evaluate the curve function.
241  evaluate(x, y, f);
242 
243  // The secret key is the x component of the final value.
244  BigNumberUtil::packBE(f, 66, x, NUM_LIMBS_521BIT);
245 
246  // Clean up.
247  clean(x);
248  clean(y);
249  return ok;
250 }
251 
276 void P521::sign(uint8_t signature[132], const uint8_t privateKey[66],
277  const void *message, size_t len, Hash *hash)
278 {
279  uint8_t hm[66];
280  uint8_t k[66];
281  limb_t x[NUM_LIMBS_521BIT];
282  limb_t y[NUM_LIMBS_521BIT];
283  limb_t t[NUM_LIMBS_521BIT];
284  uint64_t count = 0;
285 
286  // Format the incoming message, hashing it if necessary.
287  if (hash) {
288  // Hash the message.
289  hash->reset();
290  hash->update(message, len);
291  len = hash->hashSize();
292  if (len > 64)
293  len = 64;
294  memset(hm, 0, 66 - len);
295  hash->finalize(hm + 66 - len, len);
296  } else {
297  // The message is the hash.
298  if (len > 64)
299  len = 64;
300  memset(hm, 0, 66 - len);
301  memcpy(hm + 66 - len, message, len);
302  }
303 
304  // Keep generating k values until both r and s are non-zero.
305  for (;;) {
306  // Generate the k value deterministically according to RFC 6979.
307  if (hash)
308  generateK(k, hm, privateKey, hash, count);
309  else
310  generateK(k, hm, privateKey, count);
311 
312  // Generate r = kG.x mod q.
313  memcpy_P(x, P521_Gx, sizeof(x));
314  memcpy_P(y, P521_Gy, sizeof(y));
315  evaluate(x, y, k);
316  BigNumberUtil::reduceQuick_P(x, x, P521_q, NUM_LIMBS_521BIT);
317  BigNumberUtil::packBE(signature, 66, x, NUM_LIMBS_521BIT);
318 
319  // If r is zero, then we need to generate a new k value.
320  // This is utterly improbable, but let's be safe anyway.
321  if (BigNumberUtil::isZero(x, NUM_LIMBS_521BIT)) {
322  ++count;
323  continue;
324  }
325 
326  // Generate s = (privateKey * r + hm) / k mod q.
327  BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, privateKey, 66);
328  mulQ(y, y, x);
329  BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, hm, 66);
330  BigNumberUtil::add(x, x, y, NUM_LIMBS_521BIT);
331  BigNumberUtil::reduceQuick_P(x, x, P521_q, NUM_LIMBS_521BIT);
332  BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, k, 66);
333  recipQ(t, y);
334  mulQ(x, x, t);
335  BigNumberUtil::packBE(signature + 66, 66, x, NUM_LIMBS_521BIT);
336 
337  // Exit the loop if s is non-zero.
338  if (!BigNumberUtil::isZero(x, NUM_LIMBS_521BIT))
339  break;
340 
341  // We need to generate a new k value according to RFC 6979.
342  // This is utterly improbable, but let's be safe anyway.
343  ++count;
344  }
345 
346  // Clean up.
347  clean(hm);
348  clean(k);
349  clean(x);
350  clean(y);
351  clean(t);
352 }
353 
373 bool P521::verify(const uint8_t signature[132],
374  const uint8_t publicKey[132],
375  const void *message, size_t len, Hash *hash)
376 {
377  limb_t x[NUM_LIMBS_521BIT];
378  limb_t y[NUM_LIMBS_521BIT];
379  limb_t r[NUM_LIMBS_521BIT];
380  limb_t s[NUM_LIMBS_521BIT];
381  limb_t u1[NUM_LIMBS_521BIT];
382  limb_t u2[NUM_LIMBS_521BIT];
383  uint8_t t[66];
384  bool ok = false;
385 
386  // Because we are operating on public values, we don't need to
387  // be as strict about constant time. Bail out early if there
388  // is a problem with the parameters.
389 
390  // Unpack the signature. The values must be between 1 and q - 1.
391  BigNumberUtil::unpackBE(r, NUM_LIMBS_521BIT, signature, 66);
392  BigNumberUtil::unpackBE(s, NUM_LIMBS_521BIT, signature + 66, 66);
393  if (BigNumberUtil::isZero(r, NUM_LIMBS_521BIT) ||
394  BigNumberUtil::isZero(s, NUM_LIMBS_521BIT) ||
395  !BigNumberUtil::sub_P(x, r, P521_q, NUM_LIMBS_521BIT) ||
396  !BigNumberUtil::sub_P(x, s, P521_q, NUM_LIMBS_521BIT)) {
397  goto failed;
398  }
399 
400  // Unpack the public key and check that it is a valid curve point.
401  BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, publicKey, 66);
402  BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, publicKey + 66, 66);
403  if (!validate(x, y)) {
404  goto failed;
405  }
406 
407  // Hash the message to generate hm, which we store into u1.
408  if (hash) {
409  // Hash the message.
410  hash->reset();
411  hash->update(message, len);
412  len = hash->hashSize();
413  if (len > 64)
414  len = 64;
415  hash->finalize(u2, len);
416  BigNumberUtil::unpackBE(u1, NUM_LIMBS_521BIT, (uint8_t *)u2, len);
417  } else {
418  // The message is the hash.
419  if (len > 64)
420  len = 64;
421  BigNumberUtil::unpackBE(u1, NUM_LIMBS_521BIT, (uint8_t *)message, len);
422  }
423 
424  // Compute u1 = hm * s^-1 mod q and u2 = r * s^-1 mod q.
425  recipQ(u2, s);
426  mulQ(u1, u1, u2);
427  mulQ(u2, r, u2);
428 
429  // Compute the curve point R = u2 * publicKey + u1 * G.
430  BigNumberUtil::packBE(t, 66, u2, NUM_LIMBS_521BIT);
431  evaluate(x, y, t);
432  memcpy_P(u2, P521_Gx, sizeof(x));
433  memcpy_P(s, P521_Gy, sizeof(y));
434  BigNumberUtil::packBE(t, 66, u1, NUM_LIMBS_521BIT);
435  evaluate(u2, s, t);
436  addAffine(u2, s, x, y);
437 
438  // If R.x = r mod q, then the signature is valid.
439  BigNumberUtil::reduceQuick_P(u1, u2, P521_q, NUM_LIMBS_521BIT);
440  ok = secure_compare(u1, r, NUM_LIMBS_521BIT * sizeof(limb_t));
441 
442  // Clean up and exit.
443 failed:
444  clean(x);
445  clean(y);
446  clean(r);
447  clean(s);
448  clean(u1);
449  clean(u2);
450  clean(t);
451  return ok;
452 }
453 
466 void P521::generatePrivateKey(uint8_t privateKey[66])
467 {
468  // Generate a random 521-bit value for the private key. The value
469  // must be generated uniformly at random between 1 and q - 1 where q
470  // is the group order (RFC 6090). We use the recommended algorithm
471  // from Appendix B of RFC 6090: generate a random 521-bit value
472  // and discard it if it is not within the range 1 to q - 1.
473  limb_t x[NUM_LIMBS_521BIT];
474  do {
475  RNG.rand((uint8_t *)x, sizeof(x));
476 #if BIGNUMBER_LIMB_8BIT
477  x[NUM_LIMBS_521BIT - 1] &= 0x01;
478 #else
479  x[NUM_LIMBS_521BIT - 1] &= 0x1FF;
480 #endif
481  BigNumberUtil::packBE(privateKey, 66, x, NUM_LIMBS_521BIT);
482  } while (BigNumberUtil::isZero(x, NUM_LIMBS_521BIT) ||
483  !BigNumberUtil::sub_P(x, x, P521_q, NUM_LIMBS_521BIT));
484  clean(x);
485 }
486 
497 void P521::derivePublicKey(uint8_t publicKey[132], const uint8_t privateKey[66])
498 {
499  // Evaluate the curve function starting with the generator.
500  limb_t x[NUM_LIMBS_521BIT];
501  limb_t y[NUM_LIMBS_521BIT];
502  memcpy_P(x, P521_Gx, sizeof(x));
503  memcpy_P(y, P521_Gy, sizeof(y));
504  evaluate(x, y, privateKey);
505 
506  // Pack the (x, y) point into the public key.
507  BigNumberUtil::packBE(publicKey, 66, x, NUM_LIMBS_521BIT);
508  BigNumberUtil::packBE(publicKey + 66, 66, y, NUM_LIMBS_521BIT);
509 
510  // Clean up.
511  clean(x);
512  clean(y);
513 }
514 
524 bool P521::isValidPrivateKey(const uint8_t privateKey[66])
525 {
526  // The value "q" as a byte array from most to least significant.
527  static uint8_t const P521_q_bytes[66] PROGMEM = {
528  0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
529  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
530  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
531  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
532  0xFF, 0xFA, 0x51, 0x86, 0x87, 0x83, 0xBF, 0x2F,
533  0x96, 0x6B, 0x7F, 0xCC, 0x01, 0x48, 0xF7, 0x09,
534  0xA5, 0xD0, 0x3B, 0xB5, 0xC9, 0xB8, 0x89, 0x9C,
535  0x47, 0xAE, 0xBB, 0x6F, 0xB7, 0x1E, 0x91, 0x38,
536  0x64, 0x09
537  };
538  uint8_t zeroTest = 0;
539  uint8_t posn = 66;
540  uint16_t borrow = 0;
541  while (posn > 0) {
542  --posn;
543 
544  // Check for zero.
545  zeroTest |= privateKey[posn];
546 
547  // Subtract P521_q_bytes from the key. If there is no borrow,
548  // then the key value was greater than or equal to q.
549  borrow = ((uint16_t)(privateKey[posn])) -
550  pgm_read_byte(&(P521_q_bytes[posn])) -
551  ((borrow >> 8) & 0x01);
552  }
553  return zeroTest != 0 && borrow != 0;
554 }
555 
564 bool P521::isValidPublicKey(const uint8_t publicKey[132])
565 {
566  limb_t x[NUM_LIMBS_521BIT];
567  limb_t y[NUM_LIMBS_521BIT];
568  BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, publicKey, 66);
569  BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, publicKey + 66, 66);
570  bool ok = validate(x, y);
571  clean(x);
572  clean(y);
573  return ok;
574 }
575 
597 void P521::evaluate(limb_t *x, limb_t *y, const uint8_t f[66])
598 {
599  limb_t x1[NUM_LIMBS_521BIT];
600  limb_t y1[NUM_LIMBS_521BIT];
601  limb_t z1[NUM_LIMBS_521BIT];
602  limb_t x2[NUM_LIMBS_521BIT];
603  limb_t y2[NUM_LIMBS_521BIT];
604  limb_t z2[NUM_LIMBS_521BIT];
605 
606  // We want the input in Jacobian co-ordinates. The point (x, y, z)
607  // corresponds to the affine point (x / z^2, y / z^3), so if we set z
608  // to 1 we end up with Jacobian co-ordinates. Remember that z is 1
609  // and continue on.
610 
611  // Set the answer to the point-at-infinity initially (z = 0).
612  memset(x1, 0, sizeof(x1));
613  memset(y1, 0, sizeof(y1));
614  memset(z1, 0, sizeof(z1));
615 
616  // Special handling for the highest bit. We can skip dblPoint()/addPoint()
617  // and simply conditionally move (x, y, z) into (x1, y1, z1).
618  uint8_t select = (f[0] & 0x01);
619  cmove(select, x1, x);
620  cmove(select, y1, y);
621  cmove1(select, z1); // z = 1
622 
623  // Iterate over the remaining 520 bits of f from highest to lowest.
624  uint8_t mask = 0x80;
625  uint8_t fposn = 1;
626  for (uint16_t t = 520; t > 0; --t) {
627  // Double the answer.
628  dblPoint(x1, y1, z1, x1, y1, z1);
629 
630  // Add (x, y, z) to (x1, y1, z1) for the next 1 bit.
631  // We must always do this to preserve the overall timing.
632  // The z value is always 1 so we can omit that argument.
633  addPoint(x2, y2, z2, x1, y1, z1, x, y/*, z*/);
634 
635  // If the bit was 1, then move (x2, y2, z2) into (x1, y1, z1).
636  select = (f[fposn] & mask);
637  cmove(select, x1, x2);
638  cmove(select, y1, y2);
639  cmove(select, z1, z2);
640 
641  // Move onto the next bit.
642  mask >>= 1;
643  if (!mask) {
644  ++fposn;
645  mask = 0x80;
646  }
647  }
648 
649  // Convert from Jacobian co-ordinates back into affine co-ordinates.
650  // x = x1 * (z1^2)^-1, y = y1 * (z1^3)^-1.
651  recip(x2, z1);
652  square(y2, x2);
653  mul(x, x1, y2);
654  mul(y2, y2, x2);
655  mul(y, y1, y2);
656 
657  // Clean up.
658  clean(x1);
659  clean(y1);
660  clean(z1);
661  clean(x2);
662  clean(y2);
663  clean(z2);
664 }
665 
676 void P521::addAffine(limb_t *x1, limb_t *y1, const limb_t *x2, const limb_t *y2)
677 {
678  limb_t xout[NUM_LIMBS_521BIT];
679  limb_t yout[NUM_LIMBS_521BIT];
680  limb_t zout[NUM_LIMBS_521BIT];
681  limb_t z1[NUM_LIMBS_521BIT];
682 
683  // z1 = 1
684  z1[0] = 1;
685  memset(z1 + 1, 0, (NUM_LIMBS_521BIT - 1) * sizeof(limb_t));
686 
687  // Add the two points.
688  addPoint(xout, yout, zout, x1, y1, z1, x2, y2/*, z2*/);
689 
690  // Convert from Jacobian co-ordinates back into affine co-ordinates.
691  // x1 = xout * (zout^2)^-1, y1 = yout * (zout^3)^-1.
692  recip(z1, zout);
693  square(zout, z1);
694  mul(x1, xout, zout);
695  mul(zout, zout, z1);
696  mul(y1, yout, zout);
697 
698  // Clean up.
699  clean(xout);
700  clean(yout);
701  clean(zout);
702  clean(z1);
703 }
704 
714 bool P521::validate(const limb_t *x, const limb_t *y)
715 {
716  bool result;
717 
718  // If x or y is greater than or equal to 2^521 - 1, then the
719  // point is definitely not on the curve. Preserve timing by
720  // delaying the reporting of the result until later.
721  result = inRange(x);
722  result &= inRange(y);
723 
724  // We need to check that y^2 = x^3 - 3 * x + b mod 2^521 - 1.
725  limb_t t1[NUM_LIMBS_521BIT];
726  limb_t t2[NUM_LIMBS_521BIT];
727  square(t1, x);
728  mul(t1, t1, x);
729  mulLiteral(t2, x, 3);
730  sub(t1, t1, t2);
731  memcpy_P(t2, P521_b, sizeof(t2));
732  add(t1, t1, t2);
733  square(t2, y);
734  result &= secure_compare(t1, t2, sizeof(t1));
735  clean(t1);
736  clean(t2);
737  return result;
738 }
739 
748 bool P521::inRange(const limb_t *x)
749 {
750  // Do a trial subtraction of 2^521 - 1 from x, which is equivalent
751  // to adding 1 and subtracting 2^521. We only need the carry.
752  dlimb_t carry = 1;
753  limb_t word = 0;
754  for (uint8_t index = 0; index < NUM_LIMBS_521BIT; ++index) {
755  carry += *x++;
756  word = (limb_t)carry;
757  carry >>= LIMB_BITS;
758  }
759 
760  // Determine the carry out from the low 521 bits.
761 #if BIGNUMBER_LIMB_8BIT
762  carry = (carry << 7) + (word >> 1);
763 #else
764  carry = (carry << (LIMB_BITS - 9)) + (word >> 9);
765 #endif
766 
767  // If the carry is zero, then x was in range. Otherwise it is out
768  // of range. Check for zero in a way that preserves constant timing.
769  word = (limb_t)(carry | (carry >> LIMB_BITS));
770  word = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - word) >> LIMB_BITS);
771  return (bool)word;
772 }
773 
783 void P521::reduce(limb_t *result, const limb_t *x)
784 {
785 #if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT || BIGNUMBER_LIMB_64BIT
786  // According to NIST FIPS 186-4, we add the high 521 bits to the
787  // low 521 bits and then do a trial subtraction of 2^521 - 1.
788  // We do both in a single step. Subtracting 2^521 - 1 is equivalent
789  // to adding 1 and subtracting 2^521.
790  uint8_t index;
791  const limb_t *xl = x;
792  const limb_t *xh = x + NUM_LIMBS_521BIT;
793  limb_t *rr = result;
794  dlimb_t carry;
795  limb_t word = x[NUM_LIMBS_521BIT - 1];
796  carry = (word >> 9) + 1;
797  word &= 0x1FF;
798  for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) {
799  carry += *xl++;
800  carry += ((dlimb_t)(*xh++)) << (LIMB_BITS - 9);
801  *rr++ = (limb_t)carry;
802  carry >>= LIMB_BITS;
803  }
804  carry += word;
805  carry += ((dlimb_t)(x[NUM_LIMBS_1042BIT - 1])) << (LIMB_BITS - 9);
806  word = (limb_t)carry;
807  *rr = word;
808 
809  // If the carry out was 1, then mask it off and we have the answer.
810  // If the carry out was 0, then we need to add 2^521 - 1 back again.
811  // To preserve the timing we perform a conditional subtract of 1 and
812  // then mask off the high bits.
813  carry = ((word >> 9) ^ 0x01) & 0x01;
814  rr = result;
815  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
816  carry = ((dlimb_t)(*rr)) - carry;
817  *rr++ = (limb_t)carry;
818  carry = (carry >> LIMB_BITS) & 0x01;
819  }
820  *(--rr) &= 0x1FF;
821 #elif BIGNUMBER_LIMB_8BIT
822  // Same as above, but for 8-bit limbs.
823  uint8_t index;
824  const limb_t *xl = x;
825  const limb_t *xh = x + NUM_LIMBS_521BIT;
826  limb_t *rr = result;
827  dlimb_t carry;
828  limb_t word = x[NUM_LIMBS_521BIT - 1];
829  carry = (word >> 1) + 1;
830  word &= 0x01;
831  for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) {
832  carry += *xl++;
833  carry += ((dlimb_t)(*xh++)) << 7;
834  *rr++ = (limb_t)carry;
835  carry >>= LIMB_BITS;
836  }
837  carry += word;
838  carry += ((dlimb_t)(x[NUM_LIMBS_1042BIT - 1])) << 1;
839  word = (limb_t)carry;
840  *rr = word;
841  carry = ((word >> 1) ^ 0x01) & 0x01;
842  rr = result;
843  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
844  carry = ((dlimb_t)(*rr)) - carry;
845  *rr++ = (limb_t)carry;
846  carry = (carry >> LIMB_BITS) & 0x01;
847  }
848  *(--rr) &= 0x01;
849 #else
850  #error "Don't know how to reduce values mod 2^521 - 1"
851 #endif
852 }
853 
866 void P521::reduceQuick(limb_t *x)
867 {
868  // Perform a trial subtraction of 2^521 - 1 from x. This is
869  // equivalent to adding 1 and subtracting 2^521 - 1.
870  uint8_t index;
871  limb_t *xx = x;
872  dlimb_t carry = 1;
873  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
874  carry += *xx;
875  *xx++ = (limb_t)carry;
876  carry >>= LIMB_BITS;
877  }
878 
879  // If the carry out was 1, then mask it off and we have the answer.
880  // If the carry out was 0, then we need to add 2^521 - 1 back again.
881  // To preserve the timing we perform a conditional subtract of 1 and
882  // then mask off the high bits.
883 #if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT || BIGNUMBER_LIMB_64BIT
884  carry = ((x[NUM_LIMBS_521BIT - 1] >> 9) ^ 0x01) & 0x01;
885  xx = x;
886  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
887  carry = ((dlimb_t)(*xx)) - carry;
888  *xx++ = (limb_t)carry;
889  carry = (carry >> LIMB_BITS) & 0x01;
890  }
891  *(--xx) &= 0x1FF;
892 #elif BIGNUMBER_LIMB_8BIT
893  carry = ((x[NUM_LIMBS_521BIT - 1] >> 1) ^ 0x01) & 0x01;
894  xx = x;
895  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
896  carry = ((dlimb_t)(*xx)) - carry;
897  *xx++ = (limb_t)carry;
898  carry = (carry >> LIMB_BITS) & 0x01;
899  }
900  *(--xx) &= 0x01;
901 #endif
902 }
903 
916 void P521::mulNoReduce(limb_t *result, const limb_t *x, const limb_t *y)
917 {
918  uint8_t i, j;
919  dlimb_t carry;
920  limb_t word;
921  const limb_t *yy;
922  limb_t *rr;
923 
924  // Multiply the lowest word of x by y.
925  carry = 0;
926  word = x[0];
927  yy = y;
928  rr = result;
929  for (i = 0; i < NUM_LIMBS_521BIT; ++i) {
930  carry += ((dlimb_t)(*yy++)) * word;
931  *rr++ = (limb_t)carry;
932  carry >>= LIMB_BITS;
933  }
934  *rr = (limb_t)carry;
935 
936  // Multiply and add the remaining words of x by y.
937  for (i = 1; i < NUM_LIMBS_521BIT; ++i) {
938  word = x[i];
939  carry = 0;
940  yy = y;
941  rr = result + i;
942  for (j = 0; j < NUM_LIMBS_521BIT; ++j) {
943  carry += ((dlimb_t)(*yy++)) * word;
944  carry += *rr;
945  *rr++ = (limb_t)carry;
946  carry >>= LIMB_BITS;
947  }
948  *rr = (limb_t)carry;
949  }
950 }
951 
962 void P521::mul(limb_t *result, const limb_t *x, const limb_t *y)
963 {
964  limb_t temp[NUM_LIMBS_1042BIT];
965  mulNoReduce(temp, x, y);
966  reduce(result, temp);
967  strict_clean(temp);
968  crypto_feed_watchdog();
969 }
970 
990 void P521::mulLiteral(limb_t *result, const limb_t *x, limb_t y)
991 {
992  uint8_t index;
993  dlimb_t carry = 0;
994  const limb_t *xx = x;
995  limb_t *rr = result;
996 
997  // Multiply x by the literal and put it into the result array.
998  // We assume that y is small enough that overflow from the
999  // highest limb will not occur during this process.
1000  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
1001  carry += ((dlimb_t)(*xx++)) * y;
1002  *rr++ = (limb_t)carry;
1003  carry >>= LIMB_BITS;
1004  }
1005 
1006  // Reduce the value modulo 2^521 - 1. The high half is only a
1007  // single limb, so we can short-cut some of reduce() here.
1008 #if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT || BIGNUMBER_LIMB_64BIT
1009  limb_t word = result[NUM_LIMBS_521BIT - 1];
1010  carry = (word >> 9) + 1;
1011  word &= 0x1FF;
1012  rr = result;
1013  for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) {
1014  carry += *rr;
1015  *rr++ = (limb_t)carry;
1016  carry >>= LIMB_BITS;
1017  }
1018  carry += word;
1019  word = (limb_t)carry;
1020  *rr = word;
1021 
1022  // If the carry out was 1, then mask it off and we have the answer.
1023  // If the carry out was 0, then we need to add 2^521 - 1 back again.
1024  // To preserve the timing we perform a conditional subtract of 1 and
1025  // then mask off the high bits.
1026  carry = ((word >> 9) ^ 0x01) & 0x01;
1027  rr = result;
1028  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
1029  carry = ((dlimb_t)(*rr)) - carry;
1030  *rr++ = (limb_t)carry;
1031  carry = (carry >> LIMB_BITS) & 0x01;
1032  }
1033  *(--rr) &= 0x1FF;
1034 #elif BIGNUMBER_LIMB_8BIT
1035  // Same as above, but for 8-bit limbs.
1036  limb_t word = result[NUM_LIMBS_521BIT - 1];
1037  carry = (word >> 1) + 1;
1038  word &= 0x01;
1039  rr = result;
1040  for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) {
1041  carry += *rr;
1042  *rr++ = (limb_t)carry;
1043  carry >>= LIMB_BITS;
1044  }
1045  carry += word;
1046  word = (limb_t)carry;
1047  *rr = word;
1048  carry = ((word >> 1) ^ 0x01) & 0x01;
1049  rr = result;
1050  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
1051  carry = ((dlimb_t)(*rr)) - carry;
1052  *rr++ = (limb_t)carry;
1053  carry = (carry >> LIMB_BITS) & 0x01;
1054  }
1055  *(--rr) &= 0x01;
1056 #endif
1057 }
1058 
1069 void P521::add(limb_t *result, const limb_t *x, const limb_t *y)
1070 {
1071  dlimb_t carry = 0;
1072  limb_t *rr = result;
1073  for (uint8_t posn = 0; posn < NUM_LIMBS_521BIT; ++posn) {
1074  carry += *x++;
1075  carry += *y++;
1076  *rr++ = (limb_t)carry;
1077  carry >>= LIMB_BITS;
1078  }
1079  reduceQuick(result);
1080 }
1081 
1092 void P521::sub(limb_t *result, const limb_t *x, const limb_t *y)
1093 {
1094  dlimb_t borrow;
1095  uint8_t posn;
1096  limb_t *rr = result;
1097 
1098  // Subtract y from x to generate the intermediate result.
1099  borrow = 0;
1100  for (posn = 0; posn < NUM_LIMBS_521BIT; ++posn) {
1101  borrow = ((dlimb_t)(*x++)) - (*y++) - ((borrow >> LIMB_BITS) & 0x01);
1102  *rr++ = (limb_t)borrow;
1103  }
1104 
1105  // If we had a borrow, then the result has gone negative and we
1106  // have to add 2^521 - 1 to the result to make it positive again.
1107  // The top bits of "borrow" will be all 1's if there is a borrow
1108  // or it will be all 0's if there was no borrow. Easiest is to
1109  // conditionally subtract 1 and then mask off the high bits.
1110  rr = result;
1111  borrow = (borrow >> LIMB_BITS) & 1U;
1112  borrow = ((dlimb_t)(*rr)) - borrow;
1113  *rr++ = (limb_t)borrow;
1114  for (posn = 1; posn < NUM_LIMBS_521BIT; ++posn) {
1115  borrow = ((dlimb_t)(*rr)) - ((borrow >> LIMB_BITS) & 0x01);
1116  *rr++ = (limb_t)borrow;
1117  }
1118 #if BIGNUMBER_LIMB_8BIT
1119  *(--rr) &= 0x01;
1120 #else
1121  *(--rr) &= 0x1FF;
1122 #endif
1123 }
1124 
1140 void P521::dblPoint(limb_t *xout, limb_t *yout, limb_t *zout,
1141  const limb_t *xin, const limb_t *yin,
1142  const limb_t *zin)
1143 {
1144  limb_t alpha[NUM_LIMBS_521BIT];
1145  limb_t beta[NUM_LIMBS_521BIT];
1146  limb_t gamma[NUM_LIMBS_521BIT];
1147  limb_t delta[NUM_LIMBS_521BIT];
1148  limb_t tmp[NUM_LIMBS_521BIT];
1149 
1150  // Double the point. If it is the point at infinity (z = 0),
1151  // then zout will still be zero at the end of this process so
1152  // we don't need any special handling for that case.
1153  square(delta, zin); // delta = z^2
1154  square(gamma, yin); // gamma = y^2
1155  mul(beta, xin, gamma); // beta = x * gamma
1156  sub(tmp, xin, delta); // alpha = 3 * (x - delta) * (x + delta)
1157  mulLiteral(alpha, tmp, 3);
1158  add(tmp, xin, delta);
1159  mul(alpha, alpha, tmp);
1160  square(xout, alpha); // xout = alpha^2 - 8 * beta
1161  mulLiteral(tmp, beta, 8);
1162  sub(xout, xout, tmp);
1163  add(zout, yin, zin); // zout = (y + z)^2 - gamma - delta
1164  square(zout, zout);
1165  sub(zout, zout, gamma);
1166  sub(zout, zout, delta);
1167  mulLiteral(yout, beta, 4);// yout = alpha * (4 * beta - xout) - 8 * gamma^2
1168  sub(yout, yout, xout);
1169  mul(yout, alpha, yout);
1170  square(gamma, gamma);
1171  mulLiteral(gamma, gamma, 8);
1172  sub(yout, yout, gamma);
1173 
1174  // Clean up.
1175  strict_clean(alpha);
1176  strict_clean(beta);
1177  strict_clean(gamma);
1178  strict_clean(delta);
1179  strict_clean(tmp);
1180 }
1181 
1201 void P521::addPoint(limb_t *xout, limb_t *yout, limb_t *zout,
1202  const limb_t *x1, const limb_t *y1,
1203  const limb_t *z1, const limb_t *x2,
1204  const limb_t *y2)
1205 {
1206  limb_t z1z1[NUM_LIMBS_521BIT];
1207  limb_t u2[NUM_LIMBS_521BIT];
1208  limb_t s2[NUM_LIMBS_521BIT];
1209  limb_t h[NUM_LIMBS_521BIT];
1210  limb_t i[NUM_LIMBS_521BIT];
1211  limb_t j[NUM_LIMBS_521BIT];
1212  limb_t r[NUM_LIMBS_521BIT];
1213  limb_t v[NUM_LIMBS_521BIT];
1214 
1215  // Determine if the first value is the point-at-infinity identity element.
1216  // The second z value is always 1 so it cannot be the point-at-infinity.
1217  limb_t p1IsIdentity = BigNumberUtil::isZero(z1, NUM_LIMBS_521BIT);
1218 
1219  // Multiply the points, assuming that z2 = 1.
1220  square(z1z1, z1); // z1z1 = z1^2
1221  mul(u2, x2, z1z1); // u2 = x2 * z1z1
1222  mul(s2, y2, z1); // s2 = y2 * z1 * z1z1
1223  mul(s2, s2, z1z1);
1224  sub(h, u2, x1); // h = u2 - x1
1225  mulLiteral(i, h, 2); // i = (2 * h)^2
1226  square(i, i);
1227  sub(r, s2, y1); // r = 2 * (s2 - y1)
1228  add(r, r, r);
1229  mul(j, h, i); // j = h * i
1230  mul(v, x1, i); // v = x1 * i
1231  square(xout, r); // xout = r^2 - j - 2 * v
1232  sub(xout, xout, j);
1233  sub(xout, xout, v);
1234  sub(xout, xout, v);
1235  sub(yout, v, xout); // yout = r * (v - xout) - 2 * y1 * j
1236  mul(yout, r, yout);
1237  mul(j, y1, j);
1238  sub(yout, yout, j);
1239  sub(yout, yout, j);
1240  mul(zout, z1, h); // zout = 2 * z1 * h
1241  add(zout, zout, zout);
1242 
1243  // Select the answer to return. If (x1, y1, z1) was the identity,
1244  // then the answer is (x2, y2, z2). Otherwise it is (xout, yout, zout).
1245  // Conditionally move the second argument over the output if necessary.
1246  cmove(p1IsIdentity, xout, x2);
1247  cmove(p1IsIdentity, yout, y2);
1248  cmove1(p1IsIdentity, zout); // z2 = 1
1249 
1250  // Clean up.
1251  strict_clean(z1z1);
1252  strict_clean(u2);
1253  strict_clean(s2);
1254  strict_clean(h);
1255  strict_clean(i);
1256  strict_clean(j);
1257  strict_clean(r);
1258  strict_clean(v);
1259 }
1260 
1273 void P521::cmove(limb_t select, limb_t *x, const limb_t *y)
1274 {
1275  uint8_t posn;
1276  limb_t dummy;
1277  limb_t sel;
1278 
1279  // Turn "select" into an all-zeroes or all-ones mask. We don't care
1280  // which bit or bits is set in the original "select" value.
1281  sel = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - select) >> LIMB_BITS);
1282  --sel;
1283 
1284  // Move y into x based on "select".
1285  for (posn = 0; posn < NUM_LIMBS_521BIT; ++posn) {
1286  dummy = sel & (*x ^ *y++);
1287  *x++ ^= dummy;
1288  }
1289 }
1290 
1302 void P521::cmove1(limb_t select, limb_t *x)
1303 {
1304  uint8_t posn;
1305  limb_t dummy;
1306  limb_t sel;
1307 
1308  // Turn "select" into an all-zeroes or all-ones mask. We don't care
1309  // which bit or bits is set in the original "select" value.
1310  sel = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - select) >> LIMB_BITS);
1311  --sel;
1312 
1313  // Move 1 into x based on "select".
1314  dummy = sel & (*x ^ 1);
1315  *x++ ^= dummy;
1316  for (posn = 1; posn < NUM_LIMBS_521BIT; ++posn) {
1317  dummy = sel & *x;
1318  *x++ ^= dummy;
1319  }
1320 }
1321 
1330 void P521::recip(limb_t *result, const limb_t *x)
1331 {
1332  limb_t t1[NUM_LIMBS_521BIT];
1333 
1334  // The reciprocal is the same as x ^ (p - 2) where p = 2^521 - 1.
1335  // The big-endian hexadecimal expansion of (p - 2) is:
1336  // 01FF FFFFFFF FFFFFFFF ... FFFFFFFF FFFFFFFD
1337  //
1338  // The naive implementation needs to do 2 multiplications per 1 bit and
1339  // 1 multiplication per 0 bit. We can improve upon this by creating a
1340  // pattern 1111 and then shifting and multiplying to create 11111111,
1341  // and then 1111111111111111, and so on for the top 512-bits.
1342 
1343  // Build a 4-bit pattern 1111 in the result.
1344  square(result, x);
1345  mul(result, result, x);
1346  square(result, result);
1347  mul(result, result, x);
1348  square(result, result);
1349  mul(result, result, x);
1350 
1351  // Shift and multiply by increasing powers of two. This turns
1352  // 1111 into 11111111, and then 1111111111111111, and so on.
1353  for (size_t power = 4; power <= 256; power <<= 1) {
1354  square(t1, result);
1355  for (size_t temp = 1; temp < power; ++temp)
1356  square(t1, t1);
1357  mul(result, result, t1);
1358  }
1359 
1360  // Handle the 9 lowest bits of (p - 2), 111111101, from highest to lowest.
1361  for (uint8_t index = 0; index < 7; ++index) {
1362  square(result, result);
1363  mul(result, result, x);
1364  }
1365  square(result, result);
1366  square(result, result);
1367  mul(result, result, x);
1368 
1369  // Clean up.
1370  clean(t1);
1371 }
1372 
1381 void P521::reduceQ(limb_t *result, const limb_t *r)
1382 {
1383  // Algorithm from: http://en.wikipedia.org/wiki/Barrett_reduction
1384  //
1385  // We assume that r is less than or equal to (q - 1)^2.
1386  //
1387  // We want to compute result = r mod q. Find the smallest k such
1388  // that 2^k > q. In our case, k = 521. Then set m = floor(4^k / q)
1389  // and let r = r - q * floor(m * r / 4^k). This will be the result
1390  // or it will be at most one subtraction of q away from the result.
1391  //
1392  // Note: m is a 522-bit number, which fits in the same number of limbs
1393  // as a 521-bit number assuming that limbs are 8 bits or more in size.
1394  static limb_t const numM[NUM_LIMBS_521BIT] PROGMEM = {
1395  LIMB_PAIR(0x6EC79BF7, 0x449048E1), LIMB_PAIR(0x7663B851, 0xC44A3647),
1396  LIMB_PAIR(0x08F65A2F, 0x8033FEB7), LIMB_PAIR(0x40D06994, 0xAE79787C),
1397  LIMB_PAIR(0x00000005, 0x00000000), LIMB_PAIR(0x00000000, 0x00000000),
1398  LIMB_PAIR(0x00000000, 0x00000000), LIMB_PAIR(0x00000000, 0x00000000),
1399  LIMB_PARTIAL(0x200)
1400  };
1401  limb_t temp[NUM_LIMBS_1042BIT + NUM_LIMBS_521BIT];
1402  limb_t temp2[NUM_LIMBS_521BIT];
1403 
1404  // Multiply r by m.
1405  BigNumberUtil::mul_P(temp, r, NUM_LIMBS_1042BIT, numM, NUM_LIMBS_521BIT);
1406 
1407  // Compute (m * r / 4^521) = (m * r / 2^1042).
1408 #if BIGNUMBER_LIMB_8BIT || BIGNUMBER_LIMB_16BIT
1409  dlimb_t carry = temp[NUM_LIMBS_BITS(1040)] >> 2;
1410  for (uint8_t index = 0; index < NUM_LIMBS_521BIT; ++index) {
1411  carry += ((dlimb_t)(temp[NUM_LIMBS_BITS(1040) + index + 1])) << (LIMB_BITS - 2);
1412  temp2[index] = (limb_t)carry;
1413  carry >>= LIMB_BITS;
1414  }
1415 #elif BIGNUMBER_LIMB_32BIT || BIGNUMBER_LIMB_64BIT
1416  dlimb_t carry = temp[NUM_LIMBS_BITS(1024)] >> 18;
1417  for (uint8_t index = 0; index < NUM_LIMBS_521BIT; ++index) {
1418  carry += ((dlimb_t)(temp[NUM_LIMBS_BITS(1024) + index + 1])) << (LIMB_BITS - 18);
1419  temp2[index] = (limb_t)carry;
1420  carry >>= LIMB_BITS;
1421  }
1422 #endif
1423 
1424  // Multiply (m * r) / 2^1042 by q and subtract it from r.
1425  // We can ignore the high words of the subtraction result
1426  // because they will all turn into zero after the subtraction.
1427  BigNumberUtil::mul_P(temp, temp2, NUM_LIMBS_521BIT,
1428  P521_q, NUM_LIMBS_521BIT);
1429  BigNumberUtil::sub(result, r, temp, NUM_LIMBS_521BIT);
1430 
1431  // Perform a trial subtraction of q from the result to reduce it.
1432  BigNumberUtil::reduceQuick_P(result, result, P521_q, NUM_LIMBS_521BIT);
1433 
1434  // Clean up and exit.
1435  clean(temp);
1436  clean(temp2);
1437 }
1438 
1449 void P521::mulQ(limb_t *result, const limb_t *x, const limb_t *y)
1450 {
1451  limb_t temp[NUM_LIMBS_1042BIT];
1452  mulNoReduce(temp, x, y);
1453  reduceQ(result, temp);
1454  strict_clean(temp);
1455 }
1456 
1465 void P521::recipQ(limb_t *result, const limb_t *x)
1466 {
1467  // Bottom 265 bits of q - 2. The top 256 bits are all-1's.
1468  static limb_t const P521_q_m2[] PROGMEM = {
1469  LIMB_PAIR(0x91386407, 0xbb6fb71e), LIMB_PAIR(0x899c47ae, 0x3bb5c9b8),
1470  LIMB_PAIR(0xf709a5d0, 0x7fcc0148), LIMB_PAIR(0xbf2f966b, 0x51868783),
1471  LIMB_PARTIAL(0x1fa)
1472  };
1473 
1474  // Raise x to the power of q - 2, mod q. We start with the top
1475  // 256 bits which are all-1's, using a similar technique to recip().
1476  limb_t t1[NUM_LIMBS_521BIT];
1477  mulQ(result, x, x);
1478  mulQ(result, result, x);
1479  mulQ(result, result, result);
1480  mulQ(result, result, x);
1481  mulQ(result, result, result);
1482  mulQ(result, result, x);
1483  for (size_t power = 4; power <= 128; power <<= 1) {
1484  mulQ(t1, result, result);
1485  for (size_t temp = 1; temp < power; ++temp)
1486  mulQ(t1, t1, t1);
1487  mulQ(result, result, t1);
1488  }
1489  clean(t1);
1490 
1491  // Deal with the bottom 265 bits from highest to lowest. Square for
1492  // each bit and multiply in x whenever there is a 1 bit. The timing
1493  // is based on the publicly-known constant q - 2, not on the value of x.
1494  size_t bit = 265;
1495  while (bit > 0) {
1496  --bit;
1497  mulQ(result, result, result);
1498  if (pgm_read_limb(&(P521_q_m2[bit / LIMB_BITS])) &
1499  (((limb_t)1) << (bit % LIMB_BITS))) {
1500  mulQ(result, result, x);
1501  }
1502  }
1503 }
1504 
1515 void P521::generateK(uint8_t k[66], const uint8_t hm[66],
1516  const uint8_t x[66], Hash *hash, uint64_t count)
1517 {
1518  size_t hlen = hash->hashSize();
1519  uint8_t V[64];
1520  uint8_t K[64];
1521  uint8_t marker;
1522 
1523  // If for some reason a hash function was supplied with more than
1524  // 512 bits of output, truncate hash values to the first 512 bits.
1525  // We cannot support more than this yet.
1526  if (hlen > 64)
1527  hlen = 64;
1528 
1529  // RFC 6979, Section 3.2, Step a. Hash the message, reduce modulo q,
1530  // and produce an octet string the same length as q, bits2octets(H(m)).
1531  // We support hashes up to 512 bits and q is a 521-bit number, so "hm"
1532  // is already the bits2octets(H(m)) value that we need.
1533 
1534  // Steps b and c. Set V to all-ones and K to all-zeroes.
1535  memset(V, 0x01, hlen);
1536  memset(K, 0x00, hlen);
1537 
1538  // Step d. K = HMAC_K(V || 0x00 || x || hm). We make a small
1539  // modification here to append the count value if it is non-zero.
1540  // We use this to generate a new k if we have to re-enter this
1541  // function because the previous one was rejected by sign().
1542  // This is slightly different to RFC 6979 which says that the
1543  // loop in step h below should be continued. That code path is
1544  // difficult to access, so instead modify K and V in steps d and f.
1545  // This alternative construction is compatible with the second
1546  // variant described in section 3.6 of RFC 6979.
1547  hash->resetHMAC(K, hlen);
1548  hash->update(V, hlen);
1549  marker = 0x00;
1550  hash->update(&marker, 1);
1551  hash->update(x, 66);
1552  hash->update(hm, 66);
1553  if (count)
1554  hash->update(&count, sizeof(count));
1555  hash->finalizeHMAC(K, hlen, K, hlen);
1556 
1557  // Step e. V = HMAC_K(V)
1558  hash->resetHMAC(K, hlen);
1559  hash->update(V, hlen);
1560  hash->finalizeHMAC(K, hlen, V, hlen);
1561 
1562  // Step f. K = HMAC_K(V || 0x01 || x || hm)
1563  hash->resetHMAC(K, hlen);
1564  hash->update(V, hlen);
1565  marker = 0x01;
1566  hash->update(&marker, 1);
1567  hash->update(x, 66);
1568  hash->update(hm, 66);
1569  if (count)
1570  hash->update(&count, sizeof(count));
1571  hash->finalizeHMAC(K, hlen, K, hlen);
1572 
1573  // Step g. V = HMAC_K(V)
1574  hash->resetHMAC(K, hlen);
1575  hash->update(V, hlen);
1576  hash->finalizeHMAC(K, hlen, V, hlen);
1577 
1578  // Step h. Generate candidate k values until we find what we want.
1579  for (;;) {
1580  // Step h.1 and h.2. Generate a string of 66 bytes in length.
1581  // T = empty
1582  // while (len(T) < 66)
1583  // V = HMAC_K(V)
1584  // T = T || V
1585  size_t posn = 0;
1586  while (posn < 66) {
1587  size_t temp = 66 - posn;
1588  if (temp > hlen)
1589  temp = hlen;
1590  hash->resetHMAC(K, hlen);
1591  hash->update(V, hlen);
1592  hash->finalizeHMAC(K, hlen, V, hlen);
1593  memcpy(k + posn, V, temp);
1594  posn += temp;
1595  }
1596 
1597  // Step h.3. k = bits2int(T) and exit the loop if k is not in
1598  // the range 1 to q - 1. Note: We have to extract the 521 most
1599  // significant bits of T, which means shifting it right by seven
1600  // bits to put it into the correct form.
1601  for (posn = 65; posn > 0; --posn)
1602  k[posn] = (k[posn - 1] << 1) | (k[posn] >> 7);
1603  k[0] >>= 7;
1604  if (isValidPrivateKey(k))
1605  break;
1606 
1607  // Generate new K and V values and try again.
1608  // K = HMAC_K(V || 0x00)
1609  // V = HMAC_K(V)
1610  hash->resetHMAC(K, hlen);
1611  hash->update(V, hlen);
1612  marker = 0x00;
1613  hash->update(&marker, 1);
1614  hash->finalizeHMAC(K, hlen, K, hlen);
1615  hash->resetHMAC(K, hlen);
1616  hash->update(V, hlen);
1617  hash->finalizeHMAC(K, hlen, V, hlen);
1618  }
1619 
1620  // Clean up.
1621  clean(V);
1622  clean(K);
1623 }
1624 
1637 void P521::generateK(uint8_t k[66], const uint8_t hm[66],
1638  const uint8_t x[66], uint64_t count)
1639 {
1640  SHA512 hash;
1641  generateK(k, hm, x, &hash, count);
1642 }
static void reduceQuick_P(limb_t *result, const limb_t *x, const limb_t *y, size_t size)
Reduces x modulo y using subtraction where y is in program memory.
static void unpackBE(limb_t *limbs, size_t count, const uint8_t *bytes, size_t len)
Unpacks the big-endian byte representation of a big number into a limb array.
static limb_t sub(limb_t *result, const limb_t *x, const limb_t *y, size_t size)
Subtracts one big number from another.
static limb_t add(limb_t *result, const limb_t *x, const limb_t *y, size_t size)
Adds two big numbers.
static void mul_P(limb_t *result, const limb_t *x, size_t xcount, const limb_t *y, size_t ycount)
Multiplies two big numbers where one is in program memory.
static limb_t isZero(const limb_t *x, size_t size)
Determine if a big number is zero.
static limb_t sub_P(limb_t *result, const limb_t *x, const limb_t *y, size_t size)
Subtracts one big number from another where one is in program memory.
static void packBE(uint8_t *bytes, size_t len, const limb_t *limbs, size_t count)
Packs the big-endian byte representation of a big number into a byte array.
Abstract base class for cryptographic hash algorithms.
Definition: Hash.h:30
virtual void finalize(void *hash, size_t len)=0
Finalizes the hashing process and returns the hash.
virtual void reset()=0
Resets the hash ready for a new hashing process.
virtual void finalizeHMAC(const void *key, size_t keyLen, void *hash, size_t hashLen)=0
Finalizes the HMAC hashing process and returns the hash.
virtual size_t hashSize() const =0
Size of the hash result from finalize().
virtual void resetHMAC(const void *key, size_t keyLen)=0
Resets the hash ready for a new HMAC hashing process.
virtual void update(const void *data, size_t len)=0
Updates the hash with more data.
static void derivePublicKey(uint8_t publicKey[132], const uint8_t privateKey[66])
Derives the public key from a private key for P-521 signing operations.
Definition: P521.cpp:497
static bool isValidPrivateKey(const uint8_t privateKey[66])
Validates a private key value to ensure that it is between 1 and q - 1.
Definition: P521.cpp:524
static bool dh2(const uint8_t k[132], uint8_t f[66])
Performs phase 2 of an ECDH key exchange using P-521.
Definition: P521.cpp:229
static void sign(uint8_t signature[132], const uint8_t privateKey[66], const void *message, size_t len, Hash *hash=0)
Signs a message using a specific P-521 private key.
Definition: P521.cpp:276
static bool verify(const uint8_t signature[132], const uint8_t publicKey[132], const void *message, size_t len, Hash *hash=0)
Verifies a signature using a specific P-521 public key.
Definition: P521.cpp:373
static bool eval(uint8_t result[132], const uint8_t f[66], const uint8_t point[132])
Evaluates the curve function.
Definition: P521.cpp:135
static void dh1(uint8_t k[132], uint8_t f[66])
Performs phase 1 of an ECDH key exchange using P-521.
Definition: P521.cpp:208
static void generatePrivateKey(uint8_t privateKey[66])
Generates a private key for P-521 signing operations.
Definition: P521.cpp:466
static bool isValidPublicKey(const uint8_t publicKey[132])
Validates a public key to ensure that it is a valid curve point.
Definition: P521.cpp:564
void rand(uint8_t *data, size_t len)
Generates random bytes into a caller-supplied buffer.
Definition: RNG.cpp:660
SHA-512 hash algorithm.
Definition: SHA512.h:31