kernel: 5.4: import wireguard backport
[openwrt/openwrt.git] target/linux/generic/backport-5.4/080-wireguard-0013-crypto-poly1305-move-core-routines-into-a-separate-l.patch
1 From 905432633564215220707ee97f64ffb249a029f2 Mon Sep 17 00:00:00 2001
2 From: Ard Biesheuvel <ardb@kernel.org>
3 Date: Fri, 8 Nov 2019 13:22:19 +0100
4 Subject: [PATCH 013/124] crypto: poly1305 - move core routines into a separate
5 library
6
7 commit 48ea8c6ebc96bc0990e12ee1c43d0832c23576bb upstream.
8
9 Move the core Poly1305 routines shared between the generic Poly1305
10 shash driver and the Adiantum and NHPoly1305 drivers into a separate
11 library so that using just these pieces does not pull in the crypto
12 API pieces of the generic Poly1305 routine.
13
14 In a subsequent patch, we will augment this generic library with
15 init/update/final routines so that the Poly1305 algorithm can be used
16 directly without the need for using the crypto API's shash abstraction.
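
For context only, and not part of the patch itself: a minimal sketch of how a
kernel-internal caller could use the poly1305_core_* helpers that this patch
exposes via <crypto/internal/poly1305.h>. The function and parameter names
below are illustrative, the input must be block-aligned, and only the first
16 bytes of the key (the "r" part) are consumed by poly1305_core_setkey():

    #include <crypto/internal/poly1305.h>

    /* Hypothetical caller of the core API; input must be a whole number of
     * 16-byte blocks, and no "s" nonce is added to the emitted value. */
    static void example_poly1305_core_hash(const u8 *raw_r_key,
                                           const void *data,
                                           unsigned int nblocks,
                                           u8 digest[POLY1305_DIGEST_SIZE])
    {
            struct poly1305_key r;
            struct poly1305_state state;

            poly1305_core_setkey(&r, raw_r_key);  /* clamp and load "r" */
            poly1305_core_init(&state);           /* h = 0 */
            poly1305_core_blocks(&state, &r, data, nblocks, 1); /* hibit=1 for full blocks */
            poly1305_core_emit(&state, digest);   /* h mod 2^128 */
    }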
17
18 Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
19 Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
20 Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
21 ---
22 arch/x86/crypto/poly1305_glue.c | 2 +-
23 crypto/Kconfig | 5 +-
24 crypto/adiantum.c | 5 +-
25 crypto/nhpoly1305.c | 3 +-
26 crypto/poly1305_generic.c | 195 ++---------------------------
27 include/crypto/internal/poly1305.h | 67 ++++++++++
28 include/crypto/poly1305.h | 23 ----
29 lib/crypto/Kconfig | 3 +
30 lib/crypto/Makefile | 3 +
31 lib/crypto/poly1305.c | 158 +++++++++++++++++++++++
32 10 files changed, 248 insertions(+), 216 deletions(-)
33 create mode 100644 include/crypto/internal/poly1305.h
34 create mode 100644 lib/crypto/poly1305.c
35
36 --- a/arch/x86/crypto/poly1305_glue.c
37 +++ b/arch/x86/crypto/poly1305_glue.c
38 @@ -7,8 +7,8 @@
39
40 #include <crypto/algapi.h>
41 #include <crypto/internal/hash.h>
42 +#include <crypto/internal/poly1305.h>
43 #include <crypto/internal/simd.h>
44 -#include <crypto/poly1305.h>
45 #include <linux/crypto.h>
46 #include <linux/kernel.h>
47 #include <linux/module.h>
48 --- a/crypto/Kconfig
49 +++ b/crypto/Kconfig
50 @@ -446,7 +446,7 @@ config CRYPTO_KEYWRAP
51 config CRYPTO_NHPOLY1305
52 tristate
53 select CRYPTO_HASH
54 - select CRYPTO_POLY1305
55 + select CRYPTO_LIB_POLY1305_GENERIC
56
57 config CRYPTO_NHPOLY1305_SSE2
58 tristate "NHPoly1305 hash function (x86_64 SSE2 implementation)"
59 @@ -467,7 +467,7 @@ config CRYPTO_NHPOLY1305_AVX2
60 config CRYPTO_ADIANTUM
61 tristate "Adiantum support"
62 select CRYPTO_CHACHA20
63 - select CRYPTO_POLY1305
64 + select CRYPTO_LIB_POLY1305_GENERIC
65 select CRYPTO_NHPOLY1305
66 select CRYPTO_MANAGER
67 help
68 @@ -686,6 +686,7 @@ config CRYPTO_GHASH
69 config CRYPTO_POLY1305
70 tristate "Poly1305 authenticator algorithm"
71 select CRYPTO_HASH
72 + select CRYPTO_LIB_POLY1305_GENERIC
73 help
74 Poly1305 authenticator algorithm, RFC7539.
75
76 --- a/crypto/adiantum.c
77 +++ b/crypto/adiantum.c
78 @@ -33,6 +33,7 @@
79 #include <crypto/b128ops.h>
80 #include <crypto/chacha.h>
81 #include <crypto/internal/hash.h>
82 +#include <crypto/internal/poly1305.h>
83 #include <crypto/internal/skcipher.h>
84 #include <crypto/nhpoly1305.h>
85 #include <crypto/scatterwalk.h>
86 @@ -242,11 +243,11 @@ static void adiantum_hash_header(struct
87
88 BUILD_BUG_ON(sizeof(header) % POLY1305_BLOCK_SIZE != 0);
89 poly1305_core_blocks(&state, &tctx->header_hash_key,
90 - &header, sizeof(header) / POLY1305_BLOCK_SIZE);
91 + &header, sizeof(header) / POLY1305_BLOCK_SIZE, 1);
92
93 BUILD_BUG_ON(TWEAK_SIZE % POLY1305_BLOCK_SIZE != 0);
94 poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv,
95 - TWEAK_SIZE / POLY1305_BLOCK_SIZE);
96 + TWEAK_SIZE / POLY1305_BLOCK_SIZE, 1);
97
98 poly1305_core_emit(&state, &rctx->header_hash);
99 }
100 --- a/crypto/nhpoly1305.c
101 +++ b/crypto/nhpoly1305.c
102 @@ -33,6 +33,7 @@
103 #include <asm/unaligned.h>
104 #include <crypto/algapi.h>
105 #include <crypto/internal/hash.h>
106 +#include <crypto/internal/poly1305.h>
107 #include <crypto/nhpoly1305.h>
108 #include <linux/crypto.h>
109 #include <linux/kernel.h>
110 @@ -78,7 +79,7 @@ static void process_nh_hash_value(struct
111 BUILD_BUG_ON(NH_HASH_BYTES % POLY1305_BLOCK_SIZE != 0);
112
113 poly1305_core_blocks(&state->poly_state, &key->poly_key, state->nh_hash,
114 - NH_HASH_BYTES / POLY1305_BLOCK_SIZE);
115 + NH_HASH_BYTES / POLY1305_BLOCK_SIZE, 1);
116 }
117
118 /*
119 --- a/crypto/poly1305_generic.c
120 +++ b/crypto/poly1305_generic.c
121 @@ -13,27 +13,12 @@
122
123 #include <crypto/algapi.h>
124 #include <crypto/internal/hash.h>
125 -#include <crypto/poly1305.h>
126 +#include <crypto/internal/poly1305.h>
127 #include <linux/crypto.h>
128 #include <linux/kernel.h>
129 #include <linux/module.h>
130 #include <asm/unaligned.h>
131
132 -static inline u64 mlt(u64 a, u64 b)
133 -{
134 - return a * b;
135 -}
136 -
137 -static inline u32 sr(u64 v, u_char n)
138 -{
139 - return v >> n;
140 -}
141 -
142 -static inline u32 and(u32 v, u32 mask)
143 -{
144 - return v & mask;
145 -}
146 -
147 int crypto_poly1305_init(struct shash_desc *desc)
148 {
149 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
150 @@ -47,124 +32,8 @@ int crypto_poly1305_init(struct shash_de
151 }
152 EXPORT_SYMBOL_GPL(crypto_poly1305_init);
153
154 -void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key)
155 -{
156 - /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
157 - key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff;
158 - key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03;
159 - key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff;
160 - key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff;
161 - key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
162 -}
163 -EXPORT_SYMBOL_GPL(poly1305_core_setkey);
164 -
165 -/*
166 - * Poly1305 requires a unique key for each tag, which implies that we can't set
167 - * it on the tfm that gets accessed by multiple users simultaneously. Instead we
168 - * expect the key as the first 32 bytes in the update() call.
169 - */
170 -unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
171 - const u8 *src, unsigned int srclen)
172 -{
173 - if (!dctx->sset) {
174 - if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
175 - poly1305_core_setkey(&dctx->r, src);
176 - src += POLY1305_BLOCK_SIZE;
177 - srclen -= POLY1305_BLOCK_SIZE;
178 - dctx->rset = true;
179 - }
180 - if (srclen >= POLY1305_BLOCK_SIZE) {
181 - dctx->s[0] = get_unaligned_le32(src + 0);
182 - dctx->s[1] = get_unaligned_le32(src + 4);
183 - dctx->s[2] = get_unaligned_le32(src + 8);
184 - dctx->s[3] = get_unaligned_le32(src + 12);
185 - src += POLY1305_BLOCK_SIZE;
186 - srclen -= POLY1305_BLOCK_SIZE;
187 - dctx->sset = true;
188 - }
189 - }
190 - return srclen;
191 -}
192 -EXPORT_SYMBOL_GPL(crypto_poly1305_setdesckey);
193 -
194 -static void poly1305_blocks_internal(struct poly1305_state *state,
195 - const struct poly1305_key *key,
196 - const void *src, unsigned int nblocks,
197 - u32 hibit)
198 -{
199 - u32 r0, r1, r2, r3, r4;
200 - u32 s1, s2, s3, s4;
201 - u32 h0, h1, h2, h3, h4;
202 - u64 d0, d1, d2, d3, d4;
203 -
204 - if (!nblocks)
205 - return;
206 -
207 - r0 = key->r[0];
208 - r1 = key->r[1];
209 - r2 = key->r[2];
210 - r3 = key->r[3];
211 - r4 = key->r[4];
212 -
213 - s1 = r1 * 5;
214 - s2 = r2 * 5;
215 - s3 = r3 * 5;
216 - s4 = r4 * 5;
217 -
218 - h0 = state->h[0];
219 - h1 = state->h[1];
220 - h2 = state->h[2];
221 - h3 = state->h[3];
222 - h4 = state->h[4];
223 -
224 - do {
225 - /* h += m[i] */
226 - h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
227 - h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
228 - h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
229 - h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
230 - h4 += (get_unaligned_le32(src + 12) >> 8) | hibit;
231 -
232 - /* h *= r */
233 - d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
234 - mlt(h3, s2) + mlt(h4, s1);
235 - d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
236 - mlt(h3, s3) + mlt(h4, s2);
237 - d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
238 - mlt(h3, s4) + mlt(h4, s3);
239 - d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
240 - mlt(h3, r0) + mlt(h4, s4);
241 - d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
242 - mlt(h3, r1) + mlt(h4, r0);
243 -
244 - /* (partial) h %= p */
245 - d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff);
246 - d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff);
247 - d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff);
248 - d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff);
249 - h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
250 - h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
251 -
252 - src += POLY1305_BLOCK_SIZE;
253 - } while (--nblocks);
254 -
255 - state->h[0] = h0;
256 - state->h[1] = h1;
257 - state->h[2] = h2;
258 - state->h[3] = h3;
259 - state->h[4] = h4;
260 -}
261 -
262 -void poly1305_core_blocks(struct poly1305_state *state,
263 - const struct poly1305_key *key,
264 - const void *src, unsigned int nblocks)
265 -{
266 - poly1305_blocks_internal(state, key, src, nblocks, 1 << 24);
267 -}
268 -EXPORT_SYMBOL_GPL(poly1305_core_blocks);
269 -
270 -static void poly1305_blocks(struct poly1305_desc_ctx *dctx,
271 - const u8 *src, unsigned int srclen, u32 hibit)
272 +static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
273 + unsigned int srclen)
274 {
275 unsigned int datalen;
276
277 @@ -174,8 +43,8 @@ static void poly1305_blocks(struct poly1
278 srclen = datalen;
279 }
280
281 - poly1305_blocks_internal(&dctx->h, &dctx->r,
282 - src, srclen / POLY1305_BLOCK_SIZE, hibit);
283 + poly1305_core_blocks(&dctx->h, &dctx->r, src,
284 + srclen / POLY1305_BLOCK_SIZE, 1);
285 }
286
287 int crypto_poly1305_update(struct shash_desc *desc,
288 @@ -193,13 +62,13 @@ int crypto_poly1305_update(struct shash_
289
290 if (dctx->buflen == POLY1305_BLOCK_SIZE) {
291 poly1305_blocks(dctx, dctx->buf,
292 - POLY1305_BLOCK_SIZE, 1 << 24);
293 + POLY1305_BLOCK_SIZE);
294 dctx->buflen = 0;
295 }
296 }
297
298 if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
299 - poly1305_blocks(dctx, src, srclen, 1 << 24);
300 + poly1305_blocks(dctx, src, srclen);
301 src += srclen - (srclen % POLY1305_BLOCK_SIZE);
302 srclen %= POLY1305_BLOCK_SIZE;
303 }
304 @@ -213,54 +82,6 @@ int crypto_poly1305_update(struct shash_
305 }
306 EXPORT_SYMBOL_GPL(crypto_poly1305_update);
307
308 -void poly1305_core_emit(const struct poly1305_state *state, void *dst)
309 -{
310 - u32 h0, h1, h2, h3, h4;
311 - u32 g0, g1, g2, g3, g4;
312 - u32 mask;
313 -
314 - /* fully carry h */
315 - h0 = state->h[0];
316 - h1 = state->h[1];
317 - h2 = state->h[2];
318 - h3 = state->h[3];
319 - h4 = state->h[4];
320 -
321 - h2 += (h1 >> 26); h1 = h1 & 0x3ffffff;
322 - h3 += (h2 >> 26); h2 = h2 & 0x3ffffff;
323 - h4 += (h3 >> 26); h3 = h3 & 0x3ffffff;
324 - h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
325 - h1 += (h0 >> 26); h0 = h0 & 0x3ffffff;
326 -
327 - /* compute h + -p */
328 - g0 = h0 + 5;
329 - g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff;
330 - g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff;
331 - g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff;
332 - g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
333 -
334 - /* select h if h < p, or h + -p if h >= p */
335 - mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
336 - g0 &= mask;
337 - g1 &= mask;
338 - g2 &= mask;
339 - g3 &= mask;
340 - g4 &= mask;
341 - mask = ~mask;
342 - h0 = (h0 & mask) | g0;
343 - h1 = (h1 & mask) | g1;
344 - h2 = (h2 & mask) | g2;
345 - h3 = (h3 & mask) | g3;
346 - h4 = (h4 & mask) | g4;
347 -
348 - /* h = h % (2^128) */
349 - put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0);
350 - put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4);
351 - put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8);
352 - put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12);
353 -}
354 -EXPORT_SYMBOL_GPL(poly1305_core_emit);
355 -
356 int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
357 {
358 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
359 @@ -274,7 +95,7 @@ int crypto_poly1305_final(struct shash_d
360 dctx->buf[dctx->buflen++] = 1;
361 memset(dctx->buf + dctx->buflen, 0,
362 POLY1305_BLOCK_SIZE - dctx->buflen);
363 - poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0);
364 + poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0);
365 }
366
367 poly1305_core_emit(&dctx->h, digest);
368 --- /dev/null
369 +++ b/include/crypto/internal/poly1305.h
370 @@ -0,0 +1,67 @@
371 +/* SPDX-License-Identifier: GPL-2.0 */
372 +/*
373 + * Common values for the Poly1305 algorithm
374 + */
375 +
376 +#ifndef _CRYPTO_INTERNAL_POLY1305_H
377 +#define _CRYPTO_INTERNAL_POLY1305_H
378 +
379 +#include <asm/unaligned.h>
380 +#include <linux/types.h>
381 +#include <crypto/poly1305.h>
382 +
383 +struct shash_desc;
384 +
385 +/*
386 + * Poly1305 core functions. These implement the ε-almost-∆-universal hash
387 + * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce
388 + * ("s key") at the end. They also only support block-aligned inputs.
389 + */
390 +void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key);
391 +static inline void poly1305_core_init(struct poly1305_state *state)
392 +{
393 + *state = (struct poly1305_state){};
394 +}
395 +
396 +void poly1305_core_blocks(struct poly1305_state *state,
397 + const struct poly1305_key *key, const void *src,
398 + unsigned int nblocks, u32 hibit);
399 +void poly1305_core_emit(const struct poly1305_state *state, void *dst);
400 +
401 +/* Crypto API helper functions for the Poly1305 MAC */
402 +int crypto_poly1305_init(struct shash_desc *desc);
403 +
404 +int crypto_poly1305_update(struct shash_desc *desc,
405 + const u8 *src, unsigned int srclen);
406 +int crypto_poly1305_final(struct shash_desc *desc, u8 *dst);
407 +
408 +/*
409 + * Poly1305 requires a unique key for each tag, which implies that we can't set
410 + * it on the tfm that gets accessed by multiple users simultaneously. Instead we
411 + * expect the key as the first 32 bytes in the update() call.
412 + */
413 +static inline
414 +unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
415 + const u8 *src, unsigned int srclen)
416 +{
417 + if (!dctx->sset) {
418 + if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
419 + poly1305_core_setkey(&dctx->r, src);
420 + src += POLY1305_BLOCK_SIZE;
421 + srclen -= POLY1305_BLOCK_SIZE;
422 + dctx->rset = true;
423 + }
424 + if (srclen >= POLY1305_BLOCK_SIZE) {
425 + dctx->s[0] = get_unaligned_le32(src + 0);
426 + dctx->s[1] = get_unaligned_le32(src + 4);
427 + dctx->s[2] = get_unaligned_le32(src + 8);
428 + dctx->s[3] = get_unaligned_le32(src + 12);
429 + src += POLY1305_BLOCK_SIZE;
430 + srclen -= POLY1305_BLOCK_SIZE;
431 + dctx->sset = true;
432 + }
433 + }
434 + return srclen;
435 +}
436 +
437 +#endif
438 --- a/include/crypto/poly1305.h
439 +++ b/include/crypto/poly1305.h
440 @@ -38,27 +38,4 @@ struct poly1305_desc_ctx {
441 bool sset;
442 };
443
444 -/*
445 - * Poly1305 core functions. These implement the ε-almost-∆-universal hash
446 - * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce
447 - * ("s key") at the end. They also only support block-aligned inputs.
448 - */
449 -void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key);
450 -static inline void poly1305_core_init(struct poly1305_state *state)
451 -{
452 - memset(state->h, 0, sizeof(state->h));
453 -}
454 -void poly1305_core_blocks(struct poly1305_state *state,
455 - const struct poly1305_key *key,
456 - const void *src, unsigned int nblocks);
457 -void poly1305_core_emit(const struct poly1305_state *state, void *dst);
458 -
459 -/* Crypto API helper functions for the Poly1305 MAC */
460 -int crypto_poly1305_init(struct shash_desc *desc);
461 -unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
462 - const u8 *src, unsigned int srclen);
463 -int crypto_poly1305_update(struct shash_desc *desc,
464 - const u8 *src, unsigned int srclen);
465 -int crypto_poly1305_final(struct shash_desc *desc, u8 *dst);
466 -
467 #endif
468 --- a/lib/crypto/Kconfig
469 +++ b/lib/crypto/Kconfig
470 @@ -37,5 +37,8 @@ config CRYPTO_LIB_CHACHA
471 config CRYPTO_LIB_DES
472 tristate
473
474 +config CRYPTO_LIB_POLY1305_GENERIC
475 + tristate
476 +
477 config CRYPTO_LIB_SHA256
478 tristate
479 --- a/lib/crypto/Makefile
480 +++ b/lib/crypto/Makefile
481 @@ -13,5 +13,8 @@ libarc4-y := arc4.o
482 obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
483 libdes-y := des.o
484
485 +obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += libpoly1305.o
486 +libpoly1305-y := poly1305.o
487 +
488 obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
489 libsha256-y := sha256.o
490 --- /dev/null
491 +++ b/lib/crypto/poly1305.c
492 @@ -0,0 +1,158 @@
493 +// SPDX-License-Identifier: GPL-2.0-or-later
494 +/*
495 + * Poly1305 authenticator algorithm, RFC7539
496 + *
497 + * Copyright (C) 2015 Martin Willi
498 + *
499 + * Based on public domain code by Andrew Moon and Daniel J. Bernstein.
500 + */
501 +
502 +#include <crypto/internal/poly1305.h>
503 +#include <linux/kernel.h>
504 +#include <linux/module.h>
505 +#include <asm/unaligned.h>
506 +
507 +static inline u64 mlt(u64 a, u64 b)
508 +{
509 + return a * b;
510 +}
511 +
512 +static inline u32 sr(u64 v, u_char n)
513 +{
514 + return v >> n;
515 +}
516 +
517 +static inline u32 and(u32 v, u32 mask)
518 +{
519 + return v & mask;
520 +}
521 +
522 +void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key)
523 +{
524 + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
525 + key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff;
526 + key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03;
527 + key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff;
528 + key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff;
529 + key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
530 +}
531 +EXPORT_SYMBOL_GPL(poly1305_core_setkey);
532 +
533 +void poly1305_core_blocks(struct poly1305_state *state,
534 + const struct poly1305_key *key, const void *src,
535 + unsigned int nblocks, u32 hibit)
536 +{
537 + u32 r0, r1, r2, r3, r4;
538 + u32 s1, s2, s3, s4;
539 + u32 h0, h1, h2, h3, h4;
540 + u64 d0, d1, d2, d3, d4;
541 +
542 + if (!nblocks)
543 + return;
544 +
545 + r0 = key->r[0];
546 + r1 = key->r[1];
547 + r2 = key->r[2];
548 + r3 = key->r[3];
549 + r4 = key->r[4];
550 +
551 + s1 = r1 * 5;
552 + s2 = r2 * 5;
553 + s3 = r3 * 5;
554 + s4 = r4 * 5;
555 +
556 + h0 = state->h[0];
557 + h1 = state->h[1];
558 + h2 = state->h[2];
559 + h3 = state->h[3];
560 + h4 = state->h[4];
561 +
562 + do {
563 + /* h += m[i] */
564 + h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
565 + h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
566 + h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
567 + h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
568 + h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24);
569 +
570 + /* h *= r */
571 + d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
572 + mlt(h3, s2) + mlt(h4, s1);
573 + d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
574 + mlt(h3, s3) + mlt(h4, s2);
575 + d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
576 + mlt(h3, s4) + mlt(h4, s3);
577 + d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
578 + mlt(h3, r0) + mlt(h4, s4);
579 + d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
580 + mlt(h3, r1) + mlt(h4, r0);
581 +
582 + /* (partial) h %= p */
583 + d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff);
584 + d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff);
585 + d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff);
586 + d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff);
587 + h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
588 + h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
589 +
590 + src += POLY1305_BLOCK_SIZE;
591 + } while (--nblocks);
592 +
593 + state->h[0] = h0;
594 + state->h[1] = h1;
595 + state->h[2] = h2;
596 + state->h[3] = h3;
597 + state->h[4] = h4;
598 +}
599 +EXPORT_SYMBOL_GPL(poly1305_core_blocks);
600 +
601 +void poly1305_core_emit(const struct poly1305_state *state, void *dst)
602 +{
603 + u32 h0, h1, h2, h3, h4;
604 + u32 g0, g1, g2, g3, g4;
605 + u32 mask;
606 +
607 + /* fully carry h */
608 + h0 = state->h[0];
609 + h1 = state->h[1];
610 + h2 = state->h[2];
611 + h3 = state->h[3];
612 + h4 = state->h[4];
613 +
614 + h2 += (h1 >> 26); h1 = h1 & 0x3ffffff;
615 + h3 += (h2 >> 26); h2 = h2 & 0x3ffffff;
616 + h4 += (h3 >> 26); h3 = h3 & 0x3ffffff;
617 + h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
618 + h1 += (h0 >> 26); h0 = h0 & 0x3ffffff;
619 +
620 + /* compute h + -p */
621 + g0 = h0 + 5;
622 + g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff;
623 + g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff;
624 + g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff;
625 + g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
626 +
627 + /* select h if h < p, or h + -p if h >= p */
628 + mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
629 + g0 &= mask;
630 + g1 &= mask;
631 + g2 &= mask;
632 + g3 &= mask;
633 + g4 &= mask;
634 + mask = ~mask;
635 + h0 = (h0 & mask) | g0;
636 + h1 = (h1 & mask) | g1;
637 + h2 = (h2 & mask) | g2;
638 + h3 = (h3 & mask) | g3;
639 + h4 = (h4 & mask) | g4;
640 +
641 + /* h = h % (2^128) */
642 + put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0);
643 + put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4);
644 + put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8);
645 + put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12);
646 +}
647 +EXPORT_SYMBOL_GPL(poly1305_core_emit);
648 +
649 +MODULE_LICENSE("GPL");
650 +MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");