kernel: 5.4: import wireguard backport
[openwrt/openwrt.git] / target / linux / generic / backport-5.4 / 080-wireguard-0013-crypto-poly1305-move-core-routines-into-a-separate-l.patch
1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Ard Biesheuvel <ardb@kernel.org>
3 Date: Fri, 8 Nov 2019 13:22:19 +0100
4 Subject: [PATCH] crypto: poly1305 - move core routines into a separate library
5
6 commit 48ea8c6ebc96bc0990e12ee1c43d0832c23576bb upstream.
7
8 Move the core Poly1305 routines shared between the generic Poly1305
9 shash driver and the Adiantum and NHPoly1305 drivers into a separate
10 library so that using just these pieces does not pull in the crypto
11 API pieces of the generic Poly1305 routine.
12
13 In a subsequent patch, we will augment this generic library with
14 init/update/final routines so that the Poly1305 algorithm can be used
15 directly without the need for using the crypto API's shash abstraction.
16
17 Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
18 Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
19 Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
20 ---
21 arch/x86/crypto/poly1305_glue.c | 2 +-
22 crypto/Kconfig | 5 +-
23 crypto/adiantum.c | 5 +-
24 crypto/nhpoly1305.c | 3 +-
25 crypto/poly1305_generic.c | 195 ++---------------------------
26 include/crypto/internal/poly1305.h | 67 ++++++++++
27 include/crypto/poly1305.h | 23 ----
28 lib/crypto/Kconfig | 3 +
29 lib/crypto/Makefile | 3 +
30 lib/crypto/poly1305.c | 158 +++++++++++++++++++++++
31 10 files changed, 248 insertions(+), 216 deletions(-)
32 create mode 100644 include/crypto/internal/poly1305.h
33 create mode 100644 lib/crypto/poly1305.c
34
35 --- a/arch/x86/crypto/poly1305_glue.c
36 +++ b/arch/x86/crypto/poly1305_glue.c
37 @@ -7,8 +7,8 @@
38
39 #include <crypto/algapi.h>
40 #include <crypto/internal/hash.h>
41 +#include <crypto/internal/poly1305.h>
42 #include <crypto/internal/simd.h>
43 -#include <crypto/poly1305.h>
44 #include <linux/crypto.h>
45 #include <linux/kernel.h>
46 #include <linux/module.h>
47 --- a/crypto/Kconfig
48 +++ b/crypto/Kconfig
49 @@ -446,7 +446,7 @@ config CRYPTO_KEYWRAP
50 config CRYPTO_NHPOLY1305
51 tristate
52 select CRYPTO_HASH
53 - select CRYPTO_POLY1305
54 + select CRYPTO_LIB_POLY1305_GENERIC
55
56 config CRYPTO_NHPOLY1305_SSE2
57 tristate "NHPoly1305 hash function (x86_64 SSE2 implementation)"
58 @@ -467,7 +467,7 @@ config CRYPTO_NHPOLY1305_AVX2
59 config CRYPTO_ADIANTUM
60 tristate "Adiantum support"
61 select CRYPTO_CHACHA20
62 - select CRYPTO_POLY1305
63 + select CRYPTO_LIB_POLY1305_GENERIC
64 select CRYPTO_NHPOLY1305
65 select CRYPTO_MANAGER
66 help
67 @@ -686,6 +686,7 @@ config CRYPTO_GHASH
68 config CRYPTO_POLY1305
69 tristate "Poly1305 authenticator algorithm"
70 select CRYPTO_HASH
71 + select CRYPTO_LIB_POLY1305_GENERIC
72 help
73 Poly1305 authenticator algorithm, RFC7539.
74
75 --- a/crypto/adiantum.c
76 +++ b/crypto/adiantum.c
77 @@ -33,6 +33,7 @@
78 #include <crypto/b128ops.h>
79 #include <crypto/chacha.h>
80 #include <crypto/internal/hash.h>
81 +#include <crypto/internal/poly1305.h>
82 #include <crypto/internal/skcipher.h>
83 #include <crypto/nhpoly1305.h>
84 #include <crypto/scatterwalk.h>
85 @@ -242,11 +243,11 @@ static void adiantum_hash_header(struct
86
87 BUILD_BUG_ON(sizeof(header) % POLY1305_BLOCK_SIZE != 0);
88 poly1305_core_blocks(&state, &tctx->header_hash_key,
89 - &header, sizeof(header) / POLY1305_BLOCK_SIZE);
90 + &header, sizeof(header) / POLY1305_BLOCK_SIZE, 1);
91
92 BUILD_BUG_ON(TWEAK_SIZE % POLY1305_BLOCK_SIZE != 0);
93 poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv,
94 - TWEAK_SIZE / POLY1305_BLOCK_SIZE);
95 + TWEAK_SIZE / POLY1305_BLOCK_SIZE, 1);
96
97 poly1305_core_emit(&state, &rctx->header_hash);
98 }
99 --- a/crypto/nhpoly1305.c
100 +++ b/crypto/nhpoly1305.c
101 @@ -33,6 +33,7 @@
102 #include <asm/unaligned.h>
103 #include <crypto/algapi.h>
104 #include <crypto/internal/hash.h>
105 +#include <crypto/internal/poly1305.h>
106 #include <crypto/nhpoly1305.h>
107 #include <linux/crypto.h>
108 #include <linux/kernel.h>
109 @@ -78,7 +79,7 @@ static void process_nh_hash_value(struct
110 BUILD_BUG_ON(NH_HASH_BYTES % POLY1305_BLOCK_SIZE != 0);
111
112 poly1305_core_blocks(&state->poly_state, &key->poly_key, state->nh_hash,
113 - NH_HASH_BYTES / POLY1305_BLOCK_SIZE);
114 + NH_HASH_BYTES / POLY1305_BLOCK_SIZE, 1);
115 }
116
117 /*
118 --- a/crypto/poly1305_generic.c
119 +++ b/crypto/poly1305_generic.c
120 @@ -13,27 +13,12 @@
121
122 #include <crypto/algapi.h>
123 #include <crypto/internal/hash.h>
124 -#include <crypto/poly1305.h>
125 +#include <crypto/internal/poly1305.h>
126 #include <linux/crypto.h>
127 #include <linux/kernel.h>
128 #include <linux/module.h>
129 #include <asm/unaligned.h>
130
131 -static inline u64 mlt(u64 a, u64 b)
132 -{
133 - return a * b;
134 -}
135 -
136 -static inline u32 sr(u64 v, u_char n)
137 -{
138 - return v >> n;
139 -}
140 -
141 -static inline u32 and(u32 v, u32 mask)
142 -{
143 - return v & mask;
144 -}
145 -
146 int crypto_poly1305_init(struct shash_desc *desc)
147 {
148 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
149 @@ -47,124 +32,8 @@ int crypto_poly1305_init(struct shash_de
150 }
151 EXPORT_SYMBOL_GPL(crypto_poly1305_init);
152
153 -void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key)
154 -{
155 - /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
156 - key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff;
157 - key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03;
158 - key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff;
159 - key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff;
160 - key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
161 -}
162 -EXPORT_SYMBOL_GPL(poly1305_core_setkey);
163 -
164 -/*
165 - * Poly1305 requires a unique key for each tag, which implies that we can't set
166 - * it on the tfm that gets accessed by multiple users simultaneously. Instead we
167 - * expect the key as the first 32 bytes in the update() call.
168 - */
169 -unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
170 - const u8 *src, unsigned int srclen)
171 -{
172 - if (!dctx->sset) {
173 - if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
174 - poly1305_core_setkey(&dctx->r, src);
175 - src += POLY1305_BLOCK_SIZE;
176 - srclen -= POLY1305_BLOCK_SIZE;
177 - dctx->rset = true;
178 - }
179 - if (srclen >= POLY1305_BLOCK_SIZE) {
180 - dctx->s[0] = get_unaligned_le32(src + 0);
181 - dctx->s[1] = get_unaligned_le32(src + 4);
182 - dctx->s[2] = get_unaligned_le32(src + 8);
183 - dctx->s[3] = get_unaligned_le32(src + 12);
184 - src += POLY1305_BLOCK_SIZE;
185 - srclen -= POLY1305_BLOCK_SIZE;
186 - dctx->sset = true;
187 - }
188 - }
189 - return srclen;
190 -}
191 -EXPORT_SYMBOL_GPL(crypto_poly1305_setdesckey);
192 -
193 -static void poly1305_blocks_internal(struct poly1305_state *state,
194 - const struct poly1305_key *key,
195 - const void *src, unsigned int nblocks,
196 - u32 hibit)
197 -{
198 - u32 r0, r1, r2, r3, r4;
199 - u32 s1, s2, s3, s4;
200 - u32 h0, h1, h2, h3, h4;
201 - u64 d0, d1, d2, d3, d4;
202 -
203 - if (!nblocks)
204 - return;
205 -
206 - r0 = key->r[0];
207 - r1 = key->r[1];
208 - r2 = key->r[2];
209 - r3 = key->r[3];
210 - r4 = key->r[4];
211 -
212 - s1 = r1 * 5;
213 - s2 = r2 * 5;
214 - s3 = r3 * 5;
215 - s4 = r4 * 5;
216 -
217 - h0 = state->h[0];
218 - h1 = state->h[1];
219 - h2 = state->h[2];
220 - h3 = state->h[3];
221 - h4 = state->h[4];
222 -
223 - do {
224 - /* h += m[i] */
225 - h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
226 - h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
227 - h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
228 - h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
229 - h4 += (get_unaligned_le32(src + 12) >> 8) | hibit;
230 -
231 - /* h *= r */
232 - d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
233 - mlt(h3, s2) + mlt(h4, s1);
234 - d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
235 - mlt(h3, s3) + mlt(h4, s2);
236 - d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
237 - mlt(h3, s4) + mlt(h4, s3);
238 - d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
239 - mlt(h3, r0) + mlt(h4, s4);
240 - d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
241 - mlt(h3, r1) + mlt(h4, r0);
242 -
243 - /* (partial) h %= p */
244 - d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff);
245 - d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff);
246 - d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff);
247 - d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff);
248 - h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
249 - h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
250 -
251 - src += POLY1305_BLOCK_SIZE;
252 - } while (--nblocks);
253 -
254 - state->h[0] = h0;
255 - state->h[1] = h1;
256 - state->h[2] = h2;
257 - state->h[3] = h3;
258 - state->h[4] = h4;
259 -}
260 -
261 -void poly1305_core_blocks(struct poly1305_state *state,
262 - const struct poly1305_key *key,
263 - const void *src, unsigned int nblocks)
264 -{
265 - poly1305_blocks_internal(state, key, src, nblocks, 1 << 24);
266 -}
267 -EXPORT_SYMBOL_GPL(poly1305_core_blocks);
268 -
269 -static void poly1305_blocks(struct poly1305_desc_ctx *dctx,
270 - const u8 *src, unsigned int srclen, u32 hibit)
271 +static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
272 + unsigned int srclen)
273 {
274 unsigned int datalen;
275
276 @@ -174,8 +43,8 @@ static void poly1305_blocks(struct poly1
277 srclen = datalen;
278 }
279
280 - poly1305_blocks_internal(&dctx->h, &dctx->r,
281 - src, srclen / POLY1305_BLOCK_SIZE, hibit);
282 + poly1305_core_blocks(&dctx->h, &dctx->r, src,
283 + srclen / POLY1305_BLOCK_SIZE, 1);
284 }
285
286 int crypto_poly1305_update(struct shash_desc *desc,
287 @@ -193,13 +62,13 @@ int crypto_poly1305_update(struct shash_
288
289 if (dctx->buflen == POLY1305_BLOCK_SIZE) {
290 poly1305_blocks(dctx, dctx->buf,
291 - POLY1305_BLOCK_SIZE, 1 << 24);
292 + POLY1305_BLOCK_SIZE);
293 dctx->buflen = 0;
294 }
295 }
296
297 if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
298 - poly1305_blocks(dctx, src, srclen, 1 << 24);
299 + poly1305_blocks(dctx, src, srclen);
300 src += srclen - (srclen % POLY1305_BLOCK_SIZE);
301 srclen %= POLY1305_BLOCK_SIZE;
302 }
303 @@ -213,54 +82,6 @@ int crypto_poly1305_update(struct shash_
304 }
305 EXPORT_SYMBOL_GPL(crypto_poly1305_update);
306
307 -void poly1305_core_emit(const struct poly1305_state *state, void *dst)
308 -{
309 - u32 h0, h1, h2, h3, h4;
310 - u32 g0, g1, g2, g3, g4;
311 - u32 mask;
312 -
313 - /* fully carry h */
314 - h0 = state->h[0];
315 - h1 = state->h[1];
316 - h2 = state->h[2];
317 - h3 = state->h[3];
318 - h4 = state->h[4];
319 -
320 - h2 += (h1 >> 26); h1 = h1 & 0x3ffffff;
321 - h3 += (h2 >> 26); h2 = h2 & 0x3ffffff;
322 - h4 += (h3 >> 26); h3 = h3 & 0x3ffffff;
323 - h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
324 - h1 += (h0 >> 26); h0 = h0 & 0x3ffffff;
325 -
326 - /* compute h + -p */
327 - g0 = h0 + 5;
328 - g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff;
329 - g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff;
330 - g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff;
331 - g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
332 -
333 - /* select h if h < p, or h + -p if h >= p */
334 - mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
335 - g0 &= mask;
336 - g1 &= mask;
337 - g2 &= mask;
338 - g3 &= mask;
339 - g4 &= mask;
340 - mask = ~mask;
341 - h0 = (h0 & mask) | g0;
342 - h1 = (h1 & mask) | g1;
343 - h2 = (h2 & mask) | g2;
344 - h3 = (h3 & mask) | g3;
345 - h4 = (h4 & mask) | g4;
346 -
347 - /* h = h % (2^128) */
348 - put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0);
349 - put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4);
350 - put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8);
351 - put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12);
352 -}
353 -EXPORT_SYMBOL_GPL(poly1305_core_emit);
354 -
355 int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
356 {
357 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
358 @@ -274,7 +95,7 @@ int crypto_poly1305_final(struct shash_d
359 dctx->buf[dctx->buflen++] = 1;
360 memset(dctx->buf + dctx->buflen, 0,
361 POLY1305_BLOCK_SIZE - dctx->buflen);
362 - poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0);
363 + poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0);
364 }
365
366 poly1305_core_emit(&dctx->h, digest);
367 --- /dev/null
368 +++ b/include/crypto/internal/poly1305.h
369 @@ -0,0 +1,67 @@
370 +/* SPDX-License-Identifier: GPL-2.0 */
371 +/*
372 + * Common values for the Poly1305 algorithm
373 + */
374 +
375 +#ifndef _CRYPTO_INTERNAL_POLY1305_H
376 +#define _CRYPTO_INTERNAL_POLY1305_H
377 +
378 +#include <asm/unaligned.h>
379 +#include <linux/types.h>
380 +#include <crypto/poly1305.h>
381 +
382 +struct shash_desc;
383 +
384 +/*
385 + * Poly1305 core functions. These implement the ε-almost-∆-universal hash
386 + * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce
387 + * ("s key") at the end. They also only support block-aligned inputs.
388 + */
389 +void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key);
390 +static inline void poly1305_core_init(struct poly1305_state *state)
391 +{
392 + *state = (struct poly1305_state){};
393 +}
394 +
395 +void poly1305_core_blocks(struct poly1305_state *state,
396 + const struct poly1305_key *key, const void *src,
397 + unsigned int nblocks, u32 hibit);
398 +void poly1305_core_emit(const struct poly1305_state *state, void *dst);
399 +
400 +/* Crypto API helper functions for the Poly1305 MAC */
401 +int crypto_poly1305_init(struct shash_desc *desc);
402 +
403 +int crypto_poly1305_update(struct shash_desc *desc,
404 + const u8 *src, unsigned int srclen);
405 +int crypto_poly1305_final(struct shash_desc *desc, u8 *dst);
406 +
407 +/*
408 + * Poly1305 requires a unique key for each tag, which implies that we can't set
409 + * it on the tfm that gets accessed by multiple users simultaneously. Instead we
410 + * expect the key as the first 32 bytes in the update() call.
411 + */
412 +static inline
413 +unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
414 + const u8 *src, unsigned int srclen)
415 +{
416 + if (!dctx->sset) {
417 + if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
418 + poly1305_core_setkey(&dctx->r, src);
419 + src += POLY1305_BLOCK_SIZE;
420 + srclen -= POLY1305_BLOCK_SIZE;
421 + dctx->rset = true;
422 + }
423 + if (srclen >= POLY1305_BLOCK_SIZE) {
424 + dctx->s[0] = get_unaligned_le32(src + 0);
425 + dctx->s[1] = get_unaligned_le32(src + 4);
426 + dctx->s[2] = get_unaligned_le32(src + 8);
427 + dctx->s[3] = get_unaligned_le32(src + 12);
428 + src += POLY1305_BLOCK_SIZE;
429 + srclen -= POLY1305_BLOCK_SIZE;
430 + dctx->sset = true;
431 + }
432 + }
433 + return srclen;
434 +}
435 +
436 +#endif
437 --- a/include/crypto/poly1305.h
438 +++ b/include/crypto/poly1305.h
439 @@ -38,27 +38,4 @@ struct poly1305_desc_ctx {
440 bool sset;
441 };
442
443 -/*
444 - * Poly1305 core functions. These implement the ε-almost-∆-universal hash
445 - * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce
446 - * ("s key") at the end. They also only support block-aligned inputs.
447 - */
448 -void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key);
449 -static inline void poly1305_core_init(struct poly1305_state *state)
450 -{
451 - memset(state->h, 0, sizeof(state->h));
452 -}
453 -void poly1305_core_blocks(struct poly1305_state *state,
454 - const struct poly1305_key *key,
455 - const void *src, unsigned int nblocks);
456 -void poly1305_core_emit(const struct poly1305_state *state, void *dst);
457 -
458 -/* Crypto API helper functions for the Poly1305 MAC */
459 -int crypto_poly1305_init(struct shash_desc *desc);
460 -unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
461 - const u8 *src, unsigned int srclen);
462 -int crypto_poly1305_update(struct shash_desc *desc,
463 - const u8 *src, unsigned int srclen);
464 -int crypto_poly1305_final(struct shash_desc *desc, u8 *dst);
465 -
466 #endif
467 --- a/lib/crypto/Kconfig
468 +++ b/lib/crypto/Kconfig
469 @@ -37,5 +37,8 @@ config CRYPTO_LIB_CHACHA
470 config CRYPTO_LIB_DES
471 tristate
472
473 +config CRYPTO_LIB_POLY1305_GENERIC
474 + tristate
475 +
476 config CRYPTO_LIB_SHA256
477 tristate
478 --- a/lib/crypto/Makefile
479 +++ b/lib/crypto/Makefile
480 @@ -13,5 +13,8 @@ libarc4-y := arc4.o
481 obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
482 libdes-y := des.o
483
484 +obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += libpoly1305.o
485 +libpoly1305-y := poly1305.o
486 +
487 obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
488 libsha256-y := sha256.o
489 --- /dev/null
490 +++ b/lib/crypto/poly1305.c
491 @@ -0,0 +1,158 @@
492 +// SPDX-License-Identifier: GPL-2.0-or-later
493 +/*
494 + * Poly1305 authenticator algorithm, RFC7539
495 + *
496 + * Copyright (C) 2015 Martin Willi
497 + *
498 + * Based on public domain code by Andrew Moon and Daniel J. Bernstein.
499 + */
500 +
501 +#include <crypto/internal/poly1305.h>
502 +#include <linux/kernel.h>
503 +#include <linux/module.h>
504 +#include <asm/unaligned.h>
505 +
506 +static inline u64 mlt(u64 a, u64 b)
507 +{
508 + return a * b;
509 +}
510 +
511 +static inline u32 sr(u64 v, u_char n)
512 +{
513 + return v >> n;
514 +}
515 +
516 +static inline u32 and(u32 v, u32 mask)
517 +{
518 + return v & mask;
519 +}
520 +
521 +void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key)
522 +{
523 + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
524 + key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff;
525 + key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03;
526 + key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff;
527 + key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff;
528 + key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
529 +}
530 +EXPORT_SYMBOL_GPL(poly1305_core_setkey);
531 +
532 +void poly1305_core_blocks(struct poly1305_state *state,
533 + const struct poly1305_key *key, const void *src,
534 + unsigned int nblocks, u32 hibit)
535 +{
536 + u32 r0, r1, r2, r3, r4;
537 + u32 s1, s2, s3, s4;
538 + u32 h0, h1, h2, h3, h4;
539 + u64 d0, d1, d2, d3, d4;
540 +
541 + if (!nblocks)
542 + return;
543 +
544 + r0 = key->r[0];
545 + r1 = key->r[1];
546 + r2 = key->r[2];
547 + r3 = key->r[3];
548 + r4 = key->r[4];
549 +
550 + s1 = r1 * 5;
551 + s2 = r2 * 5;
552 + s3 = r3 * 5;
553 + s4 = r4 * 5;
554 +
555 + h0 = state->h[0];
556 + h1 = state->h[1];
557 + h2 = state->h[2];
558 + h3 = state->h[3];
559 + h4 = state->h[4];
560 +
561 + do {
562 + /* h += m[i] */
563 + h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
564 + h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
565 + h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
566 + h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
567 + h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24);
568 +
569 + /* h *= r */
570 + d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
571 + mlt(h3, s2) + mlt(h4, s1);
572 + d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
573 + mlt(h3, s3) + mlt(h4, s2);
574 + d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
575 + mlt(h3, s4) + mlt(h4, s3);
576 + d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
577 + mlt(h3, r0) + mlt(h4, s4);
578 + d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
579 + mlt(h3, r1) + mlt(h4, r0);
580 +
581 + /* (partial) h %= p */
582 + d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff);
583 + d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff);
584 + d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff);
585 + d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff);
586 + h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
587 + h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
588 +
589 + src += POLY1305_BLOCK_SIZE;
590 + } while (--nblocks);
591 +
592 + state->h[0] = h0;
593 + state->h[1] = h1;
594 + state->h[2] = h2;
595 + state->h[3] = h3;
596 + state->h[4] = h4;
597 +}
598 +EXPORT_SYMBOL_GPL(poly1305_core_blocks);
599 +
600 +void poly1305_core_emit(const struct poly1305_state *state, void *dst)
601 +{
602 + u32 h0, h1, h2, h3, h4;
603 + u32 g0, g1, g2, g3, g4;
604 + u32 mask;
605 +
606 + /* fully carry h */
607 + h0 = state->h[0];
608 + h1 = state->h[1];
609 + h2 = state->h[2];
610 + h3 = state->h[3];
611 + h4 = state->h[4];
612 +
613 + h2 += (h1 >> 26); h1 = h1 & 0x3ffffff;
614 + h3 += (h2 >> 26); h2 = h2 & 0x3ffffff;
615 + h4 += (h3 >> 26); h3 = h3 & 0x3ffffff;
616 + h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
617 + h1 += (h0 >> 26); h0 = h0 & 0x3ffffff;
618 +
619 + /* compute h + -p */
620 + g0 = h0 + 5;
621 + g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff;
622 + g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff;
623 + g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff;
624 + g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
625 +
626 + /* select h if h < p, or h + -p if h >= p */
627 + mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
628 + g0 &= mask;
629 + g1 &= mask;
630 + g2 &= mask;
631 + g3 &= mask;
632 + g4 &= mask;
633 + mask = ~mask;
634 + h0 = (h0 & mask) | g0;
635 + h1 = (h1 & mask) | g1;
636 + h2 = (h2 & mask) | g2;
637 + h3 = (h3 & mask) | g3;
638 + h4 = (h4 & mask) | g4;
639 +
640 + /* h = h % (2^128) */
641 + put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0);
642 + put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4);
643 + put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8);
644 + put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12);
645 +}
646 +EXPORT_SYMBOL_GPL(poly1305_core_emit);
647 +
648 +MODULE_LICENSE("GPL");
649 +MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");