kernel: 5.4: import wireguard backport
[openwrt/openwrt.git] / target / linux / generic / backport-5.4 / 080-wireguard-0014-crypto-x86-poly1305-unify-Poly1305-state-struct-with.patch
1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Ard Biesheuvel <ardb@kernel.org>
3 Date: Fri, 8 Nov 2019 13:22:20 +0100
4 Subject: [PATCH] crypto: x86/poly1305 - unify Poly1305 state struct with
5 generic code
6
7 commit ad8f5b88383ea685f2b8df2a12ee3e08089a1287 upstream.
8
9 In preparation for exposing a Poly1305 library interface directly from
10 the accelerated x86 driver, align the state descriptor of the x86 code
11 with the one used by the generic driver. This is needed to make the
12 library interface unified between all implementations.
13
14 Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
15 Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
16 Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
17 ---
18 arch/x86/crypto/poly1305_glue.c | 88 ++++++++++--------------------
19 crypto/poly1305_generic.c | 6 +-
20 include/crypto/internal/poly1305.h | 4 +-
21 include/crypto/poly1305.h | 18 +++---
22 4 files changed, 43 insertions(+), 73 deletions(-)
23
24 --- a/arch/x86/crypto/poly1305_glue.c
25 +++ b/arch/x86/crypto/poly1305_glue.c
26 @@ -14,40 +14,14 @@
27 #include <linux/module.h>
28 #include <asm/simd.h>
29
30 -struct poly1305_simd_desc_ctx {
31 - struct poly1305_desc_ctx base;
32 - /* derived key u set? */
33 - bool uset;
34 -#ifdef CONFIG_AS_AVX2
35 - /* derived keys r^3, r^4 set? */
36 - bool wset;
37 -#endif
38 - /* derived Poly1305 key r^2 */
39 - u32 u[5];
40 - /* ... silently appended r^3 and r^4 when using AVX2 */
41 -};
42 -
43 asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
44 const u32 *r, unsigned int blocks);
45 asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
46 unsigned int blocks, const u32 *u);
47 -#ifdef CONFIG_AS_AVX2
48 asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
49 unsigned int blocks, const u32 *u);
50 -static bool poly1305_use_avx2;
51 -#endif
52
53 -static int poly1305_simd_init(struct shash_desc *desc)
54 -{
55 - struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc);
56 -
57 - sctx->uset = false;
58 -#ifdef CONFIG_AS_AVX2
59 - sctx->wset = false;
60 -#endif
61 -
62 - return crypto_poly1305_init(desc);
63 -}
64 +static bool poly1305_use_avx2 __ro_after_init;
65
66 static void poly1305_simd_mult(u32 *a, const u32 *b)
67 {
68 @@ -63,53 +37,49 @@ static void poly1305_simd_mult(u32 *a, c
69 static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
70 const u8 *src, unsigned int srclen)
71 {
72 - struct poly1305_simd_desc_ctx *sctx;
73 unsigned int blocks, datalen;
74
75 - BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base));
76 - sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base);
77 -
78 if (unlikely(!dctx->sset)) {
79 datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
80 src += srclen - datalen;
81 srclen = datalen;
82 }
83
84 -#ifdef CONFIG_AS_AVX2
85 - if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
86 - if (unlikely(!sctx->wset)) {
87 - if (!sctx->uset) {
88 - memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
89 - poly1305_simd_mult(sctx->u, dctx->r.r);
90 - sctx->uset = true;
91 + if (IS_ENABLED(CONFIG_AS_AVX2) &&
92 + poly1305_use_avx2 &&
93 + srclen >= POLY1305_BLOCK_SIZE * 4) {
94 + if (unlikely(dctx->rset < 4)) {
95 + if (dctx->rset < 2) {
96 + dctx->r[1] = dctx->r[0];
97 + poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
98 }
99 - memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
100 - poly1305_simd_mult(sctx->u + 5, dctx->r.r);
101 - memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
102 - poly1305_simd_mult(sctx->u + 10, dctx->r.r);
103 - sctx->wset = true;
104 + dctx->r[2] = dctx->r[1];
105 + poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r);
106 + dctx->r[3] = dctx->r[2];
107 + poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r);
108 + dctx->rset = 4;
109 }
110 blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
111 - poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
112 - sctx->u);
113 + poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks,
114 + dctx->r[1].r);
115 src += POLY1305_BLOCK_SIZE * 4 * blocks;
116 srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
117 }
118 -#endif
119 +
120 if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
121 - if (unlikely(!sctx->uset)) {
122 - memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
123 - poly1305_simd_mult(sctx->u, dctx->r.r);
124 - sctx->uset = true;
125 + if (unlikely(dctx->rset < 2)) {
126 + dctx->r[1] = dctx->r[0];
127 + poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
128 + dctx->rset = 2;
129 }
130 blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
131 - poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
132 - sctx->u);
133 + poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r,
134 + blocks, dctx->r[1].r);
135 src += POLY1305_BLOCK_SIZE * 2 * blocks;
136 srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
137 }
138 if (srclen >= POLY1305_BLOCK_SIZE) {
139 - poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
140 + poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1);
141 srclen -= POLY1305_BLOCK_SIZE;
142 }
143 return srclen;
144 @@ -159,10 +129,10 @@ static int poly1305_simd_update(struct s
145
146 static struct shash_alg alg = {
147 .digestsize = POLY1305_DIGEST_SIZE,
148 - .init = poly1305_simd_init,
149 + .init = crypto_poly1305_init,
150 .update = poly1305_simd_update,
151 .final = crypto_poly1305_final,
152 - .descsize = sizeof(struct poly1305_simd_desc_ctx),
153 + .descsize = sizeof(struct poly1305_desc_ctx),
154 .base = {
155 .cra_name = "poly1305",
156 .cra_driver_name = "poly1305-simd",
157 @@ -177,14 +147,14 @@ static int __init poly1305_simd_mod_init
158 if (!boot_cpu_has(X86_FEATURE_XMM2))
159 return -ENODEV;
160
161 -#ifdef CONFIG_AS_AVX2
162 - poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
163 + poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) &&
164 + boot_cpu_has(X86_FEATURE_AVX) &&
165 boot_cpu_has(X86_FEATURE_AVX2) &&
166 cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
167 - alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
168 + alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32);
169 if (poly1305_use_avx2)
170 alg.descsize += 10 * sizeof(u32);
171 -#endif
172 +
173 return crypto_register_shash(&alg);
174 }
175
176 --- a/crypto/poly1305_generic.c
177 +++ b/crypto/poly1305_generic.c
178 @@ -25,7 +25,7 @@ int crypto_poly1305_init(struct shash_de
179
180 poly1305_core_init(&dctx->h);
181 dctx->buflen = 0;
182 - dctx->rset = false;
183 + dctx->rset = 0;
184 dctx->sset = false;
185
186 return 0;
187 @@ -43,7 +43,7 @@ static void poly1305_blocks(struct poly1
188 srclen = datalen;
189 }
190
191 - poly1305_core_blocks(&dctx->h, &dctx->r, src,
192 + poly1305_core_blocks(&dctx->h, dctx->r, src,
193 srclen / POLY1305_BLOCK_SIZE, 1);
194 }
195
196 @@ -95,7 +95,7 @@ int crypto_poly1305_final(struct shash_d
197 dctx->buf[dctx->buflen++] = 1;
198 memset(dctx->buf + dctx->buflen, 0,
199 POLY1305_BLOCK_SIZE - dctx->buflen);
200 - poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0);
201 + poly1305_core_blocks(&dctx->h, dctx->r, dctx->buf, 1, 0);
202 }
203
204 poly1305_core_emit(&dctx->h, digest);
205 --- a/include/crypto/internal/poly1305.h
206 +++ b/include/crypto/internal/poly1305.h
207 @@ -46,10 +46,10 @@ unsigned int crypto_poly1305_setdesckey(
208 {
209 if (!dctx->sset) {
210 if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
211 - poly1305_core_setkey(&dctx->r, src);
212 + poly1305_core_setkey(dctx->r, src);
213 src += POLY1305_BLOCK_SIZE;
214 srclen -= POLY1305_BLOCK_SIZE;
215 - dctx->rset = true;
216 + dctx->rset = 1;
217 }
218 if (srclen >= POLY1305_BLOCK_SIZE) {
219 dctx->s[0] = get_unaligned_le32(src + 0);
220 --- a/include/crypto/poly1305.h
221 +++ b/include/crypto/poly1305.h
222 @@ -22,20 +22,20 @@ struct poly1305_state {
223 };
224
225 struct poly1305_desc_ctx {
226 - /* key */
227 - struct poly1305_key r;
228 - /* finalize key */
229 - u32 s[4];
230 - /* accumulator */
231 - struct poly1305_state h;
232 /* partial buffer */
233 u8 buf[POLY1305_BLOCK_SIZE];
234 /* bytes used in partial buffer */
235 unsigned int buflen;
236 - /* r key has been set */
237 - bool rset;
238 - /* s key has been set */
239 + /* how many keys have been set in r[] */
240 + unsigned short rset;
241 + /* whether s[] has been set */
242 bool sset;
243 + /* finalize key */
244 + u32 s[4];
245 + /* accumulator */
246 + struct poly1305_state h;
247 + /* key */
248 + struct poly1305_key r[1];
249 };
250
251 #endif