target/linux/generic/backport-5.10/050-v5.16-03-mips-bpf-Add-new-eBPF-JIT-for-64-bit-MIPS.patch
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Date: Tue, 5 Oct 2021 18:54:05 +0200
Subject: [PATCH] mips: bpf: Add new eBPF JIT for 64-bit MIPS

This is an implementation of an eBPF JIT for 64-bit MIPS III-V and
MIPS64r1-r6. It uses the same framework introduced by the 32-bit JIT.

Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
---
 create mode 100644 arch/mips/net/bpf_jit_comp64.c

--- /dev/null
+++ b/arch/mips/net/bpf_jit_comp64.c
@@ -0,0 +1,1034 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Just-In-Time compiler for eBPF bytecode on MIPS.
+ * Implementation of JIT functions for 64-bit CPUs.
+ *
+ * Copyright (c) 2021 Anyfi Networks AB.
+ * Author: Johan Almbladh <johan.almbladh@gmail.com>
+ *
+ * Based on code and ideas from
+ * Copyright (c) 2017 Cavium, Inc.
+ * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
+ * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <asm/cpu-features.h>
+#include <asm/isa-rev.h>
+#include <asm/uasm.h>
+
+#include "bpf_jit_comp.h"
+
+/* MIPS t0-t3 are not available in the n64 ABI */
+#undef MIPS_R_T0
+#undef MIPS_R_T1
+#undef MIPS_R_T2
+#undef MIPS_R_T3
+
+/* Stack is 16-byte aligned in n64 ABI */
+#define MIPS_STACK_ALIGNMENT 16
+
+/* Extra 64-bit eBPF registers used by JIT */
+#define JIT_REG_TC (MAX_BPF_JIT_REG + 0)
+#define JIT_REG_ZX (MAX_BPF_JIT_REG + 1)
+
+/* Number of prologue bytes to skip when doing a tail call */
+#define JIT_TCALL_SKIP 4
+
+/* Callee-saved CPU registers that the JIT must preserve */
+#define JIT_CALLEE_REGS \
+	(BIT(MIPS_R_S0) | \
+	 BIT(MIPS_R_S1) | \
+	 BIT(MIPS_R_S2) | \
+	 BIT(MIPS_R_S3) | \
+	 BIT(MIPS_R_S4) | \
+	 BIT(MIPS_R_S5) | \
+	 BIT(MIPS_R_S6) | \
+	 BIT(MIPS_R_S7) | \
+	 BIT(MIPS_R_GP) | \
+	 BIT(MIPS_R_FP) | \
+	 BIT(MIPS_R_RA))
+
+/* Caller-saved CPU registers available for JIT use */
+#define JIT_CALLER_REGS \
+	(BIT(MIPS_R_A5) | \
+	 BIT(MIPS_R_A6) | \
+	 BIT(MIPS_R_A7))
+/*
+ * Mapping of 64-bit eBPF registers to 64-bit native MIPS registers.
+ * MIPS registers t4 - t7 may be used by the JIT as temporary registers.
+ * MIPS registers t8 - t9 are reserved for single-register common functions.
+ */
+static const u8 bpf2mips64[] = {
+	/* Return value from in-kernel function, and exit value from eBPF */
+	[BPF_REG_0] = MIPS_R_V0,
+	/* Arguments from eBPF program to in-kernel function */
+	[BPF_REG_1] = MIPS_R_A0,
+	[BPF_REG_2] = MIPS_R_A1,
+	[BPF_REG_3] = MIPS_R_A2,
+	[BPF_REG_4] = MIPS_R_A3,
+	[BPF_REG_5] = MIPS_R_A4,
+	/* Callee-saved registers that in-kernel function will preserve */
+	[BPF_REG_6] = MIPS_R_S0,
+	[BPF_REG_7] = MIPS_R_S1,
+	[BPF_REG_8] = MIPS_R_S2,
+	[BPF_REG_9] = MIPS_R_S3,
+	/* Read-only frame pointer to access the eBPF stack */
+	[BPF_REG_FP] = MIPS_R_FP,
+	/* Temporary register for blinding constants */
+	[BPF_REG_AX] = MIPS_R_AT,
+	/* Tail call count register, caller-saved */
+	[JIT_REG_TC] = MIPS_R_A5,
+	/* Constant for register zero-extension */
+	[JIT_REG_ZX] = MIPS_R_V1,
+};
+
+/*
+ * MIPS 32-bit operations on 64-bit registers generate a sign-extended
+ * result. However, the eBPF ISA mandates zero-extension, so we rely on the
+ * verifier to add that for us (emit_zext_ver). In addition, ALU arithmetic
+ * operations, right shift and byte swap require properly sign-extended
+ * operands or the result is unpredictable. We emit explicit sign-extensions
+ * in those cases.
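+ *
+ * For example, a 32-bit "addu" of 0x7fffffff and 0x1 leaves the
+ * sign-extended value 0xffffffff80000000 in the destination register,
+ * which is why emit_zext_ver() follows each 32-bit ALU result.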
+ */
+
+/* Sign extension */
+static void emit_sext(struct jit_context *ctx, u8 dst, u8 src)
+{
+	emit(ctx, sll, dst, src, 0);
+	clobber_reg(ctx, dst);
+}
+
+/* Zero extension */
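+/*
+ * MIPS64r2 and r6 can clear bits 63:32 with a single dinsu. Earlier
+ * ISAs AND with the constant 0x00000000ffffffff instead, kept in the
+ * dedicated ZX register that the prologue initializes when needed.
+ */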
+static void emit_zext(struct jit_context *ctx, u8 dst)
+{
+	if (cpu_has_mips64r2 || cpu_has_mips64r6) {
+		emit(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+	} else {
+		emit(ctx, and, dst, dst, bpf2mips64[JIT_REG_ZX]);
+		access_reg(ctx, JIT_REG_ZX); /* We need the ZX register */
+	}
+	clobber_reg(ctx, dst);
+}
+
+/* Zero extension, if verifier does not do it for us */
+static void emit_zext_ver(struct jit_context *ctx, u8 dst)
+{
+	if (!ctx->program->aux->verifier_zext)
+		emit_zext(ctx, dst);
+}
+
+/* dst = imm (64-bit) */
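+/*
+ * Constants that fit a sign-extended 16-bit or 32-bit value are loaded
+ * with one or two instructions. Wider constants are built 16 bits at a
+ * time; e.g. dst = 0x0123456789abcdef emits
+ *
+ *   ori dst, zero, 0x0123
+ *   dsll dst, dst, 16; ori dst, dst, 0x4567
+ *   dsll dst, dst, 16; ori dst, dst, 0x89ab
+ *   dsll dst, dst, 16; ori dst, dst, 0xcdef
+ *
+ * Leading zero half words emit nothing, and later zero half words emit
+ * only the shift.
+ */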
+static void emit_mov_i64(struct jit_context *ctx, u8 dst, u64 imm64)
+{
+	if (imm64 >= 0xffffffffffff8000ULL || imm64 < 0x8000ULL) {
+		emit(ctx, daddiu, dst, MIPS_R_ZERO, (s16)imm64);
+	} else if (imm64 >= 0xffffffff80000000ULL ||
+		   (imm64 < 0x80000000 && imm64 > 0xffff)) {
+		emit(ctx, lui, dst, (s16)(imm64 >> 16));
+		emit(ctx, ori, dst, dst, (u16)imm64 & 0xffff);
+	} else {
+		u8 acc = MIPS_R_ZERO;
+		int k;
+
+		for (k = 0; k < 4; k++) {
+			u16 half = imm64 >> (48 - 16 * k);
+
+			if (acc == dst)
+				emit(ctx, dsll, dst, dst, 16);
+
+			if (half) {
+				emit(ctx, ori, dst, acc, half);
+				acc = dst;
+			}
+		}
+	}
+	clobber_reg(ctx, dst);
+}
+
+/* ALU immediate operation (64-bit) */
+static void emit_alu_i64(struct jit_context *ctx, u8 dst, s32 imm, u8 op)
+{
+	switch (BPF_OP(op)) {
+	/* dst = dst | imm */
+	case BPF_OR:
+		emit(ctx, ori, dst, dst, (u16)imm);
+		break;
+	/* dst = dst ^ imm */
+	case BPF_XOR:
+		emit(ctx, xori, dst, dst, (u16)imm);
+		break;
+	/* dst = -dst */
+	case BPF_NEG:
+		emit(ctx, dsubu, dst, MIPS_R_ZERO, dst);
+		break;
+	/* dst = dst << imm */
+	case BPF_LSH:
+		emit(ctx, dsll_safe, dst, dst, imm);
+		break;
+	/* dst = dst >> imm */
+	case BPF_RSH:
+		emit(ctx, dsrl_safe, dst, dst, imm);
+		break;
+	/* dst = dst >> imm (arithmetic) */
+	case BPF_ARSH:
+		emit(ctx, dsra_safe, dst, dst, imm);
+		break;
+	/* dst = dst + imm */
+	case BPF_ADD:
+		emit(ctx, daddiu, dst, dst, imm);
+		break;
+	/* dst = dst - imm */
+	case BPF_SUB:
+		emit(ctx, daddiu, dst, dst, -imm);
+		break;
+	default:
+		/* Width-generic operations */
+		emit_alu_i(ctx, dst, imm, op);
+	}
+	clobber_reg(ctx, dst);
+}
+
+/* ALU register operation (64-bit) */
+static void emit_alu_r64(struct jit_context *ctx, u8 dst, u8 src, u8 op)
+{
+	switch (BPF_OP(op)) {
+	/* dst = dst << src */
+	case BPF_LSH:
+		emit(ctx, dsllv, dst, dst, src);
+		break;
+	/* dst = dst >> src */
+	case BPF_RSH:
+		emit(ctx, dsrlv, dst, dst, src);
+		break;
+	/* dst = dst >> src (arithmetic) */
+	case BPF_ARSH:
+		emit(ctx, dsrav, dst, dst, src);
+		break;
+	/* dst = dst + src */
+	case BPF_ADD:
+		emit(ctx, daddu, dst, dst, src);
+		break;
+	/* dst = dst - src */
+	case BPF_SUB:
+		emit(ctx, dsubu, dst, dst, src);
+		break;
+	/* dst = dst * src */
+	case BPF_MUL:
+		if (cpu_has_mips64r6) {
+			emit(ctx, dmulu, dst, dst, src);
+		} else {
+			emit(ctx, dmultu, dst, src);
+			emit(ctx, mflo, dst);
+		}
+		break;
+	/* dst = dst / src */
+	case BPF_DIV:
+		if (cpu_has_mips64r6) {
+			emit(ctx, ddivu_r6, dst, dst, src);
+		} else {
+			emit(ctx, ddivu, dst, src);
+			emit(ctx, mflo, dst);
+		}
+		break;
+	/* dst = dst % src */
+	case BPF_MOD:
+		if (cpu_has_mips64r6) {
+			emit(ctx, dmodu, dst, dst, src);
+		} else {
+			emit(ctx, ddivu, dst, src);
+			emit(ctx, mfhi, dst);
+		}
+		break;
+	default:
+		/* Width-generic operations */
+		emit_alu_r(ctx, dst, src, op);
+	}
+	clobber_reg(ctx, dst);
+}
+
+/* Swap sub words in a register double word */
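+/*
+ * Example: with mask = 0x0000ffff0000ffff and bits = 16, the half
+ * words within each word are exchanged, so 0xaaaabbbbccccdddd becomes
+ * 0xbbbbaaaaddddcccc.
+ */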
+static void emit_swap_r64(struct jit_context *ctx, u8 dst, u8 mask, u32 bits)
+{
+	u8 tmp = MIPS_R_T9;
+
+	emit(ctx, and, tmp, dst, mask);  /* tmp = dst & mask */
+	emit(ctx, dsll, tmp, tmp, bits); /* tmp = tmp << bits */
+	emit(ctx, dsrl, dst, dst, bits); /* dst = dst >> bits */
+	emit(ctx, and, dst, dst, mask);  /* dst = dst & mask */
+	emit(ctx, or, dst, dst, tmp);    /* dst = dst | tmp */
+}
+
+/* Swap bytes and truncate a register double word, word or half word */
+static void emit_bswap_r64(struct jit_context *ctx, u8 dst, u32 width)
+{
+	switch (width) {
+	/* Swap bytes in a double word */
+	case 64:
+		if (cpu_has_mips64r2 || cpu_has_mips64r6) {
+			emit(ctx, dsbh, dst, dst);
+			emit(ctx, dshd, dst, dst);
+		} else {
+			u8 t1 = MIPS_R_T6;
+			u8 t2 = MIPS_R_T7;
+
+			emit(ctx, dsll32, t2, dst, 0);  /* t2 = dst << 32 */
+			emit(ctx, dsrl32, dst, dst, 0); /* dst = dst >> 32 */
+			emit(ctx, or, dst, dst, t2);    /* dst = dst | t2 */
+
+			emit(ctx, ori, t2, MIPS_R_ZERO, 0xffff);
+			emit(ctx, dsll32, t1, t2, 0);   /* t1 = t2 << 32 */
+			emit(ctx, or, t1, t1, t2);      /* t1 = t1 | t2 */
+			emit_swap_r64(ctx, dst, t1, 16);/* dst = swap16(dst) */
+
+			emit(ctx, lui, t2, 0xff);       /* t2 = 0x00ff0000 */
+			emit(ctx, ori, t2, t2, 0xff);   /* t2 = t2 | 0x00ff */
+			emit(ctx, dsll32, t1, t2, 0);   /* t1 = t2 << 32 */
+			emit(ctx, or, t1, t1, t2);      /* t1 = t1 | t2 */
+			emit_swap_r64(ctx, dst, t1, 8); /* dst = swap8(dst) */
+		}
+		break;
+	/* Swap bytes in a word */
+	/* Swap bytes in a half word */
+	case 32:
+	case 16:
+		emit_sext(ctx, dst, dst);
+		emit_bswap_r(ctx, dst, width);
+		if (cpu_has_mips64r2 || cpu_has_mips64r6)
+			emit_zext(ctx, dst);
+		break;
+	}
+	clobber_reg(ctx, dst);
+}
+
+/* Truncate a register double word, word or half word */
+static void emit_trunc_r64(struct jit_context *ctx, u8 dst, u32 width)
+{
+	switch (width) {
+	case 64:
+		break;
+	/* Zero-extend a word */
+	case 32:
+		emit_zext(ctx, dst);
+		break;
+	/* Zero-extend a half word */
+	case 16:
+		emit(ctx, andi, dst, dst, 0xffff);
+		break;
+	}
+	clobber_reg(ctx, dst);
+}
+
+/* Load operation: dst = *(size*)(src + off) */
+static void emit_ldx(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 size)
+{
+	switch (size) {
+	/* Load a byte */
+	case BPF_B:
+		emit(ctx, lbu, dst, off, src);
+		break;
+	/* Load a half word */
+	case BPF_H:
+		emit(ctx, lhu, dst, off, src);
+		break;
+	/* Load a word */
+	case BPF_W:
+		emit(ctx, lwu, dst, off, src);
+		break;
+	/* Load a double word */
+	case BPF_DW:
+		emit(ctx, ld, dst, off, src);
+		break;
+	}
+	clobber_reg(ctx, dst);
+}
+
+/* Store operation: *(size *)(dst + off) = src */
+static void emit_stx(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 size)
+{
+	switch (size) {
+	/* Store a byte */
+	case BPF_B:
+		emit(ctx, sb, src, off, dst);
+		break;
+	/* Store a half word */
+	case BPF_H:
+		emit(ctx, sh, src, off, dst);
+		break;
+	/* Store a word */
+	case BPF_W:
+		emit(ctx, sw, src, off, dst);
+		break;
+	/* Store a double word */
+	case BPF_DW:
+		emit(ctx, sd, src, off, dst);
+		break;
+	}
+}
+
+/* Atomic read-modify-write */
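+/*
+ * The update is emitted as a load-linked/store-conditional sequence:
+ * scd stores t2 only if the doubleword was not modified since the lld,
+ * otherwise it clears t2 and the beqz branches back -16 bytes, i.e. to
+ * the lld, to retry the whole sequence.
+ */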
+static void emit_atomic_r64(struct jit_context *ctx,
+			    u8 dst, u8 src, s16 off, u8 code)
+{
+	u8 t1 = MIPS_R_T6;
+	u8 t2 = MIPS_R_T7;
+
+	emit(ctx, lld, t1, off, dst);
+	switch (code) {
+	case BPF_ADD:
+		emit(ctx, daddu, t2, t1, src);
+		break;
+	case BPF_AND:
+		emit(ctx, and, t2, t1, src);
+		break;
+	case BPF_OR:
+		emit(ctx, or, t2, t1, src);
+		break;
+	case BPF_XOR:
+		emit(ctx, xor, t2, t1, src);
+		break;
+	}
+	emit(ctx, scd, t2, off, dst);
+	emit(ctx, beqz, t2, -16);
+	emit(ctx, nop); /* Delay slot */
+}
+
+/* Function call */
+static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn)
+{
+	u8 zx = bpf2mips64[JIT_REG_ZX];
+	u8 tmp = MIPS_R_T6;
+	bool fixed;
+	u64 addr;
+
+	/* Decode the call address */
+	if (bpf_jit_get_func_addr(ctx->program, insn, false,
+				  &addr, &fixed) < 0)
+		return -1;
+	if (!fixed)
+		return -1;
+
+	/* Push caller-saved registers on stack */
+	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0);
+
+	/* Emit function call */
+	emit_mov_i64(ctx, tmp, addr);
+	emit(ctx, jalr, MIPS_R_RA, tmp);
+	emit(ctx, nop); /* Delay slot */
+
+	/* Restore caller-saved registers */
+	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0);
+
+	/* Re-initialize the JIT zero-extension register if accessed */
+	if (ctx->accessed & BIT(JIT_REG_ZX)) {
+		emit(ctx, daddiu, zx, MIPS_R_ZERO, -1);
+		emit(ctx, dsrl32, zx, zx, 0);
+	}
+
+	clobber_reg(ctx, MIPS_R_RA);
+	clobber_reg(ctx, MIPS_R_V0);
+	clobber_reg(ctx, MIPS_R_V1);
+	return 0;
+}
+
+/* Function tail call */
+static int emit_tail_call(struct jit_context *ctx)
+{
+	u8 ary = bpf2mips64[BPF_REG_2];
+	u8 ind = bpf2mips64[BPF_REG_3];
+	u8 tcc = bpf2mips64[JIT_REG_TC];
+	u8 tmp = MIPS_R_T6;
+	int off;
+
+	/*
+	 * Tail call:
+	 * eBPF R1 - function argument (context ptr), passed in a0
+	 * eBPF R2 - ptr to object with array of function entry points
+	 * eBPF R3 - array index of function to be called
+	 */
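+	/*
+	 * The target program is entered JIT_TCALL_SKIP bytes into its
+	 * prologue, past the instruction that resets the tail call count,
+	 * so the running count is carried over in the TC register (a5).
+	 */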
+
+	/* if (ind >= ary->map.max_entries) goto out */
+	off = offsetof(struct bpf_array, map.max_entries);
+	if (off > 0x7fff)
+		return -1;
+	emit(ctx, lwu, tmp, off, ary);            /* tmp = ary->map.max_entries */
+	emit(ctx, sltu, tmp, ind, tmp);           /* tmp = ind < tmp */
+	emit(ctx, beqz, tmp, get_offset(ctx, 1)); /* PC += off(1) if tmp == 0 */
+
+	/* if (--TCC < 0) goto out */
+	emit(ctx, daddiu, tcc, tcc, -1);          /* tcc-- (delay slot) */
+	emit(ctx, bltz, tcc, get_offset(ctx, 1)); /* PC += off(1) if tcc < 0 */
+						  /* (next insn delay slot) */
+	/* prog = ary->ptrs[ind] */
+	off = offsetof(struct bpf_array, ptrs);
+	if (off > 0x7fff)
+		return -1;
+	emit(ctx, dsll, tmp, ind, 3);             /* tmp = ind << 3 */
+	emit(ctx, daddu, tmp, tmp, ary);          /* tmp += ary */
+	emit(ctx, ld, tmp, off, tmp);             /* tmp = *(tmp + off) */
+
+	/* if (prog == 0) goto out */
+	emit(ctx, beqz, tmp, get_offset(ctx, 1)); /* PC += off(1) if tmp == 0 */
+	emit(ctx, nop);                           /* Delay slot */
+
+	/* func = prog->bpf_func + 4 (prologue skip offset) */
+	off = offsetof(struct bpf_prog, bpf_func);
+	if (off > 0x7fff)
+		return -1;
+	emit(ctx, ld, tmp, off, tmp);                /* tmp = *(tmp + off) */
+	emit(ctx, daddiu, tmp, tmp, JIT_TCALL_SKIP); /* tmp += skip (4) */
+
+	/* goto func */
+	build_epilogue(ctx, tmp);
+	access_reg(ctx, JIT_REG_TC);
+	return 0;
+}
+
+/*
+ * Stack frame layout for a JITed program (stack grows down).
+ *
+ * Higher address  : Previous stack frame      :
+ *                 +===========================+ <--- MIPS sp before call
+ *                 | Callee-saved registers,   |
+ *                 | including RA and FP       |
+ *                 +---------------------------+ <--- eBPF FP (MIPS fp)
+ *                 | Local eBPF variables      |
+ *                 | allocated by program      |
+ *                 +---------------------------+
+ *                 | Reserved for caller-saved |
+ *                 | registers                 |
+ * Lower address   +===========================+ <--- MIPS sp
+ */
+
+/* Build program prologue to set up the stack and registers */
+void build_prologue(struct jit_context *ctx)
+{
+	u8 fp = bpf2mips64[BPF_REG_FP];
+	u8 tc = bpf2mips64[JIT_REG_TC];
+	u8 zx = bpf2mips64[JIT_REG_ZX];
+	int stack, saved, locals, reserved;
+
+	/*
+	 * The first instruction initializes the tail call count register.
+	 * On a tail call, the calling function jumps into the prologue
+	 * after this instruction.
+	 */
+	emit(ctx, addiu, tc, MIPS_R_ZERO, min(MAX_TAIL_CALL_CNT + 1, 0xffff));
+
+	/* === Entry-point for tail calls === */
+
+	/*
+	 * If the eBPF frame pointer and tail call count registers were
+	 * accessed they must be preserved. Mark them as clobbered here
+	 * to save and restore them on the stack as needed.
+	 */
+	if (ctx->accessed & BIT(BPF_REG_FP))
+		clobber_reg(ctx, fp);
+	if (ctx->accessed & BIT(JIT_REG_TC))
+		clobber_reg(ctx, tc);
+	if (ctx->accessed & BIT(JIT_REG_ZX))
+		clobber_reg(ctx, zx);
+
+	/* Compute the stack space needed for callee-saved registers */
+	saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u64);
+	saved = ALIGN(saved, MIPS_STACK_ALIGNMENT);
+
+	/* Stack space used by eBPF program local data */
+	locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT);
+
+	/*
+	 * If we are emitting function calls, reserve extra stack space for
+	 * caller-saved registers needed by the JIT. The required space is
+	 * computed automatically during resource usage discovery (pass 1).
+	 */
+	reserved = ctx->stack_used;
+
+	/* Allocate the stack frame */
+	stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT);
+	if (stack)
+		emit(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, -stack);
+
+	/* Store callee-saved registers on stack */
+	push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved);
+
+	/* Initialize the eBPF frame pointer if accessed */
+	if (ctx->accessed & BIT(BPF_REG_FP))
+		emit(ctx, daddiu, fp, MIPS_R_SP, stack - saved);
+
+	/* Initialize the eBPF JIT zero-extension register if accessed */
+	if (ctx->accessed & BIT(JIT_REG_ZX)) {
+		emit(ctx, daddiu, zx, MIPS_R_ZERO, -1);
+		emit(ctx, dsrl32, zx, zx, 0);
+	}
+
+	ctx->saved_size = saved;
+	ctx->stack_size = stack;
+}
+
+/* Build the program epilogue to restore the stack and registers */
+void build_epilogue(struct jit_context *ctx, int dest_reg)
+{
+	/* Restore callee-saved registers from stack */
+	pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0,
+		 ctx->stack_size - ctx->saved_size);
+
+	/* Release the stack frame */
+	if (ctx->stack_size)
+		emit(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size);
+
+	/* Jump to return address and sign-extend the 32-bit return value */
+	emit(ctx, jr, dest_reg);
+	emit(ctx, sll, MIPS_R_V0, MIPS_R_V0, 0); /* Delay slot */
+}
+
+/* Build one eBPF instruction */
+int build_insn(const struct bpf_insn *insn, struct jit_context *ctx)
+{
+	u8 dst = bpf2mips64[insn->dst_reg];
+	u8 src = bpf2mips64[insn->src_reg];
+	u8 code = insn->code;
+	s16 off = insn->off;
+	s32 imm = insn->imm;
+	s32 val, rel;
+	u8 alu, jmp;
+
+	switch (code) {
+	/* ALU operations */
+	/* dst = imm */
+	case BPF_ALU | BPF_MOV | BPF_K:
+		emit_mov_i(ctx, dst, imm);
+		emit_zext_ver(ctx, dst);
+		break;
+	/* dst = src */
+	case BPF_ALU | BPF_MOV | BPF_X:
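+		/*
+		 * A mov32 inserted by the verifier for zero-extension
+		 * carries imm == 1.
+		 */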
+		if (imm == 1) {
+			/* Special mov32 for zext */
+			emit_zext(ctx, dst);
+		} else {
+			emit_mov_r(ctx, dst, src);
+			emit_zext_ver(ctx, dst);
+		}
+		break;
+	/* dst = -dst */
+	case BPF_ALU | BPF_NEG:
+		emit_sext(ctx, dst, dst);
+		emit_alu_i(ctx, dst, 0, BPF_NEG);
+		emit_zext_ver(ctx, dst);
+		break;
+	/* dst = dst & imm */
+	/* dst = dst | imm */
+	/* dst = dst ^ imm */
+	/* dst = dst << imm */
+	case BPF_ALU | BPF_OR | BPF_K:
+	case BPF_ALU | BPF_AND | BPF_K:
+	case BPF_ALU | BPF_XOR | BPF_K:
+	case BPF_ALU | BPF_LSH | BPF_K:
+		if (!valid_alu_i(BPF_OP(code), imm)) {
+			emit_mov_i(ctx, MIPS_R_T4, imm);
+			emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code));
+		} else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
+			emit_alu_i(ctx, dst, val, alu);
+		}
+		emit_zext_ver(ctx, dst);
+		break;
+	/* dst = dst >> imm */
+	/* dst = dst >> imm (arithmetic) */
+	/* dst = dst + imm */
+	/* dst = dst - imm */
+	/* dst = dst * imm */
+	/* dst = dst / imm */
+	/* dst = dst % imm */
+	case BPF_ALU | BPF_RSH | BPF_K:
+	case BPF_ALU | BPF_ARSH | BPF_K:
+	case BPF_ALU | BPF_ADD | BPF_K:
+	case BPF_ALU | BPF_SUB | BPF_K:
+	case BPF_ALU | BPF_MUL | BPF_K:
+	case BPF_ALU | BPF_DIV | BPF_K:
+	case BPF_ALU | BPF_MOD | BPF_K:
+		if (!valid_alu_i(BPF_OP(code), imm)) {
+			emit_sext(ctx, dst, dst);
+			emit_mov_i(ctx, MIPS_R_T4, imm);
+			emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code));
+		} else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
+			emit_sext(ctx, dst, dst);
+			emit_alu_i(ctx, dst, val, alu);
+		}
+		emit_zext_ver(ctx, dst);
+		break;
+	/* dst = dst & src */
+	/* dst = dst | src */
+	/* dst = dst ^ src */
+	/* dst = dst << src */
+	case BPF_ALU | BPF_AND | BPF_X:
+	case BPF_ALU | BPF_OR | BPF_X:
+	case BPF_ALU | BPF_XOR | BPF_X:
+	case BPF_ALU | BPF_LSH | BPF_X:
+		emit_alu_r(ctx, dst, src, BPF_OP(code));
+		emit_zext_ver(ctx, dst);
+		break;
+	/* dst = dst >> src */
+	/* dst = dst >> src (arithmetic) */
+	/* dst = dst + src */
+	/* dst = dst - src */
+	/* dst = dst * src */
+	/* dst = dst / src */
+	/* dst = dst % src */
+	case BPF_ALU | BPF_RSH | BPF_X:
+	case BPF_ALU | BPF_ARSH | BPF_X:
+	case BPF_ALU | BPF_ADD | BPF_X:
+	case BPF_ALU | BPF_SUB | BPF_X:
+	case BPF_ALU | BPF_MUL | BPF_X:
+	case BPF_ALU | BPF_DIV | BPF_X:
+	case BPF_ALU | BPF_MOD | BPF_X:
+		emit_sext(ctx, dst, dst);
+		emit_sext(ctx, MIPS_R_T4, src);
+		emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code));
+		emit_zext_ver(ctx, dst);
+		break;
+	/* dst = imm (64-bit) */
+	case BPF_ALU64 | BPF_MOV | BPF_K:
+		emit_mov_i(ctx, dst, imm);
+		break;
+	/* dst = src (64-bit) */
+	case BPF_ALU64 | BPF_MOV | BPF_X:
+		emit_mov_r(ctx, dst, src);
+		break;
+	/* dst = -dst (64-bit) */
+	case BPF_ALU64 | BPF_NEG:
+		emit_alu_i64(ctx, dst, 0, BPF_NEG);
+		break;
+	/* dst = dst & imm (64-bit) */
+	/* dst = dst | imm (64-bit) */
+	/* dst = dst ^ imm (64-bit) */
+	/* dst = dst << imm (64-bit) */
+	/* dst = dst >> imm (64-bit) */
+	/* dst = dst >> imm (64-bit, arithmetic) */
+	/* dst = dst + imm (64-bit) */
+	/* dst = dst - imm (64-bit) */
+	/* dst = dst * imm (64-bit) */
+	/* dst = dst / imm (64-bit) */
+	/* dst = dst % imm (64-bit) */
+	case BPF_ALU64 | BPF_AND | BPF_K:
+	case BPF_ALU64 | BPF_OR | BPF_K:
+	case BPF_ALU64 | BPF_XOR | BPF_K:
+	case BPF_ALU64 | BPF_LSH | BPF_K:
+	case BPF_ALU64 | BPF_RSH | BPF_K:
+	case BPF_ALU64 | BPF_ARSH | BPF_K:
+	case BPF_ALU64 | BPF_ADD | BPF_K:
+	case BPF_ALU64 | BPF_SUB | BPF_K:
+	case BPF_ALU64 | BPF_MUL | BPF_K:
+	case BPF_ALU64 | BPF_DIV | BPF_K:
+	case BPF_ALU64 | BPF_MOD | BPF_K:
+		if (!valid_alu_i(BPF_OP(code), imm)) {
+			emit_mov_i(ctx, MIPS_R_T4, imm);
+			emit_alu_r64(ctx, dst, MIPS_R_T4, BPF_OP(code));
+		} else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
+			emit_alu_i64(ctx, dst, val, alu);
+		}
+		break;
+	/* dst = dst & src (64-bit) */
+	/* dst = dst | src (64-bit) */
+	/* dst = dst ^ src (64-bit) */
+	/* dst = dst << src (64-bit) */
+	/* dst = dst >> src (64-bit) */
+	/* dst = dst >> src (64-bit, arithmetic) */
+	/* dst = dst + src (64-bit) */
+	/* dst = dst - src (64-bit) */
+	/* dst = dst * src (64-bit) */
+	/* dst = dst / src (64-bit) */
+	/* dst = dst % src (64-bit) */
+	case BPF_ALU64 | BPF_AND | BPF_X:
+	case BPF_ALU64 | BPF_OR | BPF_X:
+	case BPF_ALU64 | BPF_XOR | BPF_X:
+	case BPF_ALU64 | BPF_LSH | BPF_X:
+	case BPF_ALU64 | BPF_RSH | BPF_X:
+	case BPF_ALU64 | BPF_ARSH | BPF_X:
+	case BPF_ALU64 | BPF_ADD | BPF_X:
+	case BPF_ALU64 | BPF_SUB | BPF_X:
+	case BPF_ALU64 | BPF_MUL | BPF_X:
+	case BPF_ALU64 | BPF_DIV | BPF_X:
+	case BPF_ALU64 | BPF_MOD | BPF_X:
+		emit_alu_r64(ctx, dst, src, BPF_OP(code));
+		break;
+	/* dst = htole(dst) */
+	/* dst = htobe(dst) */
+	case BPF_ALU | BPF_END | BPF_FROM_LE:
+	case BPF_ALU | BPF_END | BPF_FROM_BE:
+		if (BPF_SRC(code) ==
+#ifdef __BIG_ENDIAN
+		    BPF_FROM_LE
+#else
+		    BPF_FROM_BE
+#endif
+		    )
+			emit_bswap_r64(ctx, dst, imm);
+		else
+			emit_trunc_r64(ctx, dst, imm);
+		break;
+	/* dst = imm64 */
+	case BPF_LD | BPF_IMM | BPF_DW:
+		emit_mov_i64(ctx, dst, (u32)imm | ((u64)insn[1].imm << 32));
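+		/* Spans two eBPF instructions: skip the second half */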
+		return 1;
+	/* LDX: dst = *(size *)(src + off) */
+	case BPF_LDX | BPF_MEM | BPF_W:
+	case BPF_LDX | BPF_MEM | BPF_H:
+	case BPF_LDX | BPF_MEM | BPF_B:
+	case BPF_LDX | BPF_MEM | BPF_DW:
+		emit_ldx(ctx, dst, src, off, BPF_SIZE(code));
+		break;
+	/* ST: *(size *)(dst + off) = imm */
+	case BPF_ST | BPF_MEM | BPF_W:
+	case BPF_ST | BPF_MEM | BPF_H:
+	case BPF_ST | BPF_MEM | BPF_B:
+	case BPF_ST | BPF_MEM | BPF_DW:
+		emit_mov_i(ctx, MIPS_R_T4, imm);
+		emit_stx(ctx, dst, MIPS_R_T4, off, BPF_SIZE(code));
+		break;
+	/* STX: *(size *)(dst + off) = src */
+	case BPF_STX | BPF_MEM | BPF_W:
+	case BPF_STX | BPF_MEM | BPF_H:
+	case BPF_STX | BPF_MEM | BPF_B:
+	case BPF_STX | BPF_MEM | BPF_DW:
+		emit_stx(ctx, dst, src, off, BPF_SIZE(code));
+		break;
+	/* Speculation barrier */
+	case BPF_ST | BPF_NOSPEC:
+		break;
+	/* Atomics */
+	case BPF_STX | BPF_XADD | BPF_W:
+	case BPF_STX | BPF_XADD | BPF_DW:
+		switch (imm) {
+		case BPF_ADD:
+		case BPF_AND:
+		case BPF_OR:
+		case BPF_XOR:
+			if (BPF_SIZE(code) == BPF_DW) {
+				emit_atomic_r64(ctx, dst, src, off, imm);
+			} else { /* 32-bit, no fetch */
+				emit_sext(ctx, MIPS_R_T4, src);
+				emit_atomic_r(ctx, dst, MIPS_R_T4, off, imm);
+			}
+			break;
+		default:
+			goto notyet;
+		}
+		break;
+	/* PC += off if dst == src */
+	/* PC += off if dst != src */
+	/* PC += off if dst & src */
+	/* PC += off if dst > src */
+	/* PC += off if dst >= src */
+	/* PC += off if dst < src */
+	/* PC += off if dst <= src */
+	/* PC += off if dst > src (signed) */
+	/* PC += off if dst >= src (signed) */
+	/* PC += off if dst < src (signed) */
+	/* PC += off if dst <= src (signed) */
+	case BPF_JMP32 | BPF_JEQ | BPF_X:
+	case BPF_JMP32 | BPF_JNE | BPF_X:
+	case BPF_JMP32 | BPF_JSET | BPF_X:
+	case BPF_JMP32 | BPF_JGT | BPF_X:
+	case BPF_JMP32 | BPF_JGE | BPF_X:
+	case BPF_JMP32 | BPF_JLT | BPF_X:
+	case BPF_JMP32 | BPF_JLE | BPF_X:
+	case BPF_JMP32 | BPF_JSGT | BPF_X:
+	case BPF_JMP32 | BPF_JSGE | BPF_X:
+	case BPF_JMP32 | BPF_JSLT | BPF_X:
+	case BPF_JMP32 | BPF_JSLE | BPF_X:
+		if (off == 0)
+			break;
+		setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
+		emit_sext(ctx, MIPS_R_T4, dst); /* Sign-extended dst */
+		emit_sext(ctx, MIPS_R_T5, src); /* Sign-extended src */
+		emit_jmp_r(ctx, MIPS_R_T4, MIPS_R_T5, rel, jmp);
+		if (finish_jmp(ctx, jmp, off) < 0)
+			goto toofar;
+		break;
+	/* PC += off if dst == imm */
+	/* PC += off if dst != imm */
+	/* PC += off if dst & imm */
+	/* PC += off if dst > imm */
+	/* PC += off if dst >= imm */
+	/* PC += off if dst < imm */
+	/* PC += off if dst <= imm */
+	/* PC += off if dst > imm (signed) */
+	/* PC += off if dst >= imm (signed) */
+	/* PC += off if dst < imm (signed) */
+	/* PC += off if dst <= imm (signed) */
+	case BPF_JMP32 | BPF_JEQ | BPF_K:
+	case BPF_JMP32 | BPF_JNE | BPF_K:
+	case BPF_JMP32 | BPF_JSET | BPF_K:
+	case BPF_JMP32 | BPF_JGT | BPF_K:
+	case BPF_JMP32 | BPF_JGE | BPF_K:
+	case BPF_JMP32 | BPF_JLT | BPF_K:
+	case BPF_JMP32 | BPF_JLE | BPF_K:
+	case BPF_JMP32 | BPF_JSGT | BPF_K:
+	case BPF_JMP32 | BPF_JSGE | BPF_K:
+	case BPF_JMP32 | BPF_JSLT | BPF_K:
+	case BPF_JMP32 | BPF_JSLE | BPF_K:
+		if (off == 0)
+			break;
+		setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel);
+		emit_sext(ctx, MIPS_R_T4, dst); /* Sign-extended dst */
+		if (valid_jmp_i(jmp, imm)) {
+			emit_jmp_i(ctx, MIPS_R_T4, imm, rel, jmp);
+		} else {
+			/* Move large immediate to register, sign-extended */
+			emit_mov_i(ctx, MIPS_R_T5, imm);
+			emit_jmp_r(ctx, MIPS_R_T4, MIPS_R_T5, rel, jmp);
+		}
+		if (finish_jmp(ctx, jmp, off) < 0)
+			goto toofar;
+		break;
+	/* PC += off if dst == src */
+	/* PC += off if dst != src */
+	/* PC += off if dst & src */
+	/* PC += off if dst > src */
+	/* PC += off if dst >= src */
+	/* PC += off if dst < src */
+	/* PC += off if dst <= src */
+	/* PC += off if dst > src (signed) */
+	/* PC += off if dst >= src (signed) */
+	/* PC += off if dst < src (signed) */
+	/* PC += off if dst <= src (signed) */
+	case BPF_JMP | BPF_JEQ | BPF_X:
+	case BPF_JMP | BPF_JNE | BPF_X:
+	case BPF_JMP | BPF_JSET | BPF_X:
+	case BPF_JMP | BPF_JGT | BPF_X:
+	case BPF_JMP | BPF_JGE | BPF_X:
+	case BPF_JMP | BPF_JLT | BPF_X:
+	case BPF_JMP | BPF_JLE | BPF_X:
+	case BPF_JMP | BPF_JSGT | BPF_X:
+	case BPF_JMP | BPF_JSGE | BPF_X:
+	case BPF_JMP | BPF_JSLT | BPF_X:
+	case BPF_JMP | BPF_JSLE | BPF_X:
+		if (off == 0)
+			break;
+		setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
+		emit_jmp_r(ctx, dst, src, rel, jmp);
+		if (finish_jmp(ctx, jmp, off) < 0)
+			goto toofar;
+		break;
+	/* PC += off if dst == imm */
+	/* PC += off if dst != imm */
+	/* PC += off if dst & imm */
+	/* PC += off if dst > imm */
+	/* PC += off if dst >= imm */
+	/* PC += off if dst < imm */
+	/* PC += off if dst <= imm */
+	/* PC += off if dst > imm (signed) */
+	/* PC += off if dst >= imm (signed) */
+	/* PC += off if dst < imm (signed) */
+	/* PC += off if dst <= imm (signed) */
+	case BPF_JMP | BPF_JEQ | BPF_K:
+	case BPF_JMP | BPF_JNE | BPF_K:
+	case BPF_JMP | BPF_JSET | BPF_K:
+	case BPF_JMP | BPF_JGT | BPF_K:
+	case BPF_JMP | BPF_JGE | BPF_K:
+	case BPF_JMP | BPF_JLT | BPF_K:
+	case BPF_JMP | BPF_JLE | BPF_K:
+	case BPF_JMP | BPF_JSGT | BPF_K:
+	case BPF_JMP | BPF_JSGE | BPF_K:
+	case BPF_JMP | BPF_JSLT | BPF_K:
+	case BPF_JMP | BPF_JSLE | BPF_K:
+		if (off == 0)
+			break;
+		setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel);
+		if (valid_jmp_i(jmp, imm)) {
+			emit_jmp_i(ctx, dst, imm, rel, jmp);
+		} else {
+			/* Move large immediate to register */
+			emit_mov_i(ctx, MIPS_R_T4, imm);
+			emit_jmp_r(ctx, dst, MIPS_R_T4, rel, jmp);
+		}
+		if (finish_jmp(ctx, jmp, off) < 0)
+			goto toofar;
+		break;
+	/* PC += off */
+	case BPF_JMP | BPF_JA:
+		if (off == 0)
+			break;
+		if (emit_ja(ctx, off) < 0)
+			goto toofar;
+		break;
+	/* Tail call */
+	case BPF_JMP | BPF_TAIL_CALL:
+		if (emit_tail_call(ctx) < 0)
+			goto invalid;
+		break;
+	/* Function call */
+	case BPF_JMP | BPF_CALL:
+		if (emit_call(ctx, insn) < 0)
+			goto invalid;
+		break;
+	/* Function return */
+	case BPF_JMP | BPF_EXIT:
+		/*
+		 * Optimization: when last instruction is EXIT
+		 * simply continue to epilogue.
+		 */
+		if (ctx->bpf_index == ctx->program->len - 1)
+			break;
+		if (emit_exit(ctx) < 0)
+			goto toofar;
+		break;
+
+	default:
+invalid:
+		pr_err_once("unknown opcode %02x\n", code);
+		return -EINVAL;
+notyet:
+		pr_info_once("*** NOT YET: opcode %02x ***\n", code);
+		return -EFAULT;
+toofar:
+		pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n",
+			     ctx->bpf_index, code);
+		return -E2BIG;
+	}
+	return 0;
+}