1 From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
2 Date: Tue, 5 Oct 2021 18:54:05 +0200
3 Subject: [PATCH] mips: bpf: Add new eBPF JIT for 64-bit MIPS
5 This is an implementation of an eBPF JIT for 64-bit MIPS III-V and
6 MIPS64r1-r6. It uses the same framework introduced by the 32-bit JIT.
8 Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
10 create mode 100644 arch/mips/net/bpf_jit_comp64.c
13 +++ b/arch/mips/net/bpf_jit_comp64.c
15 +// SPDX-License-Identifier: GPL-2.0-only
17 + * Just-In-Time compiler for eBPF bytecode on MIPS.
18 + * Implementation of JIT functions for 64-bit CPUs.
20 + * Copyright (c) 2021 Anyfi Networks AB.
21 + * Author: Johan Almbladh <johan.almbladh@gmail.com>
23 + * Based on code and ideas from
24 + * Copyright (c) 2017 Cavium, Inc.
25 + * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
26 + * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
29 +#include <linux/errno.h>
30 +#include <linux/filter.h>
31 +#include <linux/bpf.h>
32 +#include <asm/cpu-features.h>
33 +#include <asm/isa-rev.h>
34 +#include <asm/uasm.h>
36 +#include "bpf_jit_comp.h"
38 +/* MIPS t0-t3 are not available in the n64 ABI */
44 +/* Stack is 16-byte aligned in n64 ABI */
45 +#define MIPS_STACK_ALIGNMENT 16
47 +/* Extra 64-bit eBPF registers used by JIT */
48 +#define JIT_REG_TC (MAX_BPF_JIT_REG + 0)
49 +#define JIT_REG_ZX (MAX_BPF_JIT_REG + 1)
51 +/* Number of prologue bytes to skip when doing a tail call */
52 +#define JIT_TCALL_SKIP 4
54 +/* Callee-saved CPU registers that the JIT must preserve */
55 +#define JIT_CALLEE_REGS \
68 +/* Caller-saved CPU registers available for JIT use */
69 +#define JIT_CALLER_REGS \
74 + * Mapping of 64-bit eBPF registers to 64-bit native MIPS registers.
75 + * MIPS registers t4 - t7 may be used by the JIT as temporary registers.
76 + * MIPS registers t8 - t9 are reserved for single-register common functions.
78 +static const u8 bpf2mips64[] = {
79 + /* Return value from in-kernel function, and exit value from eBPF */
80 + [BPF_REG_0] = MIPS_R_V0,
81 + /* Arguments from eBPF program to in-kernel function */
82 + [BPF_REG_1] = MIPS_R_A0,
83 + [BPF_REG_2] = MIPS_R_A1,
84 + [BPF_REG_3] = MIPS_R_A2,
85 + [BPF_REG_4] = MIPS_R_A3,
86 + [BPF_REG_5] = MIPS_R_A4,
87 + /* Callee-saved registers that in-kernel function will preserve */
88 + [BPF_REG_6] = MIPS_R_S0,
89 + [BPF_REG_7] = MIPS_R_S1,
90 + [BPF_REG_8] = MIPS_R_S2,
91 + [BPF_REG_9] = MIPS_R_S3,
92 + /* Read-only frame pointer to access the eBPF stack */
93 + [BPF_REG_FP] = MIPS_R_FP,
94 + /* Temporary register for blinding constants */
95 + [BPF_REG_AX] = MIPS_R_AT,
96 + /* Tail call count register, caller-saved */
97 + [JIT_REG_TC] = MIPS_R_A5,
98 + /* Constant for register zero-extension */
99 + [JIT_REG_ZX] = MIPS_R_V1,
103 + * MIPS 32-bit operations on 64-bit registers generate a sign-extended
104 + * result. However, the eBPF ISA mandates zero-extension, so we rely on the
105 + * verifier to add that for us (emit_zext_ver). In addition, ALU arithmetic
106 + * operations, right shift and byte swap require properly sign-extended
107 + * operands or the result is unpredictable. We emit explicit sign-extensions
111 +/* Sign extension */
112 +static void emit_sext(struct jit_context *ctx, u8 dst, u8 src)
114 + emit(ctx, sll, dst, src, 0);
115 + clobber_reg(ctx, dst);
118 +/* Zero extension */
119 +static void emit_zext(struct jit_context *ctx, u8 dst)
121 + if (cpu_has_mips64r2 || cpu_has_mips64r6) {
122 + emit(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
124 + emit(ctx, and, dst, dst, bpf2mips64[JIT_REG_ZX]);
125 + access_reg(ctx, JIT_REG_ZX); /* We need the ZX register */
127 + clobber_reg(ctx, dst);
130 +/* Zero extension, if verifier does not do it for us */
131 +static void emit_zext_ver(struct jit_context *ctx, u8 dst)
133 + if (!ctx->program->aux->verifier_zext)
134 + emit_zext(ctx, dst);
137 +/* dst = imm (64-bit) */
138 +static void emit_mov_i64(struct jit_context *ctx, u8 dst, u64 imm64)
140 + if (imm64 >= 0xffffffffffff8000ULL || imm64 < 0x8000ULL) {
141 + emit(ctx, daddiu, dst, MIPS_R_ZERO, (s16)imm64);
142 + } else if (imm64 >= 0xffffffff80000000ULL ||
143 + (imm64 < 0x80000000 && imm64 > 0xffff)) {
144 + emit(ctx, lui, dst, (s16)(imm64 >> 16));
145 + emit(ctx, ori, dst, dst, (u16)imm64 & 0xffff);
147 + u8 acc = MIPS_R_ZERO;
150 + for (k = 0; k < 4; k++) {
151 + u16 half = imm64 >> (48 - 16 * k);
154 + emit(ctx, dsll, dst, dst, 16);
157 + emit(ctx, ori, dst, acc, half);
162 + clobber_reg(ctx, dst);
165 +/* ALU immediate operation (64-bit) */
166 +static void emit_alu_i64(struct jit_context *ctx, u8 dst, s32 imm, u8 op)
168 + switch (BPF_OP(op)) {
169 + /* dst = dst | imm */
171 + emit(ctx, ori, dst, dst, (u16)imm);
173 + /* dst = dst ^ imm */
175 + emit(ctx, xori, dst, dst, (u16)imm);
179 + emit(ctx, dsubu, dst, MIPS_R_ZERO, dst);
181 + /* dst = dst << imm */
183 + emit(ctx, dsll_safe, dst, dst, imm);
185 + /* dst = dst >> imm */
187 + emit(ctx, dsrl_safe, dst, dst, imm);
189 + /* dst = dst >> imm (arithmetic) */
191 + emit(ctx, dsra_safe, dst, dst, imm);
193 + /* dst = dst + imm */
195 + emit(ctx, daddiu, dst, dst, imm);
197 + /* dst = dst - imm */
199 + emit(ctx, daddiu, dst, dst, -imm);
202 + /* Width-generic operations */
203 + emit_alu_i(ctx, dst, imm, op);
205 + clobber_reg(ctx, dst);
208 +/* ALU register operation (64-bit) */
209 +static void emit_alu_r64(struct jit_context *ctx, u8 dst, u8 src, u8 op)
211 + switch (BPF_OP(op)) {
212 + /* dst = dst << src */
214 + emit(ctx, dsllv, dst, dst, src);
216 + /* dst = dst >> src */
218 + emit(ctx, dsrlv, dst, dst, src);
220 + /* dst = dst >> src (arithmetic) */
222 + emit(ctx, dsrav, dst, dst, src);
224 + /* dst = dst + src */
226 + emit(ctx, daddu, dst, dst, src);
228 + /* dst = dst - src */
230 + emit(ctx, dsubu, dst, dst, src);
232 + /* dst = dst * src */
234 + if (cpu_has_mips64r6) {
235 + emit(ctx, dmulu, dst, dst, src);
237 + emit(ctx, dmultu, dst, src);
238 + emit(ctx, mflo, dst);
241 + /* dst = dst / src */
243 + if (cpu_has_mips64r6) {
244 + emit(ctx, ddivu_r6, dst, dst, src);
246 + emit(ctx, ddivu, dst, src);
247 + emit(ctx, mflo, dst);
250 + /* dst = dst % src */
252 + if (cpu_has_mips64r6) {
253 + emit(ctx, dmodu, dst, dst, src);
255 + emit(ctx, ddivu, dst, src);
256 + emit(ctx, mfhi, dst);
260 + /* Width-generic operations */
261 + emit_alu_r(ctx, dst, src, op);
263 + clobber_reg(ctx, dst);
266 +/* Swap sub words in a register double word */
267 +static void emit_swap_r64(struct jit_context *ctx, u8 dst, u8 mask, u32 bits)
269 + u8 tmp = MIPS_R_T9;
271 + emit(ctx, and, tmp, dst, mask); /* tmp = dst & mask */
272 + emit(ctx, dsll, tmp, tmp, bits); /* tmp = tmp << bits */
273 + emit(ctx, dsrl, dst, dst, bits); /* dst = dst >> bits */
274 + emit(ctx, and, dst, dst, mask); /* dst = dst & mask */
275 + emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */
278 +/* Swap bytes and truncate a register double word, word or half word */
279 +static void emit_bswap_r64(struct jit_context *ctx, u8 dst, u32 width)
282 + /* Swap bytes in a double word */
284 + if (cpu_has_mips64r2 || cpu_has_mips64r6) {
285 + emit(ctx, dsbh, dst, dst);
286 + emit(ctx, dshd, dst, dst);
291 + emit(ctx, dsll32, t2, dst, 0); /* t2 = dst << 32 */
292 + emit(ctx, dsrl32, dst, dst, 0); /* dst = dst >> 32 */
293 + emit(ctx, or, dst, dst, t2); /* dst = dst | t2 */
295 + emit(ctx, ori, t2, MIPS_R_ZERO, 0xffff);
296 + emit(ctx, dsll32, t1, t2, 0); /* t1 = t2 << 32 */
297 + emit(ctx, or, t1, t1, t2); /* t1 = t1 | t2 */
298 + emit_swap_r64(ctx, dst, t1, 16);/* dst = swap16(dst) */
300 + emit(ctx, lui, t2, 0xff); /* t2 = 0x00ff0000 */
301 + emit(ctx, ori, t2, t2, 0xff); /* t2 = t2 | 0x00ff */
302 + emit(ctx, dsll32, t1, t2, 0); /* t1 = t2 << 32 */
303 + emit(ctx, or, t1, t1, t2); /* t1 = t1 | t2 */
304 + emit_swap_r64(ctx, dst, t1, 8); /* dst = swap8(dst) */
307 + /* Swap bytes in a half word */
308 + /* Swap bytes in a word */
311 + emit_sext(ctx, dst, dst);
312 + emit_bswap_r(ctx, dst, width);
313 + if (cpu_has_mips64r2 || cpu_has_mips64r6)
314 + emit_zext(ctx, dst);
317 + clobber_reg(ctx, dst);
320 +/* Truncate a register double word, word or half word */
321 +static void emit_trunc_r64(struct jit_context *ctx, u8 dst, u32 width)
326 + /* Zero-extend a word */
328 + emit_zext(ctx, dst);
330 + /* Zero-extend a half word */
332 + emit(ctx, andi, dst, dst, 0xffff);
335 + clobber_reg(ctx, dst);
338 +/* Load operation: dst = *(size*)(src + off) */
339 +static void emit_ldx(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 size)
344 + emit(ctx, lbu, dst, off, src);
346 + /* Load a half word */
348 + emit(ctx, lhu, dst, off, src);
352 + emit(ctx, lwu, dst, off, src);
354 + /* Load a double word */
356 + emit(ctx, ld, dst, off, src);
359 + clobber_reg(ctx, dst);
362 +/* Store operation: *(size *)(dst + off) = src */
363 +static void emit_stx(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 size)
368 + emit(ctx, sb, src, off, dst);
370 + /* Store a half word */
372 + emit(ctx, sh, src, off, dst);
376 + emit(ctx, sw, src, off, dst);
378 + /* Store a double word */
380 + emit(ctx, sd, src, off, dst);
385 +/* Atomic read-modify-write */
386 +static void emit_atomic_r64(struct jit_context *ctx,
387 + u8 dst, u8 src, s16 off, u8 code)
392 + emit(ctx, lld, t1, off, dst);
395 + emit(ctx, daddu, t2, t1, src);
398 + emit(ctx, and, t2, t1, src);
401 + emit(ctx, or, t2, t1, src);
404 + emit(ctx, xor, t2, t1, src);
407 + emit(ctx, scd, t2, off, dst);
408 + emit(ctx, beqz, t2, -16);
409 + emit(ctx, nop); /* Delay slot */
413 +static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn)
415 + u8 zx = bpf2mips64[JIT_REG_ZX];
416 + u8 tmp = MIPS_R_T6;
420 + /* Decode the call address */
421 + if (bpf_jit_get_func_addr(ctx->program, insn, false,
422 + &addr, &fixed) < 0)
427 + /* Push caller-saved registers on stack */
428 + push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0);
430 + /* Emit function call */
431 + emit_mov_i64(ctx, tmp, addr);
432 + emit(ctx, jalr, MIPS_R_RA, tmp);
433 + emit(ctx, nop); /* Delay slot */
435 + /* Restore caller-saved registers */
436 + pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0);
438 + /* Re-initialize the JIT zero-extension register if accessed */
439 + if (ctx->accessed & BIT(JIT_REG_ZX)) {
440 + emit(ctx, daddiu, zx, MIPS_R_ZERO, -1);
441 + emit(ctx, dsrl32, zx, zx, 0);
444 + clobber_reg(ctx, MIPS_R_RA);
445 + clobber_reg(ctx, MIPS_R_V0);
446 + clobber_reg(ctx, MIPS_R_V1);
450 +/* Function tail call */
451 +static int emit_tail_call(struct jit_context *ctx)
453 + u8 ary = bpf2mips64[BPF_REG_2];
454 + u8 ind = bpf2mips64[BPF_REG_3];
455 + u8 tcc = bpf2mips64[JIT_REG_TC];
456 + u8 tmp = MIPS_R_T6;
461 + * eBPF R1 - function argument (context ptr), passed in a0-a1
462 + * eBPF R2 - ptr to object with array of function entry points
463 + * eBPF R3 - array index of function to be called
466 + /* if (ind >= ary->map.max_entries) goto out */
467 + off = offsetof(struct bpf_array, map.max_entries);
470 +	emit(ctx, lwu, tmp, off, ary);		/* tmp = ary->map.max_entries */
471 +	emit(ctx, sltu, tmp, ind, tmp);		/* tmp = ind < tmp (max_entries) */
472 + emit(ctx, beqz, tmp, get_offset(ctx, 1)); /* PC += off(1) if tmp == 0*/
474 + /* if (--TCC < 0) goto out */
475 + emit(ctx, daddiu, tcc, tcc, -1); /* tcc-- (delay slot) */
476 + emit(ctx, bltz, tcc, get_offset(ctx, 1)); /* PC += off(1) if tcc < 0 */
477 + /* (next insn delay slot) */
478 + /* prog = ary->ptrs[ind] */
479 + off = offsetof(struct bpf_array, ptrs);
482 + emit(ctx, dsll, tmp, ind, 3); /* tmp = ind << 3 */
483 + emit(ctx, daddu, tmp, tmp, ary); /* tmp += ary */
484 + emit(ctx, ld, tmp, off, tmp); /* tmp = *(tmp + off) */
486 + /* if (prog == 0) goto out */
487 + emit(ctx, beqz, tmp, get_offset(ctx, 1)); /* PC += off(1) if tmp == 0*/
488 + emit(ctx, nop); /* Delay slot */
490 + /* func = prog->bpf_func + 8 (prologue skip offset) */
491 + off = offsetof(struct bpf_prog, bpf_func);
494 + emit(ctx, ld, tmp, off, tmp); /* tmp = *(tmp + off) */
495 + emit(ctx, daddiu, tmp, tmp, JIT_TCALL_SKIP); /* tmp += skip (4) */
498 + build_epilogue(ctx, tmp);
499 + access_reg(ctx, JIT_REG_TC);
504 + * Stack frame layout for a JITed program (stack grows down).
506 + * Higher address : Previous stack frame :
507 + * +===========================+ <--- MIPS sp before call
508 + * | Callee-saved registers, |
509 + * | including RA and FP |
510 + * +---------------------------+ <--- eBPF FP (MIPS fp)
511 + * | Local eBPF variables |
512 + * | allocated by program |
513 + * +---------------------------+
514 + * | Reserved for caller-saved |
516 + * Lower address +===========================+ <--- MIPS sp
519 +/* Build program prologue to set up the stack and registers */
520 +void build_prologue(struct jit_context *ctx)
522 + u8 fp = bpf2mips64[BPF_REG_FP];
523 + u8 tc = bpf2mips64[JIT_REG_TC];
524 + u8 zx = bpf2mips64[JIT_REG_ZX];
525 + int stack, saved, locals, reserved;
528 + * The first instruction initializes the tail call count register.
529 + * On a tail call, the calling function jumps into the prologue
530 + * after this instruction.
532 + emit(ctx, addiu, tc, MIPS_R_ZERO, min(MAX_TAIL_CALL_CNT + 1, 0xffff));
534 + /* === Entry-point for tail calls === */
537 + * If the eBPF frame pointer and tail call count registers were
538 + * accessed they must be preserved. Mark them as clobbered here
539 + * to save and restore them on the stack as needed.
541 + if (ctx->accessed & BIT(BPF_REG_FP))
542 + clobber_reg(ctx, fp);
543 + if (ctx->accessed & BIT(JIT_REG_TC))
544 + clobber_reg(ctx, tc);
545 + if (ctx->accessed & BIT(JIT_REG_ZX))
546 + clobber_reg(ctx, zx);
548 + /* Compute the stack space needed for callee-saved registers */
549 + saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u64);
550 + saved = ALIGN(saved, MIPS_STACK_ALIGNMENT);
552 + /* Stack space used by eBPF program local data */
553 + locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT);
556 + * If we are emitting function calls, reserve extra stack space for
557 + * caller-saved registers needed by the JIT. The required space is
558 + * computed automatically during resource usage discovery (pass 1).
560 + reserved = ctx->stack_used;
562 + /* Allocate the stack frame */
563 + stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT);
565 + emit(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, -stack);
567 + /* Store callee-saved registers on stack */
568 + push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved);
570 + /* Initialize the eBPF frame pointer if accessed */
571 + if (ctx->accessed & BIT(BPF_REG_FP))
572 + emit(ctx, daddiu, fp, MIPS_R_SP, stack - saved);
574 +	/* Initialize the eBPF JIT zero-extension register if accessed */
575 + if (ctx->accessed & BIT(JIT_REG_ZX)) {
576 + emit(ctx, daddiu, zx, MIPS_R_ZERO, -1);
577 + emit(ctx, dsrl32, zx, zx, 0);
580 + ctx->saved_size = saved;
581 + ctx->stack_size = stack;
584 +/* Build the program epilogue to restore the stack and registers */
585 +void build_epilogue(struct jit_context *ctx, int dest_reg)
587 + /* Restore callee-saved registers from stack */
588 + pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0,
589 + ctx->stack_size - ctx->saved_size);
591 + /* Release the stack frame */
592 + if (ctx->stack_size)
593 + emit(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size);
595 + /* Jump to return address and sign-extend the 32-bit return value */
596 + emit(ctx, jr, dest_reg);
597 + emit(ctx, sll, MIPS_R_V0, MIPS_R_V0, 0); /* Delay slot */
600 +/* Build one eBPF instruction */
601 +int build_insn(const struct bpf_insn *insn, struct jit_context *ctx)
603 + u8 dst = bpf2mips64[insn->dst_reg];
604 + u8 src = bpf2mips64[insn->src_reg];
605 + u8 code = insn->code;
606 + s16 off = insn->off;
607 + s32 imm = insn->imm;
612 + /* ALU operations */
614 + case BPF_ALU | BPF_MOV | BPF_K:
615 + emit_mov_i(ctx, dst, imm);
616 + emit_zext_ver(ctx, dst);
619 + case BPF_ALU | BPF_MOV | BPF_X:
621 + /* Special mov32 for zext */
622 + emit_zext(ctx, dst);
624 + emit_mov_r(ctx, dst, src);
625 + emit_zext_ver(ctx, dst);
629 + case BPF_ALU | BPF_NEG:
630 + emit_sext(ctx, dst, dst);
631 + emit_alu_i(ctx, dst, 0, BPF_NEG);
632 + emit_zext_ver(ctx, dst);
634 + /* dst = dst & imm */
635 + /* dst = dst | imm */
636 + /* dst = dst ^ imm */
637 + /* dst = dst << imm */
638 + case BPF_ALU | BPF_OR | BPF_K:
639 + case BPF_ALU | BPF_AND | BPF_K:
640 + case BPF_ALU | BPF_XOR | BPF_K:
641 + case BPF_ALU | BPF_LSH | BPF_K:
642 + if (!valid_alu_i(BPF_OP(code), imm)) {
643 + emit_mov_i(ctx, MIPS_R_T4, imm);
644 + emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code));
645 + } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
646 + emit_alu_i(ctx, dst, val, alu);
648 + emit_zext_ver(ctx, dst);
650 + /* dst = dst >> imm */
651 + /* dst = dst >> imm (arithmetic) */
652 + /* dst = dst + imm */
653 + /* dst = dst - imm */
654 + /* dst = dst * imm */
655 + /* dst = dst / imm */
656 + /* dst = dst % imm */
657 + case BPF_ALU | BPF_RSH | BPF_K:
658 + case BPF_ALU | BPF_ARSH | BPF_K:
659 + case BPF_ALU | BPF_ADD | BPF_K:
660 + case BPF_ALU | BPF_SUB | BPF_K:
661 + case BPF_ALU | BPF_MUL | BPF_K:
662 + case BPF_ALU | BPF_DIV | BPF_K:
663 + case BPF_ALU | BPF_MOD | BPF_K:
664 + if (!valid_alu_i(BPF_OP(code), imm)) {
665 + emit_sext(ctx, dst, dst);
666 + emit_mov_i(ctx, MIPS_R_T4, imm);
667 + emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code));
668 + } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
669 + emit_sext(ctx, dst, dst);
670 + emit_alu_i(ctx, dst, val, alu);
672 + emit_zext_ver(ctx, dst);
674 + /* dst = dst & src */
675 + /* dst = dst | src */
676 + /* dst = dst ^ src */
677 + /* dst = dst << src */
678 + case BPF_ALU | BPF_AND | BPF_X:
679 + case BPF_ALU | BPF_OR | BPF_X:
680 + case BPF_ALU | BPF_XOR | BPF_X:
681 + case BPF_ALU | BPF_LSH | BPF_X:
682 + emit_alu_r(ctx, dst, src, BPF_OP(code));
683 + emit_zext_ver(ctx, dst);
685 + /* dst = dst >> src */
686 + /* dst = dst >> src (arithmetic) */
687 + /* dst = dst + src */
688 + /* dst = dst - src */
689 + /* dst = dst * src */
690 + /* dst = dst / src */
691 + /* dst = dst % src */
692 + case BPF_ALU | BPF_RSH | BPF_X:
693 + case BPF_ALU | BPF_ARSH | BPF_X:
694 + case BPF_ALU | BPF_ADD | BPF_X:
695 + case BPF_ALU | BPF_SUB | BPF_X:
696 + case BPF_ALU | BPF_MUL | BPF_X:
697 + case BPF_ALU | BPF_DIV | BPF_X:
698 + case BPF_ALU | BPF_MOD | BPF_X:
699 + emit_sext(ctx, dst, dst);
700 + emit_sext(ctx, MIPS_R_T4, src);
701 + emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code));
702 + emit_zext_ver(ctx, dst);
704 + /* dst = imm (64-bit) */
705 + case BPF_ALU64 | BPF_MOV | BPF_K:
706 + emit_mov_i(ctx, dst, imm);
708 + /* dst = src (64-bit) */
709 + case BPF_ALU64 | BPF_MOV | BPF_X:
710 + emit_mov_r(ctx, dst, src);
712 + /* dst = -dst (64-bit) */
713 + case BPF_ALU64 | BPF_NEG:
714 + emit_alu_i64(ctx, dst, 0, BPF_NEG);
716 + /* dst = dst & imm (64-bit) */
717 + /* dst = dst | imm (64-bit) */
718 + /* dst = dst ^ imm (64-bit) */
719 + /* dst = dst << imm (64-bit) */
720 + /* dst = dst >> imm (64-bit) */
721 +	/* dst = dst >> imm (64-bit, arithmetic) */
722 + /* dst = dst + imm (64-bit) */
723 + /* dst = dst - imm (64-bit) */
724 + /* dst = dst * imm (64-bit) */
725 + /* dst = dst / imm (64-bit) */
726 + /* dst = dst % imm (64-bit) */
727 + case BPF_ALU64 | BPF_AND | BPF_K:
728 + case BPF_ALU64 | BPF_OR | BPF_K:
729 + case BPF_ALU64 | BPF_XOR | BPF_K:
730 + case BPF_ALU64 | BPF_LSH | BPF_K:
731 + case BPF_ALU64 | BPF_RSH | BPF_K:
732 + case BPF_ALU64 | BPF_ARSH | BPF_K:
733 + case BPF_ALU64 | BPF_ADD | BPF_K:
734 + case BPF_ALU64 | BPF_SUB | BPF_K:
735 + case BPF_ALU64 | BPF_MUL | BPF_K:
736 + case BPF_ALU64 | BPF_DIV | BPF_K:
737 + case BPF_ALU64 | BPF_MOD | BPF_K:
738 + if (!valid_alu_i(BPF_OP(code), imm)) {
739 + emit_mov_i(ctx, MIPS_R_T4, imm);
740 + emit_alu_r64(ctx, dst, MIPS_R_T4, BPF_OP(code));
741 + } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
742 + emit_alu_i64(ctx, dst, val, alu);
745 + /* dst = dst & src (64-bit) */
746 + /* dst = dst | src (64-bit) */
747 + /* dst = dst ^ src (64-bit) */
748 + /* dst = dst << src (64-bit) */
749 + /* dst = dst >> src (64-bit) */
750 + /* dst = dst >> src (64-bit, arithmetic) */
751 + /* dst = dst + src (64-bit) */
752 + /* dst = dst - src (64-bit) */
753 + /* dst = dst * src (64-bit) */
754 + /* dst = dst / src (64-bit) */
755 + /* dst = dst % src (64-bit) */
756 + case BPF_ALU64 | BPF_AND | BPF_X:
757 + case BPF_ALU64 | BPF_OR | BPF_X:
758 + case BPF_ALU64 | BPF_XOR | BPF_X:
759 + case BPF_ALU64 | BPF_LSH | BPF_X:
760 + case BPF_ALU64 | BPF_RSH | BPF_X:
761 + case BPF_ALU64 | BPF_ARSH | BPF_X:
762 + case BPF_ALU64 | BPF_ADD | BPF_X:
763 + case BPF_ALU64 | BPF_SUB | BPF_X:
764 + case BPF_ALU64 | BPF_MUL | BPF_X:
765 + case BPF_ALU64 | BPF_DIV | BPF_X:
766 + case BPF_ALU64 | BPF_MOD | BPF_X:
767 + emit_alu_r64(ctx, dst, src, BPF_OP(code));
769 + /* dst = htole(dst) */
770 + /* dst = htobe(dst) */
771 + case BPF_ALU | BPF_END | BPF_FROM_LE:
772 + case BPF_ALU | BPF_END | BPF_FROM_BE:
773 + if (BPF_SRC(code) ==
780 + emit_bswap_r64(ctx, dst, imm);
782 + emit_trunc_r64(ctx, dst, imm);
785 + case BPF_LD | BPF_IMM | BPF_DW:
786 + emit_mov_i64(ctx, dst, (u32)imm | ((u64)insn[1].imm << 32));
788 + /* LDX: dst = *(size *)(src + off) */
789 + case BPF_LDX | BPF_MEM | BPF_W:
790 + case BPF_LDX | BPF_MEM | BPF_H:
791 + case BPF_LDX | BPF_MEM | BPF_B:
792 + case BPF_LDX | BPF_MEM | BPF_DW:
793 + emit_ldx(ctx, dst, src, off, BPF_SIZE(code));
795 + /* ST: *(size *)(dst + off) = imm */
796 + case BPF_ST | BPF_MEM | BPF_W:
797 + case BPF_ST | BPF_MEM | BPF_H:
798 + case BPF_ST | BPF_MEM | BPF_B:
799 + case BPF_ST | BPF_MEM | BPF_DW:
800 + emit_mov_i(ctx, MIPS_R_T4, imm);
801 + emit_stx(ctx, dst, MIPS_R_T4, off, BPF_SIZE(code));
803 + /* STX: *(size *)(dst + off) = src */
804 + case BPF_STX | BPF_MEM | BPF_W:
805 + case BPF_STX | BPF_MEM | BPF_H:
806 + case BPF_STX | BPF_MEM | BPF_B:
807 + case BPF_STX | BPF_MEM | BPF_DW:
808 + emit_stx(ctx, dst, src, off, BPF_SIZE(code));
810 + /* Speculation barrier */
811 + case BPF_ST | BPF_NOSPEC:
814 + case BPF_STX | BPF_XADD | BPF_W:
815 + case BPF_STX | BPF_XADD | BPF_DW:
821 + if (BPF_SIZE(code) == BPF_DW) {
822 + emit_atomic_r64(ctx, dst, src, off, imm);
823 + } else { /* 32-bit, no fetch */
824 + emit_sext(ctx, MIPS_R_T4, src);
825 + emit_atomic_r(ctx, dst, MIPS_R_T4, off, imm);
832 + /* PC += off if dst == src */
833 + /* PC += off if dst != src */
834 + /* PC += off if dst & src */
835 + /* PC += off if dst > src */
836 + /* PC += off if dst >= src */
837 + /* PC += off if dst < src */
838 + /* PC += off if dst <= src */
839 + /* PC += off if dst > src (signed) */
840 + /* PC += off if dst >= src (signed) */
841 + /* PC += off if dst < src (signed) */
842 + /* PC += off if dst <= src (signed) */
843 + case BPF_JMP32 | BPF_JEQ | BPF_X:
844 + case BPF_JMP32 | BPF_JNE | BPF_X:
845 + case BPF_JMP32 | BPF_JSET | BPF_X:
846 + case BPF_JMP32 | BPF_JGT | BPF_X:
847 + case BPF_JMP32 | BPF_JGE | BPF_X:
848 + case BPF_JMP32 | BPF_JLT | BPF_X:
849 + case BPF_JMP32 | BPF_JLE | BPF_X:
850 + case BPF_JMP32 | BPF_JSGT | BPF_X:
851 + case BPF_JMP32 | BPF_JSGE | BPF_X:
852 + case BPF_JMP32 | BPF_JSLT | BPF_X:
853 + case BPF_JMP32 | BPF_JSLE | BPF_X:
856 + setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
857 + emit_sext(ctx, MIPS_R_T4, dst); /* Sign-extended dst */
858 + emit_sext(ctx, MIPS_R_T5, src); /* Sign-extended src */
859 + emit_jmp_r(ctx, MIPS_R_T4, MIPS_R_T5, rel, jmp);
860 + if (finish_jmp(ctx, jmp, off) < 0)
863 + /* PC += off if dst == imm */
864 + /* PC += off if dst != imm */
865 + /* PC += off if dst & imm */
866 + /* PC += off if dst > imm */
867 + /* PC += off if dst >= imm */
868 + /* PC += off if dst < imm */
869 + /* PC += off if dst <= imm */
870 + /* PC += off if dst > imm (signed) */
871 + /* PC += off if dst >= imm (signed) */
872 + /* PC += off if dst < imm (signed) */
873 + /* PC += off if dst <= imm (signed) */
874 + case BPF_JMP32 | BPF_JEQ | BPF_K:
875 + case BPF_JMP32 | BPF_JNE | BPF_K:
876 + case BPF_JMP32 | BPF_JSET | BPF_K:
877 + case BPF_JMP32 | BPF_JGT | BPF_K:
878 + case BPF_JMP32 | BPF_JGE | BPF_K:
879 + case BPF_JMP32 | BPF_JLT | BPF_K:
880 + case BPF_JMP32 | BPF_JLE | BPF_K:
881 + case BPF_JMP32 | BPF_JSGT | BPF_K:
882 + case BPF_JMP32 | BPF_JSGE | BPF_K:
883 + case BPF_JMP32 | BPF_JSLT | BPF_K:
884 + case BPF_JMP32 | BPF_JSLE | BPF_K:
887 + setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel);
888 + emit_sext(ctx, MIPS_R_T4, dst); /* Sign-extended dst */
889 + if (valid_jmp_i(jmp, imm)) {
890 + emit_jmp_i(ctx, MIPS_R_T4, imm, rel, jmp);
892 + /* Move large immediate to register, sign-extended */
893 + emit_mov_i(ctx, MIPS_R_T5, imm);
894 + emit_jmp_r(ctx, MIPS_R_T4, MIPS_R_T5, rel, jmp);
896 + if (finish_jmp(ctx, jmp, off) < 0)
899 + /* PC += off if dst == src */
900 + /* PC += off if dst != src */
901 + /* PC += off if dst & src */
902 + /* PC += off if dst > src */
903 + /* PC += off if dst >= src */
904 + /* PC += off if dst < src */
905 + /* PC += off if dst <= src */
906 + /* PC += off if dst > src (signed) */
907 + /* PC += off if dst >= src (signed) */
908 + /* PC += off if dst < src (signed) */
909 + /* PC += off if dst <= src (signed) */
910 + case BPF_JMP | BPF_JEQ | BPF_X:
911 + case BPF_JMP | BPF_JNE | BPF_X:
912 + case BPF_JMP | BPF_JSET | BPF_X:
913 + case BPF_JMP | BPF_JGT | BPF_X:
914 + case BPF_JMP | BPF_JGE | BPF_X:
915 + case BPF_JMP | BPF_JLT | BPF_X:
916 + case BPF_JMP | BPF_JLE | BPF_X:
917 + case BPF_JMP | BPF_JSGT | BPF_X:
918 + case BPF_JMP | BPF_JSGE | BPF_X:
919 + case BPF_JMP | BPF_JSLT | BPF_X:
920 + case BPF_JMP | BPF_JSLE | BPF_X:
923 + setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
924 + emit_jmp_r(ctx, dst, src, rel, jmp);
925 + if (finish_jmp(ctx, jmp, off) < 0)
928 + /* PC += off if dst == imm */
929 + /* PC += off if dst != imm */
930 + /* PC += off if dst & imm */
931 + /* PC += off if dst > imm */
932 + /* PC += off if dst >= imm */
933 + /* PC += off if dst < imm */
934 + /* PC += off if dst <= imm */
935 + /* PC += off if dst > imm (signed) */
936 + /* PC += off if dst >= imm (signed) */
937 + /* PC += off if dst < imm (signed) */
938 + /* PC += off if dst <= imm (signed) */
939 + case BPF_JMP | BPF_JEQ | BPF_K:
940 + case BPF_JMP | BPF_JNE | BPF_K:
941 + case BPF_JMP | BPF_JSET | BPF_K:
942 + case BPF_JMP | BPF_JGT | BPF_K:
943 + case BPF_JMP | BPF_JGE | BPF_K:
944 + case BPF_JMP | BPF_JLT | BPF_K:
945 + case BPF_JMP | BPF_JLE | BPF_K:
946 + case BPF_JMP | BPF_JSGT | BPF_K:
947 + case BPF_JMP | BPF_JSGE | BPF_K:
948 + case BPF_JMP | BPF_JSLT | BPF_K:
949 + case BPF_JMP | BPF_JSLE | BPF_K:
952 + setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel);
953 + if (valid_jmp_i(jmp, imm)) {
954 + emit_jmp_i(ctx, dst, imm, rel, jmp);
956 + /* Move large immediate to register */
957 + emit_mov_i(ctx, MIPS_R_T4, imm);
958 + emit_jmp_r(ctx, dst, MIPS_R_T4, rel, jmp);
960 + if (finish_jmp(ctx, jmp, off) < 0)
964 + case BPF_JMP | BPF_JA:
967 + if (emit_ja(ctx, off) < 0)
971 + case BPF_JMP | BPF_TAIL_CALL:
972 + if (emit_tail_call(ctx) < 0)
975 + /* Function call */
976 + case BPF_JMP | BPF_CALL:
977 + if (emit_call(ctx, insn) < 0)
980 + /* Function return */
981 + case BPF_JMP | BPF_EXIT:
983 + * Optimization: when last instruction is EXIT
984 + * simply continue to epilogue.
986 + if (ctx->bpf_index == ctx->program->len - 1)
988 + if (emit_exit(ctx) < 0)
994 + pr_err_once("unknown opcode %02x\n", code);
997 + pr_info_once("*** NOT YET: opcode %02x ***\n", code);
1000 + pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n",
1001 + ctx->bpf_index, code);