1 From 36366e367ee93ced84fddb8fae6675e12985f5a4 Mon Sep 17 00:00:00 2001
2 From: Paul Burton <paulburton@kernel.org>
3 Date: Thu, 5 Dec 2019 10:23:18 -0800
4 Subject: [PATCH] MIPS: BPF: Restore MIPS32 cBPF JIT
5
6 Commit 716850ab104d ("MIPS: eBPF: Initial eBPF support for MIPS32
7 architecture.") enabled our eBPF JIT for MIPS32 kernels, whereas it has
8 previously only been available for MIPS64. It was my understanding at
9 the time that the BPF test suite was passing & JITing a comparable
10 number of tests to our cBPF JIT [1], but it turns out that was not the
11 case.
12
13 The eBPF JIT has a number of problems on MIPS32:
14
15 - Most notably various code paths still result in emission of MIPS64
16 instructions which will cause reserved instruction exceptions & kernel
17 panics when run on MIPS32 CPUs.
18
19 - The eBPF JIT doesn't account for differences between the O32 ABI used
20 by MIPS32 kernels versus the N64 ABI used by MIPS64 kernels. Notably
21 arguments beyond the first 4 are passed on the stack in O32, and this
22 is entirely unhandled when JITing a BPF_CALL instruction. Stack space
23 must be reserved for arguments even if they all fit in registers, and
24 the callee is free to assume that stack space has been reserved for
25 its use - with the eBPF JIT this is not the case, so calling any
26 function can result in clobbering values on the stack & unpredictable
27 behaviour. Function arguments in eBPF are always 64-bit values which
28 is also entirely unhandled - the JIT still uses a single (32-bit)
29 register per argument. As a result all function arguments are always
30 passed incorrectly when JITing a BPF_CALL instruction, leading to
31 kernel crashes or strange behavior.
32
33 - The JIT attempts to bail out on use of ALU64 instructions or 64-bit
34 memory access instructions. The code doing this at the start of
35 build_one_insn() incorrectly checks whether BPF_OP() equals BPF_DW,
36 when it should really be checking BPF_SIZE() & only doing so when
37 BPF_CLASS() is one of BPF_{LD,LDX,ST,STX}. This results in false
38 positives that cause more bailouts than intended, and that in turn
39 hides some of the problems described above (see the sketch below).
40
41 - The kernel's cBPF->eBPF translation makes heavy use of 64-bit eBPF
42 instructions that the MIPS32 eBPF JIT bails out on, leading to most
43 cBPF programs not being JITed at all.
44
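For illustration only (not part of this revert), the bailout described
in the third item above would need to key off BPF_SIZE() & BPF_CLASS()
rather than BPF_OP(). A hypothetical sketch, assuming the standard eBPF
instruction macros from <linux/filter.h>, might look like:

    /* Hypothetical helper, not the actual JIT code: true if this eBPF
     * insn is a 64-bit memory access that a 32-bit JIT should refuse,
     * leaving the program to the interpreter.
     */
    static bool is_64bit_mem_access(const struct bpf_insn *insn)
    {
        switch (BPF_CLASS(insn->code)) {
        case BPF_LD:
        case BPF_LDX:
        case BPF_ST:
        case BPF_STX:
            return BPF_SIZE(insn->code) == BPF_DW;
        default:
            return false;
        }
    }
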
45 Until these problems are resolved, revert the removal of the cBPF JIT
46 performed by commit 716850ab104d ("MIPS: eBPF: Initial eBPF support for
47 MIPS32 architecture."). Together with commit f8fffebdea75 ("MIPS: BPF:
48 Disable MIPS32 eBPF JIT") this restores MIPS32 BPF JIT behavior back to
49 the same state it was prior to the introduction of the broken eBPF JIT
50 support.
51
52 [1] https://lore.kernel.org/linux-mips/MWHPR2201MB13583388481F01A422CE7D66D4410@MWHPR2201MB1358.namprd22.prod.outlook.com/
53
54 Signed-off-by: Paul Burton <paulburton@kernel.org>
55 Fixes: 716850ab104d ("MIPS: eBPF: Initial eBPF support for MIPS32 architecture.")
56 Cc: Daniel Borkmann <daniel@iogearbox.net>
57 Cc: Hassan Naveed <hnaveed@wavecomp.com>
58 Cc: Tony Ambardar <itugrok@yahoo.com>
59 Cc: bpf@vger.kernel.org
60 Cc: netdev@vger.kernel.org
61 Cc: linux-mips@vger.kernel.org
62 Cc: linux-kernel@vger.kernel.org
63 ---
64 arch/mips/Kconfig | 1 +
65 arch/mips/net/Makefile | 1 +
66 arch/mips/net/bpf_jit.c | 1270 +++++++++++++++++++++++++++++++++++
67 arch/mips/net/bpf_jit_asm.S | 285 ++++++++
68 4 files changed, 1557 insertions(+)
69 create mode 100644 arch/mips/net/bpf_jit.c
70 create mode 100644 arch/mips/net/bpf_jit_asm.S
71
72 diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
73 index e5c2d47608fe..33674cdc3aa8 100644
74 --- a/arch/mips/Kconfig
75 +++ b/arch/mips/Kconfig
76 @@ -46,6 +46,7 @@ config MIPS
77 select HAVE_ARCH_TRACEHOOK
78 select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES
79 select HAVE_ASM_MODVERSIONS
80 + select HAVE_CBPF_JIT if !64BIT && !CPU_MICROMIPS
81 select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2
82 select HAVE_CONTEXT_TRACKING
83 select HAVE_COPY_THREAD_TLS
84 diff --git a/arch/mips/net/Makefile b/arch/mips/net/Makefile
85 index 2d03af7d6b19..d55912349039 100644
86 --- a/arch/mips/net/Makefile
87 +++ b/arch/mips/net/Makefile
88 @@ -1,4 +1,5 @@
89 # SPDX-License-Identifier: GPL-2.0-only
90 # MIPS networking code
91
92 +obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o
93 obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o
94 diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
95 new file mode 100644
96 index 000000000000..3a0e34f4e615
97 --- /dev/null
98 +++ b/arch/mips/net/bpf_jit.c
99 @@ -0,0 +1,1270 @@
100 +/*
101 + * Just-In-Time compiler for BPF filters on MIPS
102 + *
103 + * Copyright (c) 2014 Imagination Technologies Ltd.
104 + * Author: Markos Chandras <markos.chandras@imgtec.com>
105 + *
106 + * This program is free software; you can redistribute it and/or modify it
107 + * under the terms of the GNU General Public License as published by the
108 + * Free Software Foundation; version 2 of the License.
109 + */
110 +
111 +#include <linux/bitops.h>
112 +#include <linux/compiler.h>
113 +#include <linux/errno.h>
114 +#include <linux/filter.h>
115 +#include <linux/if_vlan.h>
116 +#include <linux/moduleloader.h>
117 +#include <linux/netdevice.h>
118 +#include <linux/string.h>
119 +#include <linux/slab.h>
120 +#include <linux/types.h>
121 +#include <asm/asm.h>
122 +#include <asm/bitops.h>
123 +#include <asm/cacheflush.h>
124 +#include <asm/cpu-features.h>
125 +#include <asm/uasm.h>
126 +
127 +#include "bpf_jit.h"
128 +
129 +/* ABI
130 + * r_skb_hl SKB header length
131 + * r_data SKB data pointer
132 + * r_off Offset
133 + * r_A BPF register A
134 + * r_X BPF register X
135 + * r_skb *skb
136 + * r_M *scratch memory
137 + * r_skb_len SKB length
138 + *
139 + * On entry (*bpf_func)(*skb, *filter)
140 + * a0 = MIPS_R_A0 = skb;
141 + * a1 = MIPS_R_A1 = filter;
142 + *
143 + * Stack
144 + * ...
145 + * M[15]
146 + * M[14]
147 + * M[13]
148 + * ...
149 + * M[0] <-- r_M
150 + * saved reg k-1
151 + * saved reg k-2
152 + * ...
153 + * saved reg 0 <-- r_sp
154 + * <no argument area>
155 + *
156 + * Packet layout
157 + *
158 + * <--------------------- len ------------------------>
159 + * <--skb-len(r_skb_hl)-->< ----- skb->data_len ------>
160 + * ----------------------------------------------------
161 + * | skb->data |
162 + * ----------------------------------------------------
163 + */
164 +
165 +#define ptr typeof(unsigned long)
166 +
167 +#define SCRATCH_OFF(k) (4 * (k))
168 +
169 +/* JIT flags */
170 +#define SEEN_CALL (1 << BPF_MEMWORDS)
171 +#define SEEN_SREG_SFT (BPF_MEMWORDS + 1)
172 +#define SEEN_SREG_BASE (1 << SEEN_SREG_SFT)
173 +#define SEEN_SREG(x) (SEEN_SREG_BASE << (x))
174 +#define SEEN_OFF SEEN_SREG(2)
175 +#define SEEN_A SEEN_SREG(3)
176 +#define SEEN_X SEEN_SREG(4)
177 +#define SEEN_SKB SEEN_SREG(5)
178 +#define SEEN_MEM SEEN_SREG(6)
179 +/* SEEN_SKB_DATA also implies skb_hl and skb_len */
180 +#define SEEN_SKB_DATA (SEEN_SREG(7) | SEEN_SREG(1) | SEEN_SREG(0))
181 +
182 +/* Arguments used by JIT */
183 +#define ARGS_USED_BY_JIT 2 /* only applicable to 64-bit */
184 +
185 +#define SBIT(x) (1 << (x)) /* Signed version of BIT() */
186 +
187 +/**
188 + * struct jit_ctx - JIT context
189 + * @skf: The sk_filter
190 + * @prologue_bytes: Number of bytes for prologue
191 + * @idx: Instruction index
192 + * @flags: JIT flags
193 + * @offsets: Instruction offsets
194 + * @target: Memory location for the compiled filter
195 + */
196 +struct jit_ctx {
197 + const struct bpf_prog *skf;
198 + unsigned int prologue_bytes;
199 + u32 idx;
200 + u32 flags;
201 + u32 *offsets;
202 + u32 *target;
203 +};
204 +
205 +
206 +static inline int optimize_div(u32 *k)
207 +{
208 + /* power of 2 divides can be implemented with right shift */
209 + if (!(*k & (*k-1))) {
210 + *k = ilog2(*k);
211 + return 1;
212 + }
213 +
214 + return 0;
215 +}
216 +
217 +static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx);
218 +
219 +/* Simply emit the instruction if the JIT memory space has been allocated */
220 +#define emit_instr(ctx, func, ...) \
221 +do { \
222 + if ((ctx)->target != NULL) { \
223 + u32 *p = &(ctx)->target[ctx->idx]; \
224 + uasm_i_##func(&p, ##__VA_ARGS__); \
225 + } \
226 + (ctx)->idx++; \
227 +} while (0)
228 +
229 +/*
230 + * Similar to emit_instr but it must be used when we need to emit
231 + * 32-bit or 64-bit instructions
232 + */
233 +#define emit_long_instr(ctx, func, ...) \
234 +do { \
235 + if ((ctx)->target != NULL) { \
236 + u32 *p = &(ctx)->target[ctx->idx]; \
237 + UASM_i_##func(&p, ##__VA_ARGS__); \
238 + } \
239 + (ctx)->idx++; \
240 +} while (0)
241 +
242 +/* Determine if immediate is within the 16-bit signed range */
243 +static inline bool is_range16(s32 imm)
244 +{
245 + return !(imm >= SBIT(15) || imm < -SBIT(15));
246 +}
247 +
248 +static inline void emit_addu(unsigned int dst, unsigned int src1,
249 + unsigned int src2, struct jit_ctx *ctx)
250 +{
251 + emit_instr(ctx, addu, dst, src1, src2);
252 +}
253 +
254 +static inline void emit_nop(struct jit_ctx *ctx)
255 +{
256 + emit_instr(ctx, nop);
257 +}
258 +
259 +/* Load a u32 immediate to a register */
260 +static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx)
261 +{
262 + if (ctx->target != NULL) {
263 + /* addiu can only handle s16 */
264 + if (!is_range16(imm)) {
265 + u32 *p = &ctx->target[ctx->idx];
266 + uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16);
267 + p = &ctx->target[ctx->idx + 1];
268 + uasm_i_ori(&p, dst, r_tmp_imm, imm & 0xffff);
269 + } else {
270 + u32 *p = &ctx->target[ctx->idx];
271 + uasm_i_addiu(&p, dst, r_zero, imm);
272 + }
273 + }
274 + ctx->idx++;
275 +
276 + if (!is_range16(imm))
277 + ctx->idx++;
278 +}
279 +
280 +static inline void emit_or(unsigned int dst, unsigned int src1,
281 + unsigned int src2, struct jit_ctx *ctx)
282 +{
283 + emit_instr(ctx, or, dst, src1, src2);
284 +}
285 +
286 +static inline void emit_ori(unsigned int dst, unsigned src, u32 imm,
287 + struct jit_ctx *ctx)
288 +{
289 + if (imm >= BIT(16)) {
290 + emit_load_imm(r_tmp, imm, ctx);
291 + emit_or(dst, src, r_tmp, ctx);
292 + } else {
293 + emit_instr(ctx, ori, dst, src, imm);
294 + }
295 +}
296 +
297 +static inline void emit_daddiu(unsigned int dst, unsigned int src,
298 + int imm, struct jit_ctx *ctx)
299 +{
300 + /*
301 + * Only used for stack, so the imm is relatively small
302 + * and it fits in 15-bits
303 + */
304 + emit_instr(ctx, daddiu, dst, src, imm);
305 +}
306 +
307 +static inline void emit_addiu(unsigned int dst, unsigned int src,
308 + u32 imm, struct jit_ctx *ctx)
309 +{
310 + if (!is_range16(imm)) {
311 + emit_load_imm(r_tmp, imm, ctx);
312 + emit_addu(dst, r_tmp, src, ctx);
313 + } else {
314 + emit_instr(ctx, addiu, dst, src, imm);
315 + }
316 +}
317 +
318 +static inline void emit_and(unsigned int dst, unsigned int src1,
319 + unsigned int src2, struct jit_ctx *ctx)
320 +{
321 + emit_instr(ctx, and, dst, src1, src2);
322 +}
323 +
324 +static inline void emit_andi(unsigned int dst, unsigned int src,
325 + u32 imm, struct jit_ctx *ctx)
326 +{
327 + /* If imm does not fit in u16 then load it to register */
328 + if (imm >= BIT(16)) {
329 + emit_load_imm(r_tmp, imm, ctx);
330 + emit_and(dst, src, r_tmp, ctx);
331 + } else {
332 + emit_instr(ctx, andi, dst, src, imm);
333 + }
334 +}
335 +
336 +static inline void emit_xor(unsigned int dst, unsigned int src1,
337 + unsigned int src2, struct jit_ctx *ctx)
338 +{
339 + emit_instr(ctx, xor, dst, src1, src2);
340 +}
341 +
342 +static inline void emit_xori(ptr dst, ptr src, u32 imm, struct jit_ctx *ctx)
343 +{
344 + /* If imm does not fit in u16 then load it to register */
345 + if (imm >= BIT(16)) {
346 + emit_load_imm(r_tmp, imm, ctx);
347 + emit_xor(dst, src, r_tmp, ctx);
348 + } else {
349 + emit_instr(ctx, xori, dst, src, imm);
350 + }
351 +}
352 +
353 +static inline void emit_stack_offset(int offset, struct jit_ctx *ctx)
354 +{
355 + emit_long_instr(ctx, ADDIU, r_sp, r_sp, offset);
356 +}
357 +
358 +static inline void emit_subu(unsigned int dst, unsigned int src1,
359 + unsigned int src2, struct jit_ctx *ctx)
360 +{
361 + emit_instr(ctx, subu, dst, src1, src2);
362 +}
363 +
364 +static inline void emit_neg(unsigned int reg, struct jit_ctx *ctx)
365 +{
366 + emit_subu(reg, r_zero, reg, ctx);
367 +}
368 +
369 +static inline void emit_sllv(unsigned int dst, unsigned int src,
370 + unsigned int sa, struct jit_ctx *ctx)
371 +{
372 + emit_instr(ctx, sllv, dst, src, sa);
373 +}
374 +
375 +static inline void emit_sll(unsigned int dst, unsigned int src,
376 + unsigned int sa, struct jit_ctx *ctx)
377 +{
378 + /* sa is 5-bits long */
379 + if (sa >= BIT(5))
380 + /* Shifting >= 32 results in zero */
381 + emit_jit_reg_move(dst, r_zero, ctx);
382 + else
383 + emit_instr(ctx, sll, dst, src, sa);
384 +}
385 +
386 +static inline void emit_srlv(unsigned int dst, unsigned int src,
387 + unsigned int sa, struct jit_ctx *ctx)
388 +{
389 + emit_instr(ctx, srlv, dst, src, sa);
390 +}
391 +
392 +static inline void emit_srl(unsigned int dst, unsigned int src,
393 + unsigned int sa, struct jit_ctx *ctx)
394 +{
395 + /* sa is 5-bits long */
396 + if (sa >= BIT(5))
397 + /* Shifting >= 32 results in zero */
398 + emit_jit_reg_move(dst, r_zero, ctx);
399 + else
400 + emit_instr(ctx, srl, dst, src, sa);
401 +}
402 +
403 +static inline void emit_slt(unsigned int dst, unsigned int src1,
404 + unsigned int src2, struct jit_ctx *ctx)
405 +{
406 + emit_instr(ctx, slt, dst, src1, src2);
407 +}
408 +
409 +static inline void emit_sltu(unsigned int dst, unsigned int src1,
410 + unsigned int src2, struct jit_ctx *ctx)
411 +{
412 + emit_instr(ctx, sltu, dst, src1, src2);
413 +}
414 +
415 +static inline void emit_sltiu(unsigned dst, unsigned int src,
416 + unsigned int imm, struct jit_ctx *ctx)
417 +{
418 + /* 16 bit immediate */
419 + if (!is_range16((s32)imm)) {
420 + emit_load_imm(r_tmp, imm, ctx);
421 + emit_sltu(dst, src, r_tmp, ctx);
422 + } else {
423 + emit_instr(ctx, sltiu, dst, src, imm);
424 + }
425 +
426 +}
427 +
428 +/* Store register on the stack */
429 +static inline void emit_store_stack_reg(ptr reg, ptr base,
430 + unsigned int offset,
431 + struct jit_ctx *ctx)
432 +{
433 + emit_long_instr(ctx, SW, reg, offset, base);
434 +}
435 +
436 +static inline void emit_store(ptr reg, ptr base, unsigned int offset,
437 + struct jit_ctx *ctx)
438 +{
439 + emit_instr(ctx, sw, reg, offset, base);
440 +}
441 +
442 +static inline void emit_load_stack_reg(ptr reg, ptr base,
443 + unsigned int offset,
444 + struct jit_ctx *ctx)
445 +{
446 + emit_long_instr(ctx, LW, reg, offset, base);
447 +}
448 +
449 +static inline void emit_load(unsigned int reg, unsigned int base,
450 + unsigned int offset, struct jit_ctx *ctx)
451 +{
452 + emit_instr(ctx, lw, reg, offset, base);
453 +}
454 +
455 +static inline void emit_load_byte(unsigned int reg, unsigned int base,
456 + unsigned int offset, struct jit_ctx *ctx)
457 +{
458 + emit_instr(ctx, lb, reg, offset, base);
459 +}
460 +
461 +static inline void emit_half_load(unsigned int reg, unsigned int base,
462 + unsigned int offset, struct jit_ctx *ctx)
463 +{
464 + emit_instr(ctx, lh, reg, offset, base);
465 +}
466 +
467 +static inline void emit_half_load_unsigned(unsigned int reg, unsigned int base,
468 + unsigned int offset, struct jit_ctx *ctx)
469 +{
470 + emit_instr(ctx, lhu, reg, offset, base);
471 +}
472 +
473 +static inline void emit_mul(unsigned int dst, unsigned int src1,
474 + unsigned int src2, struct jit_ctx *ctx)
475 +{
476 + emit_instr(ctx, mul, dst, src1, src2);
477 +}
478 +
479 +static inline void emit_div(unsigned int dst, unsigned int src,
480 + struct jit_ctx *ctx)
481 +{
482 + if (ctx->target != NULL) {
483 + u32 *p = &ctx->target[ctx->idx];
484 + uasm_i_divu(&p, dst, src);
485 + p = &ctx->target[ctx->idx + 1];
486 + uasm_i_mflo(&p, dst);
487 + }
488 + ctx->idx += 2; /* 2 insts */
489 +}
490 +
491 +static inline void emit_mod(unsigned int dst, unsigned int src,
492 + struct jit_ctx *ctx)
493 +{
494 + if (ctx->target != NULL) {
495 + u32 *p = &ctx->target[ctx->idx];
496 + uasm_i_divu(&p, dst, src);
497 + p = &ctx->target[ctx->idx + 1];
498 + uasm_i_mfhi(&p, dst);
499 + }
500 + ctx->idx += 2; /* 2 insts */
501 +}
502 +
503 +static inline void emit_dsll(unsigned int dst, unsigned int src,
504 + unsigned int sa, struct jit_ctx *ctx)
505 +{
506 + emit_instr(ctx, dsll, dst, src, sa);
507 +}
508 +
509 +static inline void emit_dsrl32(unsigned int dst, unsigned int src,
510 + unsigned int sa, struct jit_ctx *ctx)
511 +{
512 + emit_instr(ctx, dsrl32, dst, src, sa);
513 +}
514 +
515 +static inline void emit_wsbh(unsigned int dst, unsigned int src,
516 + struct jit_ctx *ctx)
517 +{
518 + emit_instr(ctx, wsbh, dst, src);
519 +}
520 +
521 +/* load pointer to register */
522 +static inline void emit_load_ptr(unsigned int dst, unsigned int src,
523 + int imm, struct jit_ctx *ctx)
524 +{
525 + /* src contains the base addr of the 32/64-bit pointer */
526 + emit_long_instr(ctx, LW, dst, imm, src);
527 +}
528 +
529 +/* load a function pointer to register */
530 +static inline void emit_load_func(unsigned int reg, ptr imm,
531 + struct jit_ctx *ctx)
532 +{
533 + if (IS_ENABLED(CONFIG_64BIT)) {
534 + /* At this point imm is always 64-bit */
535 + emit_load_imm(r_tmp, (u64)imm >> 32, ctx);
536 + emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
537 + emit_ori(r_tmp, r_tmp_imm, (imm >> 16) & 0xffff, ctx);
538 + emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
539 + emit_ori(reg, r_tmp_imm, imm & 0xffff, ctx);
540 + } else {
541 + emit_load_imm(reg, imm, ctx);
542 + }
543 +}
544 +
545 +/* Move to real MIPS register */
546 +static inline void emit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
547 +{
548 + emit_long_instr(ctx, ADDU, dst, src, r_zero);
549 +}
550 +
551 +/* Move to JIT (32-bit) register */
552 +static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
553 +{
554 + emit_addu(dst, src, r_zero, ctx);
555 +}
556 +
557 +/* Compute the immediate value for PC-relative branches. */
558 +static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx)
559 +{
560 + if (ctx->target == NULL)
561 + return 0;
562 +
563 + /*
564 + * We want a pc-relative branch. We only do forward branches
565 + * so tgt is always after pc. tgt is the instruction offset
566 + * we want to jump to.
567 +
568 + * Branch on MIPS:
569 + * I: target_offset <- sign_extend(offset)
570 + * I+1: PC += target_offset (delay slot)
571 + *
572 + * ctx->idx currently points to the branch instruction
573 + * but the offset is added to the delay slot so we need
574 + * to subtract 4.
575 + */
576 + return ctx->offsets[tgt] -
577 + (ctx->idx * 4 - ctx->prologue_bytes) - 4;
578 +}
579 +
580 +static inline void emit_bcond(int cond, unsigned int reg1, unsigned int reg2,
581 + unsigned int imm, struct jit_ctx *ctx)
582 +{
583 + if (ctx->target != NULL) {
584 + u32 *p = &ctx->target[ctx->idx];
585 +
586 + switch (cond) {
587 + case MIPS_COND_EQ:
588 + uasm_i_beq(&p, reg1, reg2, imm);
589 + break;
590 + case MIPS_COND_NE:
591 + uasm_i_bne(&p, reg1, reg2, imm);
592 + break;
593 + case MIPS_COND_ALL:
594 + uasm_i_b(&p, imm);
595 + break;
596 + default:
597 + pr_warn("%s: Unhandled branch conditional: %d\n",
598 + __func__, cond);
599 + }
600 + }
601 + ctx->idx++;
602 +}
603 +
604 +static inline void emit_b(unsigned int imm, struct jit_ctx *ctx)
605 +{
606 + emit_bcond(MIPS_COND_ALL, r_zero, r_zero, imm, ctx);
607 +}
608 +
609 +static inline void emit_jalr(unsigned int link, unsigned int reg,
610 + struct jit_ctx *ctx)
611 +{
612 + emit_instr(ctx, jalr, link, reg);
613 +}
614 +
615 +static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx)
616 +{
617 + emit_instr(ctx, jr, reg);
618 +}
619 +
620 +static inline u16 align_sp(unsigned int num)
621 +{
622 + /* Double word alignment for 32-bit, quadword for 64-bit */
623 + unsigned int align = IS_ENABLED(CONFIG_64BIT) ? 16 : 8;
624 + num = (num + (align - 1)) & -align;
625 + return num;
626 +}
627 +
628 +static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
629 +{
630 + int i = 0, real_off = 0;
631 + u32 sflags, tmp_flags;
632 +
633 + /* Adjust the stack pointer */
634 + if (offset)
635 + emit_stack_offset(-align_sp(offset), ctx);
636 +
637 + tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
638 + /* sflags is essentially a bitmap */
639 + while (tmp_flags) {
640 + if ((sflags >> i) & 0x1) {
641 + emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
642 + ctx);
643 + real_off += SZREG;
644 + }
645 + i++;
646 + tmp_flags >>= 1;
647 + }
648 +
649 + /* save return address */
650 + if (ctx->flags & SEEN_CALL) {
651 + emit_store_stack_reg(r_ra, r_sp, real_off, ctx);
652 + real_off += SZREG;
653 + }
654 +
655 + /* Setup r_M leaving the alignment gap if necessary */
656 + if (ctx->flags & SEEN_MEM) {
657 + if (real_off % (SZREG * 2))
658 + real_off += SZREG;
659 + emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off);
660 + }
661 +}
662 +
663 +static void restore_bpf_jit_regs(struct jit_ctx *ctx,
664 + unsigned int offset)
665 +{
666 + int i, real_off = 0;
667 + u32 sflags, tmp_flags;
668 +
669 + tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
670 + /* sflags is a bitmap */
671 + i = 0;
672 + while (tmp_flags) {
673 + if ((sflags >> i) & 0x1) {
674 + emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
675 + ctx);
676 + real_off += SZREG;
677 + }
678 + i++;
679 + tmp_flags >>= 1;
680 + }
681 +
682 + /* restore return address */
683 + if (ctx->flags & SEEN_CALL)
684 + emit_load_stack_reg(r_ra, r_sp, real_off, ctx);
685 +
686 + /* Restore the sp and discard the scratch memory */
687 + if (offset)
688 + emit_stack_offset(align_sp(offset), ctx);
689 +}
690 +
691 +static unsigned int get_stack_depth(struct jit_ctx *ctx)
692 +{
693 + int sp_off = 0;
694 +
695 +
696 + /* How many s* regs do we need to preserve? */
697 + sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * SZREG;
698 +
699 + if (ctx->flags & SEEN_MEM)
700 + sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */
701 +
702 + if (ctx->flags & SEEN_CALL)
703 + sp_off += SZREG; /* Space for our ra register */
704 +
705 + return sp_off;
706 +}
707 +
708 +static void build_prologue(struct jit_ctx *ctx)
709 +{
710 + int sp_off;
711 +
712 + /* Calculate the total offset for the stack pointer */
713 + sp_off = get_stack_depth(ctx);
714 + save_bpf_jit_regs(ctx, sp_off);
715 +
716 + if (ctx->flags & SEEN_SKB)
717 + emit_reg_move(r_skb, MIPS_R_A0, ctx);
718 +
719 + if (ctx->flags & SEEN_SKB_DATA) {
720 + /* Load packet length */
721 + emit_load(r_skb_len, r_skb, offsetof(struct sk_buff, len),
722 + ctx);
723 + emit_load(r_tmp, r_skb, offsetof(struct sk_buff, data_len),
724 + ctx);
725 + /* Load the data pointer */
726 + emit_load_ptr(r_skb_data, r_skb,
727 + offsetof(struct sk_buff, data), ctx);
728 + /* Load the header length */
729 + emit_subu(r_skb_hl, r_skb_len, r_tmp, ctx);
730 + }
731 +
732 + if (ctx->flags & SEEN_X)
733 + emit_jit_reg_move(r_X, r_zero, ctx);
734 +
735 + /*
736 + * Do not leak kernel data to userspace, we only need to clear
737 + * r_A if it is ever used. In fact if it is never used, we
738 + * will not save/restore it, so clearing it in this case would
739 + * corrupt the state of the caller.
740 + */
741 + if (bpf_needs_clear_a(&ctx->skf->insns[0]) &&
742 + (ctx->flags & SEEN_A))
743 + emit_jit_reg_move(r_A, r_zero, ctx);
744 +}
745 +
746 +static void build_epilogue(struct jit_ctx *ctx)
747 +{
748 + unsigned int sp_off;
749 +
750 + /* Calculate the total offset for the stack pointer */
751 +
752 + sp_off = get_stack_depth(ctx);
753 + restore_bpf_jit_regs(ctx, sp_off);
754 +
755 + /* Return */
756 + emit_jr(r_ra, ctx);
757 + emit_nop(ctx);
758 +}
759 +
760 +#define CHOOSE_LOAD_FUNC(K, func) \
761 + ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative : func) : \
762 + func##_positive)
763 +
764 +static int build_body(struct jit_ctx *ctx)
765 +{
766 + const struct bpf_prog *prog = ctx->skf;
767 + const struct sock_filter *inst;
768 + unsigned int i, off, condt;
769 + u32 k, b_off __maybe_unused;
770 + u8 (*sk_load_func)(unsigned long *skb, int offset);
771 +
772 + for (i = 0; i < prog->len; i++) {
773 + u16 code;
774 +
775 + inst = &(prog->insns[i]);
776 + pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n",
777 + __func__, inst->code, inst->jt, inst->jf, inst->k);
778 + k = inst->k;
779 + code = bpf_anc_helper(inst);
780 +
781 + if (ctx->target == NULL)
782 + ctx->offsets[i] = ctx->idx * 4;
783 +
784 + switch (code) {
785 + case BPF_LD | BPF_IMM:
786 + /* A <- k ==> li r_A, k */
787 + ctx->flags |= SEEN_A;
788 + emit_load_imm(r_A, k, ctx);
789 + break;
790 + case BPF_LD | BPF_W | BPF_LEN:
791 + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
792 + /* A <- len ==> lw r_A, offset(skb) */
793 + ctx->flags |= SEEN_SKB | SEEN_A;
794 + off = offsetof(struct sk_buff, len);
795 + emit_load(r_A, r_skb, off, ctx);
796 + break;
797 + case BPF_LD | BPF_MEM:
798 + /* A <- M[k] ==> lw r_A, offset(M) */
799 + ctx->flags |= SEEN_MEM | SEEN_A;
800 + emit_load(r_A, r_M, SCRATCH_OFF(k), ctx);
801 + break;
802 + case BPF_LD | BPF_W | BPF_ABS:
803 + /* A <- P[k:4] */
804 + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_word);
805 + goto load;
806 + case BPF_LD | BPF_H | BPF_ABS:
807 + /* A <- P[k:2] */
808 + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_half);
809 + goto load;
810 + case BPF_LD | BPF_B | BPF_ABS:
811 + /* A <- P[k:1] */
812 + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_byte);
813 +load:
814 + emit_load_imm(r_off, k, ctx);
815 +load_common:
816 + ctx->flags |= SEEN_CALL | SEEN_OFF |
817 + SEEN_SKB | SEEN_A | SEEN_SKB_DATA;
818 +
819 + emit_load_func(r_s0, (ptr)sk_load_func, ctx);
820 + emit_reg_move(MIPS_R_A0, r_skb, ctx);
821 + emit_jalr(MIPS_R_RA, r_s0, ctx);
822 + /* Load second argument to delay slot */
823 + emit_reg_move(MIPS_R_A1, r_off, ctx);
824 + /* Check the error value */
825 + emit_bcond(MIPS_COND_EQ, r_ret, 0, b_imm(i + 1, ctx),
826 + ctx);
827 + /* Load return register on DS for failures */
828 + emit_reg_move(r_ret, r_zero, ctx);
829 + /* Return with error */
830 + emit_b(b_imm(prog->len, ctx), ctx);
831 + emit_nop(ctx);
832 + break;
833 + case BPF_LD | BPF_W | BPF_IND:
834 + /* A <- P[X + k:4] */
835 + sk_load_func = sk_load_word;
836 + goto load_ind;
837 + case BPF_LD | BPF_H | BPF_IND:
838 + /* A <- P[X + k:2] */
839 + sk_load_func = sk_load_half;
840 + goto load_ind;
841 + case BPF_LD | BPF_B | BPF_IND:
842 + /* A <- P[X + k:1] */
843 + sk_load_func = sk_load_byte;
844 +load_ind:
845 + ctx->flags |= SEEN_OFF | SEEN_X;
846 + emit_addiu(r_off, r_X, k, ctx);
847 + goto load_common;
848 + case BPF_LDX | BPF_IMM:
849 + /* X <- k */
850 + ctx->flags |= SEEN_X;
851 + emit_load_imm(r_X, k, ctx);
852 + break;
853 + case BPF_LDX | BPF_MEM:
854 + /* X <- M[k] */
855 + ctx->flags |= SEEN_X | SEEN_MEM;
856 + emit_load(r_X, r_M, SCRATCH_OFF(k), ctx);
857 + break;
858 + case BPF_LDX | BPF_W | BPF_LEN:
859 + /* X <- len */
860 + ctx->flags |= SEEN_X | SEEN_SKB;
861 + off = offsetof(struct sk_buff, len);
862 + emit_load(r_X, r_skb, off, ctx);
863 + break;
864 + case BPF_LDX | BPF_B | BPF_MSH:
865 + /* X <- 4 * (P[k:1] & 0xf) */
866 + ctx->flags |= SEEN_X | SEEN_CALL | SEEN_SKB;
867 + /* Load offset to a1 */
868 + emit_load_func(r_s0, (ptr)sk_load_byte, ctx);
869 + /*
870 + * This may emit two instructions so it may not fit
871 + * in the delay slot. So use a0 in the delay slot.
872 + */
873 + emit_load_imm(MIPS_R_A1, k, ctx);
874 + emit_jalr(MIPS_R_RA, r_s0, ctx);
875 + emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */
876 + /* Check the error value */
877 + emit_bcond(MIPS_COND_NE, r_ret, 0,
878 + b_imm(prog->len, ctx), ctx);
879 + emit_reg_move(r_ret, r_zero, ctx);
880 + /* We are good */
881 + /* X <- P[k:1] & 0xf */
882 + emit_andi(r_X, r_A, 0xf, ctx);
883 + /* X << 2 */
884 + emit_b(b_imm(i + 1, ctx), ctx);
885 + emit_sll(r_X, r_X, 2, ctx); /* delay slot */
886 + break;
887 + case BPF_ST:
888 + /* M[k] <- A */
889 + ctx->flags |= SEEN_MEM | SEEN_A;
890 + emit_store(r_A, r_M, SCRATCH_OFF(k), ctx);
891 + break;
892 + case BPF_STX:
893 + /* M[k] <- X */
894 + ctx->flags |= SEEN_MEM | SEEN_X;
895 + emit_store(r_X, r_M, SCRATCH_OFF(k), ctx);
896 + break;
897 + case BPF_ALU | BPF_ADD | BPF_K:
898 + /* A += K */
899 + ctx->flags |= SEEN_A;
900 + emit_addiu(r_A, r_A, k, ctx);
901 + break;
902 + case BPF_ALU | BPF_ADD | BPF_X:
903 + /* A += X */
904 + ctx->flags |= SEEN_A | SEEN_X;
905 + emit_addu(r_A, r_A, r_X, ctx);
906 + break;
907 + case BPF_ALU | BPF_SUB | BPF_K:
908 + /* A -= K */
909 + ctx->flags |= SEEN_A;
910 + emit_addiu(r_A, r_A, -k, ctx);
911 + break;
912 + case BPF_ALU | BPF_SUB | BPF_X:
913 + /* A -= X */
914 + ctx->flags |= SEEN_A | SEEN_X;
915 + emit_subu(r_A, r_A, r_X, ctx);
916 + break;
917 + case BPF_ALU | BPF_MUL | BPF_K:
918 + /* A *= K */
919 + /* Load K to scratch register before MUL */
920 + ctx->flags |= SEEN_A;
921 + emit_load_imm(r_s0, k, ctx);
922 + emit_mul(r_A, r_A, r_s0, ctx);
923 + break;
924 + case BPF_ALU | BPF_MUL | BPF_X:
925 + /* A *= X */
926 + ctx->flags |= SEEN_A | SEEN_X;
927 + emit_mul(r_A, r_A, r_X, ctx);
928 + break;
929 + case BPF_ALU | BPF_DIV | BPF_K:
930 + /* A /= k */
931 + if (k == 1)
932 + break;
933 + if (optimize_div(&k)) {
934 + ctx->flags |= SEEN_A;
935 + emit_srl(r_A, r_A, k, ctx);
936 + break;
937 + }
938 + ctx->flags |= SEEN_A;
939 + emit_load_imm(r_s0, k, ctx);
940 + emit_div(r_A, r_s0, ctx);
941 + break;
942 + case BPF_ALU | BPF_MOD | BPF_K:
943 + /* A %= k */
944 + if (k == 1) {
945 + ctx->flags |= SEEN_A;
946 + emit_jit_reg_move(r_A, r_zero, ctx);
947 + } else {
948 + ctx->flags |= SEEN_A;
949 + emit_load_imm(r_s0, k, ctx);
950 + emit_mod(r_A, r_s0, ctx);
951 + }
952 + break;
953 + case BPF_ALU | BPF_DIV | BPF_X:
954 + /* A /= X */
955 + ctx->flags |= SEEN_X | SEEN_A;
956 + /* Check if r_X is zero */
957 + emit_bcond(MIPS_COND_EQ, r_X, r_zero,
958 + b_imm(prog->len, ctx), ctx);
959 + emit_load_imm(r_ret, 0, ctx); /* delay slot */
960 + emit_div(r_A, r_X, ctx);
961 + break;
962 + case BPF_ALU | BPF_MOD | BPF_X:
963 + /* A %= X */
964 + ctx->flags |= SEEN_X | SEEN_A;
965 + /* Check if r_X is zero */
966 + emit_bcond(MIPS_COND_EQ, r_X, r_zero,
967 + b_imm(prog->len, ctx), ctx);
968 + emit_load_imm(r_ret, 0, ctx); /* delay slot */
969 + emit_mod(r_A, r_X, ctx);
970 + break;
971 + case BPF_ALU | BPF_OR | BPF_K:
972 + /* A |= K */
973 + ctx->flags |= SEEN_A;
974 + emit_ori(r_A, r_A, k, ctx);
975 + break;
976 + case BPF_ALU | BPF_OR | BPF_X:
977 + /* A |= X */
978 + ctx->flags |= SEEN_A;
979 + emit_ori(r_A, r_A, r_X, ctx);
980 + break;
981 + case BPF_ALU | BPF_XOR | BPF_K:
982 + /* A ^= k */
983 + ctx->flags |= SEEN_A;
984 + emit_xori(r_A, r_A, k, ctx);
985 + break;
986 + case BPF_ANC | SKF_AD_ALU_XOR_X:
987 + case BPF_ALU | BPF_XOR | BPF_X:
988 + /* A ^= X */
989 + ctx->flags |= SEEN_A;
990 + emit_xor(r_A, r_A, r_X, ctx);
991 + break;
992 + case BPF_ALU | BPF_AND | BPF_K:
993 + /* A &= K */
994 + ctx->flags |= SEEN_A;
995 + emit_andi(r_A, r_A, k, ctx);
996 + break;
997 + case BPF_ALU | BPF_AND | BPF_X:
998 + /* A &= X */
999 + ctx->flags |= SEEN_A | SEEN_X;
1000 + emit_and(r_A, r_A, r_X, ctx);
1001 + break;
1002 + case BPF_ALU | BPF_LSH | BPF_K:
1003 + /* A <<= K */
1004 + ctx->flags |= SEEN_A;
1005 + emit_sll(r_A, r_A, k, ctx);
1006 + break;
1007 + case BPF_ALU | BPF_LSH | BPF_X:
1008 + /* A <<= X */
1009 + ctx->flags |= SEEN_A | SEEN_X;
1010 + emit_sllv(r_A, r_A, r_X, ctx);
1011 + break;
1012 + case BPF_ALU | BPF_RSH | BPF_K:
1013 + /* A >>= K */
1014 + ctx->flags |= SEEN_A;
1015 + emit_srl(r_A, r_A, k, ctx);
1016 + break;
1017 + case BPF_ALU | BPF_RSH | BPF_X:
1018 + ctx->flags |= SEEN_A | SEEN_X;
1019 + emit_srlv(r_A, r_A, r_X, ctx);
1020 + break;
1021 + case BPF_ALU | BPF_NEG:
1022 + /* A = -A */
1023 + ctx->flags |= SEEN_A;
1024 + emit_neg(r_A, ctx);
1025 + break;
1026 + case BPF_JMP | BPF_JA:
1027 + /* pc += K */
1028 + emit_b(b_imm(i + k + 1, ctx), ctx);
1029 + emit_nop(ctx);
1030 + break;
1031 + case BPF_JMP | BPF_JEQ | BPF_K:
1032 + /* pc += ( A == K ) ? pc->jt : pc->jf */
1033 + condt = MIPS_COND_EQ | MIPS_COND_K;
1034 + goto jmp_cmp;
1035 + case BPF_JMP | BPF_JEQ | BPF_X:
1036 + ctx->flags |= SEEN_X;
1037 + /* pc += ( A == X ) ? pc->jt : pc->jf */
1038 + condt = MIPS_COND_EQ | MIPS_COND_X;
1039 + goto jmp_cmp;
1040 + case BPF_JMP | BPF_JGE | BPF_K:
1041 + /* pc += ( A >= K ) ? pc->jt : pc->jf */
1042 + condt = MIPS_COND_GE | MIPS_COND_K;
1043 + goto jmp_cmp;
1044 + case BPF_JMP | BPF_JGE | BPF_X:
1045 + ctx->flags |= SEEN_X;
1046 + /* pc += ( A >= X ) ? pc->jt : pc->jf */
1047 + condt = MIPS_COND_GE | MIPS_COND_X;
1048 + goto jmp_cmp;
1049 + case BPF_JMP | BPF_JGT | BPF_K:
1050 + /* pc += ( A > K ) ? pc->jt : pc->jf */
1051 + condt = MIPS_COND_GT | MIPS_COND_K;
1052 + goto jmp_cmp;
1053 + case BPF_JMP | BPF_JGT | BPF_X:
1054 + ctx->flags |= SEEN_X;
1055 + /* pc += ( A > X ) ? pc->jt : pc->jf */
1056 + condt = MIPS_COND_GT | MIPS_COND_X;
1057 +jmp_cmp:
1058 + /* Greater or Equal */
1059 + if ((condt & MIPS_COND_GE) ||
1060 + (condt & MIPS_COND_GT)) {
1061 + if (condt & MIPS_COND_K) { /* K */
1062 + ctx->flags |= SEEN_A;
1063 + emit_sltiu(r_s0, r_A, k, ctx);
1064 + } else { /* X */
1065 + ctx->flags |= SEEN_A |
1066 + SEEN_X;
1067 + emit_sltu(r_s0, r_A, r_X, ctx);
1068 + }
1069 + /* A < (K|X) ? r_scratch = 1 */
1070 + b_off = b_imm(i + inst->jf + 1, ctx);
1071 + emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off,
1072 + ctx);
1073 + emit_nop(ctx);
1074 + /* A > (K|X) ? scratch = 0 */
1075 + if (condt & MIPS_COND_GT) {
1076 + /* Checking for equality */
1077 + ctx->flags |= SEEN_A | SEEN_X;
1078 + if (condt & MIPS_COND_K)
1079 + emit_load_imm(r_s0, k, ctx);
1080 + else
1081 + emit_jit_reg_move(r_s0, r_X,
1082 + ctx);
1083 + b_off = b_imm(i + inst->jf + 1, ctx);
1084 + emit_bcond(MIPS_COND_EQ, r_A, r_s0,
1085 + b_off, ctx);
1086 + emit_nop(ctx);
1087 + /* Finally, A > K|X */
1088 + b_off = b_imm(i + inst->jt + 1, ctx);
1089 + emit_b(b_off, ctx);
1090 + emit_nop(ctx);
1091 + } else {
1092 + /* A >= (K|X) so jump */
1093 + b_off = b_imm(i + inst->jt + 1, ctx);
1094 + emit_b(b_off, ctx);
1095 + emit_nop(ctx);
1096 + }
1097 + } else {
1098 + /* A == K|X */
1099 + if (condt & MIPS_COND_K) { /* K */
1100 + ctx->flags |= SEEN_A;
1101 + emit_load_imm(r_s0, k, ctx);
1102 + /* jump true */
1103 + b_off = b_imm(i + inst->jt + 1, ctx);
1104 + emit_bcond(MIPS_COND_EQ, r_A, r_s0,
1105 + b_off, ctx);
1106 + emit_nop(ctx);
1107 + /* jump false */
1108 + b_off = b_imm(i + inst->jf + 1,
1109 + ctx);
1110 + emit_bcond(MIPS_COND_NE, r_A, r_s0,
1111 + b_off, ctx);
1112 + emit_nop(ctx);
1113 + } else { /* X */
1114 + /* jump true */
1115 + ctx->flags |= SEEN_A | SEEN_X;
1116 + b_off = b_imm(i + inst->jt + 1,
1117 + ctx);
1118 + emit_bcond(MIPS_COND_EQ, r_A, r_X,
1119 + b_off, ctx);
1120 + emit_nop(ctx);
1121 + /* jump false */
1122 + b_off = b_imm(i + inst->jf + 1, ctx);
1123 + emit_bcond(MIPS_COND_NE, r_A, r_X,
1124 + b_off, ctx);
1125 + emit_nop(ctx);
1126 + }
1127 + }
1128 + break;
1129 + case BPF_JMP | BPF_JSET | BPF_K:
1130 + ctx->flags |= SEEN_A;
1131 + /* pc += (A & K) ? pc -> jt : pc -> jf */
1132 + emit_load_imm(r_s1, k, ctx);
1133 + emit_and(r_s0, r_A, r_s1, ctx);
1134 + /* jump true */
1135 + b_off = b_imm(i + inst->jt + 1, ctx);
1136 + emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
1137 + emit_nop(ctx);
1138 + /* jump false */
1139 + b_off = b_imm(i + inst->jf + 1, ctx);
1140 + emit_b(b_off, ctx);
1141 + emit_nop(ctx);
1142 + break;
1143 + case BPF_JMP | BPF_JSET | BPF_X:
1144 + ctx->flags |= SEEN_X | SEEN_A;
1145 + /* pc += (A & X) ? pc -> jt : pc -> jf */
1146 + emit_and(r_s0, r_A, r_X, ctx);
1147 + /* jump true */
1148 + b_off = b_imm(i + inst->jt + 1, ctx);
1149 + emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
1150 + emit_nop(ctx);
1151 + /* jump false */
1152 + b_off = b_imm(i + inst->jf + 1, ctx);
1153 + emit_b(b_off, ctx);
1154 + emit_nop(ctx);
1155 + break;
1156 + case BPF_RET | BPF_A:
1157 + ctx->flags |= SEEN_A;
1158 + if (i != prog->len - 1)
1159 + /*
1160 + * If this is not the last instruction
1161 + * then jump to the epilogue
1162 + */
1163 + emit_b(b_imm(prog->len, ctx), ctx);
1164 + emit_reg_move(r_ret, r_A, ctx); /* delay slot */
1165 + break;
1166 + case BPF_RET | BPF_K:
1167 + /*
1168 + * It can emit two instructions so it does not fit in
1169 + * the delay slot.
1170 + */
1171 + emit_load_imm(r_ret, k, ctx);
1172 + if (i != prog->len - 1) {
1173 + /*
1174 + * If this is not the last instruction
1175 + * then jump to the epilogue
1176 + */
1177 + emit_b(b_imm(prog->len, ctx), ctx);
1178 + emit_nop(ctx);
1179 + }
1180 + break;
1181 + case BPF_MISC | BPF_TAX:
1182 + /* X = A */
1183 + ctx->flags |= SEEN_X | SEEN_A;
1184 + emit_jit_reg_move(r_X, r_A, ctx);
1185 + break;
1186 + case BPF_MISC | BPF_TXA:
1187 + /* A = X */
1188 + ctx->flags |= SEEN_A | SEEN_X;
1189 + emit_jit_reg_move(r_A, r_X, ctx);
1190 + break;
1191 + /* AUX */
1192 + case BPF_ANC | SKF_AD_PROTOCOL:
1193 + /* A = ntohs(skb->protocol) */
1194 + ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A;
1195 + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
1196 + protocol) != 2);
1197 + off = offsetof(struct sk_buff, protocol);
1198 + emit_half_load(r_A, r_skb, off, ctx);
1199 +#ifdef CONFIG_CPU_LITTLE_ENDIAN
1200 + /* This needs little endian fixup */
1201 + if (cpu_has_wsbh) {
1202 + /* R2 and later have the wsbh instruction */
1203 + emit_wsbh(r_A, r_A, ctx);
1204 + } else {
1205 + /* Get first byte */
1206 + emit_andi(r_tmp_imm, r_A, 0xff, ctx);
1207 + /* Shift it */
1208 + emit_sll(r_tmp, r_tmp_imm, 8, ctx);
1209 + /* Get second byte */
1210 + emit_srl(r_tmp_imm, r_A, 8, ctx);
1211 + emit_andi(r_tmp_imm, r_tmp_imm, 0xff, ctx);
1212 + /* Put everything together in r_A */
1213 + emit_or(r_A, r_tmp, r_tmp_imm, ctx);
1214 + }
1215 +#endif
1216 + break;
1217 + case BPF_ANC | SKF_AD_CPU:
1218 + ctx->flags |= SEEN_A | SEEN_OFF;
1219 + /* A = current_thread_info()->cpu */
1220 + BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info,
1221 + cpu) != 4);
1222 + off = offsetof(struct thread_info, cpu);
1223 + /* $28/gp points to the thread_info struct */
1224 + emit_load(r_A, 28, off, ctx);
1225 + break;
1226 + case BPF_ANC | SKF_AD_IFINDEX:
1227 + /* A = skb->dev->ifindex */
1228 + case BPF_ANC | SKF_AD_HATYPE:
1229 + /* A = skb->dev->type */
1230 + ctx->flags |= SEEN_SKB | SEEN_A;
1231 + off = offsetof(struct sk_buff, dev);
1232 + /* Load *dev pointer */
1233 + emit_load_ptr(r_s0, r_skb, off, ctx);
1234 + /* error (0) in the delay slot */
1235 + emit_bcond(MIPS_COND_EQ, r_s0, r_zero,
1236 + b_imm(prog->len, ctx), ctx);
1237 + emit_reg_move(r_ret, r_zero, ctx);
1238 + if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
1239 + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
1240 + off = offsetof(struct net_device, ifindex);
1241 + emit_load(r_A, r_s0, off, ctx);
1242 + } else { /* code == (BPF_ANC | SKF_AD_HATYPE) */
1243 + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
1244 + off = offsetof(struct net_device, type);
1245 + emit_half_load_unsigned(r_A, r_s0, off, ctx);
1246 + }
1247 + break;
1248 + case BPF_ANC | SKF_AD_MARK:
1249 + ctx->flags |= SEEN_SKB | SEEN_A;
1250 + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
1251 + off = offsetof(struct sk_buff, mark);
1252 + emit_load(r_A, r_skb, off, ctx);
1253 + break;
1254 + case BPF_ANC | SKF_AD_RXHASH:
1255 + ctx->flags |= SEEN_SKB | SEEN_A;
1256 + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
1257 + off = offsetof(struct sk_buff, hash);
1258 + emit_load(r_A, r_skb, off, ctx);
1259 + break;
1260 + case BPF_ANC | SKF_AD_VLAN_TAG:
1261 + ctx->flags |= SEEN_SKB | SEEN_A;
1262 + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
1263 + vlan_tci) != 2);
1264 + off = offsetof(struct sk_buff, vlan_tci);
1265 + emit_half_load_unsigned(r_A, r_skb, off, ctx);
1266 + break;
1267 + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
1268 + ctx->flags |= SEEN_SKB | SEEN_A;
1269 + emit_load_byte(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET(), ctx);
1270 + if (PKT_VLAN_PRESENT_BIT)
1271 + emit_srl(r_A, r_A, PKT_VLAN_PRESENT_BIT, ctx);
1272 + if (PKT_VLAN_PRESENT_BIT < 7)
1273 + emit_andi(r_A, r_A, 1, ctx);
1274 + break;
1275 + case BPF_ANC | SKF_AD_PKTTYPE:
1276 + ctx->flags |= SEEN_SKB;
1277 +
1278 + emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx);
1279 + /* Keep only the last 3 bits */
1280 + emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx);
1281 +#ifdef __BIG_ENDIAN_BITFIELD
1282 + /* Get the actual packet type to the lower 3 bits */
1283 + emit_srl(r_A, r_A, 5, ctx);
1284 +#endif
1285 + break;
1286 + case BPF_ANC | SKF_AD_QUEUE:
1287 + ctx->flags |= SEEN_SKB | SEEN_A;
1288 + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
1289 + queue_mapping) != 2);
1290 + BUILD_BUG_ON(offsetof(struct sk_buff,
1291 + queue_mapping) > 0xff);
1292 + off = offsetof(struct sk_buff, queue_mapping);
1293 + emit_half_load_unsigned(r_A, r_skb, off, ctx);
1294 + break;
1295 + default:
1296 + pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__,
1297 + inst->code);
1298 + return -1;
1299 + }
1300 + }
1301 +
1302 + /* compute offsets only during the first pass */
1303 + if (ctx->target == NULL)
1304 + ctx->offsets[i] = ctx->idx * 4;
1305 +
1306 + return 0;
1307 +}
1308 +
1309 +void bpf_jit_compile(struct bpf_prog *fp)
1310 +{
1311 + struct jit_ctx ctx;
1312 + unsigned int alloc_size, tmp_idx;
1313 +
1314 + if (!bpf_jit_enable)
1315 + return;
1316 +
1317 + memset(&ctx, 0, sizeof(ctx));
1318 +
1319 + ctx.offsets = kcalloc(fp->len + 1, sizeof(*ctx.offsets), GFP_KERNEL);
1320 + if (ctx.offsets == NULL)
1321 + return;
1322 +
1323 + ctx.skf = fp;
1324 +
1325 + if (build_body(&ctx))
1326 + goto out;
1327 +
1328 + tmp_idx = ctx.idx;
1329 + build_prologue(&ctx);
1330 + ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;
1331 + /* just to complete the ctx.idx count */
1332 + build_epilogue(&ctx);
1333 +
1334 + alloc_size = 4 * ctx.idx;
1335 + ctx.target = module_alloc(alloc_size);
1336 + if (ctx.target == NULL)
1337 + goto out;
1338 +
1339 + /* Clean it */
1340 + memset(ctx.target, 0, alloc_size);
1341 +
1342 + ctx.idx = 0;
1343 +
1344 + /* Generate the actual JIT code */
1345 + build_prologue(&ctx);
1346 + build_body(&ctx);
1347 + build_epilogue(&ctx);
1348 +
1349 + /* Update the icache */
1350 + flush_icache_range((ptr)ctx.target, (ptr)(ctx.target + ctx.idx));
1351 +
1352 + if (bpf_jit_enable > 1)
1353 + /* Dump JIT code */
1354 + bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);
1355 +
1356 + fp->bpf_func = (void *)ctx.target;
1357 + fp->jited = 1;
1358 +
1359 +out:
1360 + kfree(ctx.offsets);
1361 +}
1362 +
1363 +void bpf_jit_free(struct bpf_prog *fp)
1364 +{
1365 + if (fp->jited)
1366 + module_memfree(fp->bpf_func);
1367 +
1368 + bpf_prog_unlock_free(fp);
1369 +}
1370 diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S
1371 new file mode 100644
1372 index 000000000000..57154c5883b6
1373 --- /dev/null
1374 +++ b/arch/mips/net/bpf_jit_asm.S
1375 @@ -0,0 +1,285 @@
1376 +/*
1377 + * bpf_jit_asm.S: Packet/header access helper functions for MIPS/MIPS64 BPF
1378 + * compiler.
1379 + *
1380 + * Copyright (C) 2015 Imagination Technologies Ltd.
1381 + * Author: Markos Chandras <markos.chandras@imgtec.com>
1382 + *
1383 + * This program is free software; you can redistribute it and/or modify it
1384 + * under the terms of the GNU General Public License as published by the
1385 + * Free Software Foundation; version 2 of the License.
1386 + */
1387 +
1388 +#include <asm/asm.h>
1389 +#include <asm/isa-rev.h>
1390 +#include <asm/regdef.h>
1391 +#include "bpf_jit.h"
1392 +
1393 +/* ABI
1394 + *
1395 + * r_skb_hl skb header length
1396 + * r_skb_data skb data
1397 + * r_off(a1) offset register
1398 + * r_A BPF register A
1399 + * r_X BPF register X
1400 + * r_skb(a0) *skb
1401 + * r_M *scratch memory
1402 + * r_skb_len skb length
1403 + * r_s0 Scratch register 0
1404 + * r_s1 Scratch register 1
1405 + *
1406 + * On entry:
1407 + * a0: *skb
1408 + * a1: offset (imm or imm + X)
1409 + *
1410 + * All non-BPF-ABI registers are free for use. On return, we only
1411 + * care about r_ret. The BPF-ABI registers are assumed to remain
1412 + * unmodified during the entire filter operation.
1413 + */
1414 +
1415 +#define skb a0
1416 +#define offset a1
1417 +#define SKF_LL_OFF (-0x200000) /* Can't include linux/filter.h in assembly */
1418 +
1419 + /* We know better :) so prevent assembler reordering etc */
1420 + .set noreorder
1421 +
1422 +#define is_offset_negative(TYPE) \
1423 + /* If offset is negative we have more work to do */ \
1424 + slti t0, offset, 0; \
1425 + bgtz t0, bpf_slow_path_##TYPE##_neg; \
1426 + /* Be careful what follows in DS. */
1427 +
1428 +#define is_offset_in_header(SIZE, TYPE) \
1429 + /* Reading from header? */ \
1430 + addiu $r_s0, $r_skb_hl, -SIZE; \
1431 + slt t0, $r_s0, offset; \
1432 + bgtz t0, bpf_slow_path_##TYPE; \
1433 +
1434 +LEAF(sk_load_word)
1435 + is_offset_negative(word)
1436 +FEXPORT(sk_load_word_positive)
1437 + is_offset_in_header(4, word)
1438 + /* Offset within header boundaries */
1439 + PTR_ADDU t1, $r_skb_data, offset
1440 + .set reorder
1441 + lw $r_A, 0(t1)
1442 + .set noreorder
1443 +#ifdef CONFIG_CPU_LITTLE_ENDIAN
1444 +# if MIPS_ISA_REV >= 2
1445 + wsbh t0, $r_A
1446 + rotr $r_A, t0, 16
1447 +# else
1448 + sll t0, $r_A, 24
1449 + srl t1, $r_A, 24
1450 + srl t2, $r_A, 8
1451 + or t0, t0, t1
1452 + andi t2, t2, 0xff00
1453 + andi t1, $r_A, 0xff00
1454 + or t0, t0, t2
1455 + sll t1, t1, 8
1456 + or $r_A, t0, t1
1457 +# endif
1458 +#endif
1459 + jr $r_ra
1460 + move $r_ret, zero
1461 + END(sk_load_word)
1462 +
1463 +LEAF(sk_load_half)
1464 + is_offset_negative(half)
1465 +FEXPORT(sk_load_half_positive)
1466 + is_offset_in_header(2, half)
1467 + /* Offset within header boundaries */
1468 + PTR_ADDU t1, $r_skb_data, offset
1469 + lhu $r_A, 0(t1)
1470 +#ifdef CONFIG_CPU_LITTLE_ENDIAN
1471 +# if MIPS_ISA_REV >= 2
1472 + wsbh $r_A, $r_A
1473 +# else
1474 + sll t0, $r_A, 8
1475 + srl t1, $r_A, 8
1476 + andi t0, t0, 0xff00
1477 + or $r_A, t0, t1
1478 +# endif
1479 +#endif
1480 + jr $r_ra
1481 + move $r_ret, zero
1482 + END(sk_load_half)
1483 +
1484 +LEAF(sk_load_byte)
1485 + is_offset_negative(byte)
1486 +FEXPORT(sk_load_byte_positive)
1487 + is_offset_in_header(1, byte)
1488 + /* Offset within header boundaries */
1489 + PTR_ADDU t1, $r_skb_data, offset
1490 + lbu $r_A, 0(t1)
1491 + jr $r_ra
1492 + move $r_ret, zero
1493 + END(sk_load_byte)
1494 +
1495 +/*
1496 + * call skb_copy_bits:
1497 + * (prototype in linux/skbuff.h)
1498 + *
1499 + * int skb_copy_bits(sk_buff *skb, int offset, void *to, int len)
1500 + *
1501 + * o32 mandates we leave 4 spaces for argument registers in case
1502 + * the callee needs to use them. Even though we don't care about
1503 + * the argument registers ourselves, we need to allocate that space
1504 + * to remain ABI compliant since the callee may want to use that space.
1505 + * We also allocate 2 more spaces for $r_ra and our return register (*to).
1506 + *
1507 + * n64 is a bit different. The *caller* will allocate the space to preserve
1508 + * the arguments. So in 64-bit kernels, we allocate the 4-arg space for no
1509 + * good reason but it does not matter that much really.
1510 + *
1511 + * (void *to) is returned in r_s0
1512 + *
1513 + */
1514 +#ifdef CONFIG_CPU_LITTLE_ENDIAN
1515 +#define DS_OFFSET(SIZE) (4 * SZREG)
1516 +#else
1517 +#define DS_OFFSET(SIZE) ((4 * SZREG) + (4 - SIZE))
1518 +#endif
1519 +#define bpf_slow_path_common(SIZE) \
1520 + /* Quick check. Are we within reasonable boundaries? */ \
1521 + LONG_ADDIU $r_s1, $r_skb_len, -SIZE; \
1522 + sltu $r_s0, offset, $r_s1; \
1523 + beqz $r_s0, fault; \
1524 + /* Load 4th argument in DS */ \
1525 + LONG_ADDIU a3, zero, SIZE; \
1526 + PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \
1527 + PTR_LA t0, skb_copy_bits; \
1528 + PTR_S $r_ra, (5 * SZREG)($r_sp); \
1529 + /* Assign low slot to a2 */ \
1530 + PTR_ADDIU a2, $r_sp, DS_OFFSET(SIZE); \
1531 + jalr t0; \
1532 + /* Reset our destination slot (DS but it's ok) */ \
1533 + INT_S zero, (4 * SZREG)($r_sp); \
1534 + /* \
1535 + * skb_copy_bits returns 0 on success and -EFAULT \
1536 + * on error. Our data live in a2. Do not bother with \
1537 + * our data if an error has been returned. \
1538 + */ \
1539 + /* Restore our frame */ \
1540 + PTR_L $r_ra, (5 * SZREG)($r_sp); \
1541 + INT_L $r_s0, (4 * SZREG)($r_sp); \
1542 + bltz v0, fault; \
1543 + PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \
1544 + move $r_ret, zero; \
1545 +
1546 +NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp)
1547 + bpf_slow_path_common(4)
1548 +#ifdef CONFIG_CPU_LITTLE_ENDIAN
1549 +# if MIPS_ISA_REV >= 2
1550 + wsbh t0, $r_s0
1551 + jr $r_ra
1552 + rotr $r_A, t0, 16
1553 +# else
1554 + sll t0, $r_s0, 24
1555 + srl t1, $r_s0, 24
1556 + srl t2, $r_s0, 8
1557 + or t0, t0, t1
1558 + andi t2, t2, 0xff00
1559 + andi t1, $r_s0, 0xff00
1560 + or t0, t0, t2
1561 + sll t1, t1, 8
1562 + jr $r_ra
1563 + or $r_A, t0, t1
1564 +# endif
1565 +#else
1566 + jr $r_ra
1567 + move $r_A, $r_s0
1568 +#endif
1569 +
1570 + END(bpf_slow_path_word)
1571 +
1572 +NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp)
1573 + bpf_slow_path_common(2)
1574 +#ifdef CONFIG_CPU_LITTLE_ENDIAN
1575 +# if MIPS_ISA_REV >= 2
1576 + jr $r_ra
1577 + wsbh $r_A, $r_s0
1578 +# else
1579 + sll t0, $r_s0, 8
1580 + andi t1, $r_s0, 0xff00
1581 + andi t0, t0, 0xff00
1582 + srl t1, t1, 8
1583 + jr $r_ra
1584 + or $r_A, t0, t1
1585 +# endif
1586 +#else
1587 + jr $r_ra
1588 + move $r_A, $r_s0
1589 +#endif
1590 +
1591 + END(bpf_slow_path_half)
1592 +
1593 +NESTED(bpf_slow_path_byte, (6 * SZREG), $r_sp)
1594 + bpf_slow_path_common(1)
1595 + jr $r_ra
1596 + move $r_A, $r_s0
1597 +
1598 + END(bpf_slow_path_byte)
1599 +
1600 +/*
1601 + * Negative entry points
1602 + */
1603 + .macro bpf_is_end_of_data
1604 + li t0, SKF_LL_OFF
1605 + /* Reading link layer data? */
1606 + slt t1, offset, t0
1607 + bgtz t1, fault
1608 + /* Be careful what follows in DS. */
1609 + .endm
1610 +/*
1611 + * call bpf_internal_load_pointer_neg_helper:
1612 + * (prototype in linux/filter.h)
1613 + *
1614 + * void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
1615 + * int k, unsigned int size)
1616 + *
1617 + * see above (bpf_slow_path_common) for ABI restrictions
1618 + */
1619 +#define bpf_negative_common(SIZE) \
1620 + PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \
1621 + PTR_LA t0, bpf_internal_load_pointer_neg_helper; \
1622 + PTR_S $r_ra, (5 * SZREG)($r_sp); \
1623 + jalr t0; \
1624 + li a2, SIZE; \
1625 + PTR_L $r_ra, (5 * SZREG)($r_sp); \
1626 + /* Check return pointer */ \
1627 + beqz v0, fault; \
1628 + PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \
1629 + /* Preserve our pointer */ \
1630 + move $r_s0, v0; \
1631 + /* Set return value */ \
1632 + move $r_ret, zero; \
1633 +
1634 +bpf_slow_path_word_neg:
1635 + bpf_is_end_of_data
1636 +NESTED(sk_load_word_negative, (6 * SZREG), $r_sp)
1637 + bpf_negative_common(4)
1638 + jr $r_ra
1639 + lw $r_A, 0($r_s0)
1640 + END(sk_load_word_negative)
1641 +
1642 +bpf_slow_path_half_neg:
1643 + bpf_is_end_of_data
1644 +NESTED(sk_load_half_negative, (6 * SZREG), $r_sp)
1645 + bpf_negative_common(2)
1646 + jr $r_ra
1647 + lhu $r_A, 0($r_s0)
1648 + END(sk_load_half_negative)
1649 +
1650 +bpf_slow_path_byte_neg:
1651 + bpf_is_end_of_data
1652 +NESTED(sk_load_byte_negative, (6 * SZREG), $r_sp)
1653 + bpf_negative_common(1)
1654 + jr $r_ra
1655 + lbu $r_A, 0($r_s0)
1656 + END(sk_load_byte_negative)
1657 +
1658 +fault:
1659 + jr $r_ra
1660 + addiu $r_ret, zero, 1
1661 --
1662 2.17.1
1663