From: Felix Fietkau <nbd@nbd.name>
Subject: mips: replace -mlong-calls with -mno-long-calls to make function calls faster in kernel modules

To achieve this, try to place modules into physically contiguous
(KSEG0) memory first, where a 26-bit jump can reach the kernel
directly. If that allocation fails, fall back to vmalloc and redirect
out-of-range jumps through generated trampolines.

lede-commit: 3b3d64743ba2a874df9d70cd19e242205b0a788c
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 arch/mips/Makefile             |   5 +
 arch/mips/include/asm/module.h |   5 +
 arch/mips/kernel/module.c      | 279 ++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 284 insertions(+), 5 deletions(-)
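
Note: the following sketch is illustrative only and is not part of the
patch. Each trampoline emitted by add_plt_entry_to() below is four
words: lui t9, hi16; addiu t9, t9, lo16; jr t9; nop (hence the
"4 * sizeof(int)" reserved per R_MIPS_26 relocation in get_plt_size()).
Because addiu sign-extends its 16-bit immediate, the upper half is
pre-incremented whenever bit 15 of the target address is set. A
stand-alone C model of the encoding:

	#include <stdint.h>
	#include <stdio.h>

	/* Encode a 32-bit target address the way add_plt_entry_to() does. */
	static void encode_plt(uint32_t v, uint32_t tramp[4])
	{
		if (v & 0x00008000)	/* lo16 would sign-extend as negative, */
			v += 0x10000;	/* so carry into the upper half first */

		tramp[0] = 0x3c190000 | (v >> 16);	/* lui   t9, hi16     */
		tramp[1] = 0x27390000 | (v & 0xffff);	/* addiu t9, t9, lo16 */
		tramp[2] = 0x03200008;			/* jr    t9           */
		tramp[3] = 0x00000000;			/* nop                */
	}

	int main(void)
	{
		uint32_t t[4];

		encode_plt(0x87654321, t);	/* arbitrary example target */
		printf("%08x %08x %08x %08x\n", t[0], t[1], t[2], t[3]);
		return 0;
	}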

diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 48dc1a9c3e42..77bf5db20d65 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -93,8 +93,13 @@ all-$(CONFIG_SYS_SUPPORTS_ZBOOT)	+= vmlinuz
 cflags-y += -G 0 -mno-abicalls -fno-pic -pipe -mno-branch-likely
 cflags-y += -msoft-float
 LDFLAGS_vmlinux += -G 0 -static -n -nostdlib
+ifdef CONFIG_64BIT
 KBUILD_AFLAGS_MODULE += -mlong-calls
 KBUILD_CFLAGS_MODULE += -mlong-calls
+else
+KBUILD_AFLAGS_MODULE += -mno-long-calls
+KBUILD_CFLAGS_MODULE += -mno-long-calls
+endif
 
 ifeq ($(CONFIG_RELOCATABLE),y)
 LDFLAGS_vmlinux += --emit-relocs
diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h
index 702c273e67a9..1d4f3b37cefe 100644
--- a/arch/mips/include/asm/module.h
+++ b/arch/mips/include/asm/module.h
@@ -11,6 +11,11 @@ struct mod_arch_specific {
 	const struct exception_table_entry *dbe_start;
 	const struct exception_table_entry *dbe_end;
 	struct mips_hi16 *r_mips_hi16_list;
+
+	void *phys_plt_tbl;
+	void *virt_plt_tbl;
+	unsigned int phys_plt_offset;
+	unsigned int virt_plt_offset;
 };
 
 typedef uint8_t Elf64_Byte;	/* Type for a 8-bit quantity.  */
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 94627a3a6a0d..947981a9aa72 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -44,14 +44,221 @@ struct mips_hi16 {
 static LIST_HEAD(dbe_list);
 static DEFINE_SPINLOCK(dbe_lock);
 
-#ifdef MODULE_START
+/*
+ * Get the potential max trampolines size required of the init and
+ * non-init sections. Only used if we cannot find enough contiguous
+ * physically mapped memory to put the module into.
+ */
+static unsigned int
+get_plt_size(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
+	     const char *secstrings, unsigned int symindex, bool is_init)
+{
+	unsigned long ret = 0;
+	unsigned int i, j;
+	Elf_Sym *syms;
+
+	/* Everything marked ALLOC (this includes the exported symbols) */
+	for (i = 1; i < hdr->e_shnum; ++i) {
+		unsigned int info = sechdrs[i].sh_info;
+
+		if (sechdrs[i].sh_type != SHT_REL
+		    && sechdrs[i].sh_type != SHT_RELA)
+			continue;
+
+		/* Not a valid relocation section? */
+		if (info >= hdr->e_shnum)
+			continue;
+
+		/* Don't bother with non-allocated sections */
+		if (!(sechdrs[info].sh_flags & SHF_ALLOC))
+			continue;
+
+		/* If it's called *.init*, and we're not init, we're
+		   not interested */
+		if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0)
+		    != is_init)
+			continue;
+
+		syms = (Elf_Sym *) sechdrs[symindex].sh_addr;
+		if (sechdrs[i].sh_type == SHT_REL) {
+			Elf_Mips_Rel *rel = (void *) sechdrs[i].sh_addr;
+			unsigned int size = sechdrs[i].sh_size / sizeof(*rel);
+
+			for (j = 0; j < size; ++j) {
+				Elf_Sym *sym;
+
+				if (ELF_MIPS_R_TYPE(rel[j]) != R_MIPS_26)
+					continue;
+
+				sym = syms + ELF_MIPS_R_SYM(rel[j]);
+				if (!is_init && sym->st_shndx != SHN_UNDEF)
+					continue;
+
+				ret += 4 * sizeof(int);
+			}
+		} else {
+			Elf_Mips_Rela *rela = (void *) sechdrs[i].sh_addr;
+			unsigned int size = sechdrs[i].sh_size / sizeof(*rela);
+
+			for (j = 0; j < size; ++j) {
+				Elf_Sym *sym;
+
+				if (ELF_MIPS_R_TYPE(rela[j]) != R_MIPS_26)
+					continue;
+
+				sym = syms + ELF_MIPS_R_SYM(rela[j]);
+				if (!is_init && sym->st_shndx != SHN_UNDEF)
+					continue;
+
+				ret += 4 * sizeof(int);
+			}
+		}
+	}
+
+	return ret;
+}
+
+#ifndef MODULE_START
+static void *alloc_phys(unsigned long size)
+{
+	unsigned order;
+	struct page *page;
+	struct page *p;
+
+	size = PAGE_ALIGN(size);
+	order = get_order(size);
+
+	page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN |
+			   __GFP_THISNODE, order);
+	if (!page)
+		return NULL;
+
+	split_page(page, order);
+
+	/* mark all pages except for the last one */
+	for (p = page; p + 1 < page + (size >> PAGE_SHIFT); ++p)
+		set_bit(PG_owner_priv_1, &p->flags);
+
+	for (p = page + (size >> PAGE_SHIFT); p < page + (1 << order); ++p)
+		__free_page(p);
+
+	return page_address(page);
+}
+#endif
+
+static void free_phys(void *ptr)
+{
+	struct page *page;
+	bool free;
+
+	page = virt_to_page(ptr);
+	do {
+		free = test_and_clear_bit(PG_owner_priv_1, &page->flags);
+		__free_page(page);
+		page++;
+	} while (free);
+}
+
+
 void *module_alloc(unsigned long size)
 {
+#ifdef MODULE_START
 	return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END,
 				GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
 				__builtin_return_address(0));
+#else
+	void *ptr;
+
+	if (size == 0)
+		return NULL;
+
+	ptr = alloc_phys(size);
+
+	/* If we failed to allocate physically contiguous memory,
+	 * fall back to regular vmalloc. The module loader code will
+	 * create jump tables to handle long jumps */
+	if (!ptr)
+		return vmalloc(size);
+
+	return ptr;
+#endif
 }
+
+static inline bool is_phys_addr(void *ptr)
+{
+#ifdef CONFIG_64BIT
+	return (KSEGX((unsigned long)ptr) == CKSEG0);
+#else
+	return (KSEGX(ptr) == KSEG0);
 #endif
+}
+
+/* Free memory returned from module_alloc */
+void module_memfree(void *module_region)
+{
+	if (is_phys_addr(module_region))
+		free_phys(module_region);
+	else
+		vfree(module_region);
+}
+
+static void *__module_alloc(int size, bool phys)
+{
+	void *ptr;
+
+	if (phys)
+		ptr = kmalloc(size, GFP_KERNEL);
+	else
+		ptr = vmalloc(size);
+	return ptr;
+}
+
+static void __module_free(void *ptr)
+{
+	if (is_phys_addr(ptr))
+		kfree(ptr);
+	else
+		vfree(ptr);
+}
+
+int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
+			      char *secstrings, struct module *mod)
+{
+	unsigned int symindex = 0;
+	unsigned int core_size, init_size;
+	int i;
+
+	mod->arch.phys_plt_offset = 0;
+	mod->arch.virt_plt_offset = 0;
+	mod->arch.phys_plt_tbl = NULL;
+	mod->arch.virt_plt_tbl = NULL;
+
+	if (IS_ENABLED(CONFIG_64BIT))
+		return 0;
+
+	for (i = 1; i < hdr->e_shnum; i++)
+		if (sechdrs[i].sh_type == SHT_SYMTAB)
+			symindex = i;
+
+	core_size = get_plt_size(hdr, sechdrs, secstrings, symindex, false);
+	init_size = get_plt_size(hdr, sechdrs, secstrings, symindex, true);
+
+	if ((core_size + init_size) == 0)
+		return 0;
+
+	mod->arch.phys_plt_tbl = __module_alloc(core_size + init_size, 1);
+	if (!mod->arch.phys_plt_tbl)
+		return -ENOMEM;
+
+	mod->arch.virt_plt_tbl = __module_alloc(core_size + init_size, 0);
+	if (!mod->arch.virt_plt_tbl) {
+		__module_free(mod->arch.phys_plt_tbl);
+		mod->arch.phys_plt_tbl = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
 
 int apply_r_mips_none(struct module *me, u32 *location, Elf_Addr v)
 {
@@ -65,8 +272,39 @@ static int apply_r_mips_32_rel(struct module *me, u32 *location, Elf_Addr v)
 	return 0;
 }
 
+static Elf_Addr add_plt_entry_to(unsigned *plt_offset,
+				 void *start, Elf_Addr v)
+{
+	unsigned *tramp = start + *plt_offset;
+	*plt_offset += 4 * sizeof(int);
+
+	/* adjust carry for addiu */
+	if (v & 0x00008000)
+		v += 0x10000;
+
+	tramp[0] = 0x3c190000 | (v >> 16);	/* lui t9, hi16 */
+	tramp[1] = 0x27390000 | (v & 0xffff);	/* addiu t9, t9, lo16 */
+	tramp[2] = 0x03200008;			/* jr t9 */
+	tramp[3] = 0x00000000;			/* nop */
+
+	return (Elf_Addr) tramp;
+}
+
+static Elf_Addr add_plt_entry(struct module *me, void *location, Elf_Addr v)
+{
+	if (is_phys_addr(location))
+		return add_plt_entry_to(&me->arch.phys_plt_offset,
+					me->arch.phys_plt_tbl, v);
+	else
+		return add_plt_entry_to(&me->arch.virt_plt_offset,
+					me->arch.virt_plt_tbl, v);
+
+}
+
 static int apply_r_mips_26_rel(struct module *me, u32 *location, Elf_Addr v)
 {
+	u32 ofs = *location & 0x03ffffff;
+
 	if (v % 4) {
 		pr_err("module %s: dangerous R_MIPS_26 REL relocation\n",
 		       me->name);
@@ -74,13 +312,17 @@ static int apply_r_mips_26_rel(struct module *me, u32 *location, Elf_Addr v)
 	}
 
 	if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
-		pr_err("module %s: relocation overflow\n",
-		       me->name);
-		return -ENOEXEC;
+		v = add_plt_entry(me, location, v + (ofs << 2));
+		if (!v) {
+			pr_err("module %s: relocation overflow\n",
+			       me->name);
+			return -ENOEXEC;
+		}
+		ofs = 0;
 	}
 
 	*location = (*location & ~0x03ffffff) |
-		    ((*location + (v >> 2)) & 0x03ffffff);
+		    ((ofs + (v >> 2)) & 0x03ffffff);
 
 	return 0;
 }
@@ -349,9 +591,36 @@ int module_finalize(const Elf_Ehdr *hdr,
 		list_add(&me->arch.dbe_list, &dbe_list);
 		spin_unlock_irq(&dbe_lock);
 	}
+
+	/* Get rid of the fixup trampoline if we're running the module
+	 * from physically mapped address space */
+	if (me->arch.phys_plt_offset == 0) {
+		__module_free(me->arch.phys_plt_tbl);
+		me->arch.phys_plt_tbl = NULL;
+	}
+	if (me->arch.virt_plt_offset == 0) {
+		__module_free(me->arch.virt_plt_tbl);
+		me->arch.virt_plt_tbl = NULL;
+	}
+
 	return 0;
 }
 
+void module_arch_freeing_init(struct module *mod)
+{
+	if (mod->state == MODULE_STATE_LIVE)
+		return;
+
+	if (mod->arch.phys_plt_tbl) {
+		__module_free(mod->arch.phys_plt_tbl);
+		mod->arch.phys_plt_tbl = NULL;
+	}
+	if (mod->arch.virt_plt_tbl) {
+		__module_free(mod->arch.virt_plt_tbl);
+		mod->arch.virt_plt_tbl = NULL;
+	}
+}
+
 void module_arch_cleanup(struct module *mod)
 {
 	spin_lock_irq(&dbe_lock);
-- 
2.11.0

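Note: a stand-alone sketch, not part of the patch. The overflow test in
apply_r_mips_26_rel() reflects how MIPS J/JAL works: the instruction
encodes 26 bits that are shifted left by two, and the remaining top
4 bits are taken from the address of the delay slot, so caller and
callee must share one 256 MB segment:

	#include <stdbool.h>
	#include <stdint.h>

	/* Mirrors the patch's check:
	 * (v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000) */
	static bool jal_reaches(uint32_t location, uint32_t target)
	{
		/* location + 4 is the delay slot; J/JAL keep its top 4 bits */
		return (target & 0xf0000000) == ((location + 4) & 0xf0000000);
	}

A module in vmalloc space (KSEG2) can therefore never reach a KSEG0
kernel symbol with a plain jal, which is why such jumps are redirected
through a PLT entry instead of being rejected as overflows.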
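
Note: a hypothetical user-space model, not part of the patch, of the
bookkeeping shared by alloc_phys() and free_phys(): the allocation
length is stored in the pages themselves by tagging every page except
the last (PG_owner_priv_1), so the free path can walk forward and stop
right after the first untagged page:

	#include <stdbool.h>
	#include <stdio.h>

	#define NPAGES 16
	static bool tagged[NPAGES];	/* stands in for PG_owner_priv_1 */

	/* alloc_phys(): tag all pages of the run except the last one */
	static void mark_alloc(int first, int npages)
	{
		for (int p = first; p + 1 < first + npages; p++)
			tagged[p] = true;
	}

	/* free_phys(): release pages until one is found untagged */
	static int free_run(int first)
	{
		int p = first;
		bool more;

		do {
			more = tagged[p];	/* test_and_clear_bit() */
			tagged[p] = false;
			p++;			/* __free_page(page); page++; */
		} while (more);

		return p - first;	/* pages released */
	}

	int main(void)
	{
		mark_alloc(3, 4);
		printf("freed %d pages\n", free_run(3));	/* prints: freed 4 pages */
		return 0;
	}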