kernel: add support for kernel 5.4
[oweals/openwrt.git] / target / linux / generic / pending-5.4 / 305-mips_module_reloc.patch
1 From: Felix Fietkau <nbd@nbd.name>
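2 Subject: mips: replace -mlong-calls with -mno-long-calls to make function calls faster in kernel modules; to achieve this, try to load modules into physically mapped (KSEG0) memory and, if that fails, fall back to vmalloc and route out-of-range calls through a jump table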
3
4 lede-commit: 3b3d64743ba2a874df9d70cd19e242205b0a788c
5 Signed-off-by: Felix Fietkau <nbd@nbd.name>
6 ---
7  arch/mips/Makefile             |   5 +
8  arch/mips/include/asm/module.h |   5 +
9  arch/mips/kernel/module.c      | 279 ++++++++++++++++++++++++++++++++++++++++-
10  3 files changed, 284 insertions(+), 5 deletions(-)
11
12 --- a/arch/mips/Makefile
13 +++ b/arch/mips/Makefile
14 @@ -95,8 +95,18 @@ all-$(CONFIG_SYS_SUPPORTS_ZBOOT)+= vmlin
15  cflags-y                       += -G 0 -mno-abicalls -fno-pic -pipe -mno-branch-likely
16  cflags-y                       += -msoft-float
17  LDFLAGS_vmlinux                        += -G 0 -static -n -nostdlib
18 +ifdef CONFIG_64BIT
19  KBUILD_AFLAGS_MODULE           += -mlong-calls
20  KBUILD_CFLAGS_MODULE           += -mlong-calls
21 +else
22 +  ifdef CONFIG_DYNAMIC_FTRACE
23 +    KBUILD_AFLAGS_MODULE       += -mlong-calls
24 +    KBUILD_CFLAGS_MODULE       += -mlong-calls
25 +  else
26 +    KBUILD_AFLAGS_MODULE       += -mno-long-calls
27 +    KBUILD_CFLAGS_MODULE       += -mno-long-calls
28 +  endif
29 +endif
30  
31  ifeq ($(CONFIG_RELOCATABLE),y)
32  LDFLAGS_vmlinux                        += --emit-relocs
33 --- a/arch/mips/include/asm/module.h
34 +++ b/arch/mips/include/asm/module.h
35 @@ -12,6 +12,11 @@ struct mod_arch_specific {
36         const struct exception_table_entry *dbe_start;
37         const struct exception_table_entry *dbe_end;
38         struct mips_hi16 *r_mips_hi16_list;
39 +
40 +       void *phys_plt_tbl;
41 +       void *virt_plt_tbl;
42 +       unsigned int phys_plt_offset;
43 +       unsigned int virt_plt_offset;
44  };
45  
46  typedef uint8_t Elf64_Byte;            /* Type for a 8-bit quantity.  */
47 --- a/arch/mips/kernel/module.c
48 +++ b/arch/mips/kernel/module.c
49 @@ -32,14 +32,221 @@ struct mips_hi16 {
50  static LIST_HEAD(dbe_list);
51  static DEFINE_SPINLOCK(dbe_lock);
52  
53 -#ifdef MODULE_START
54 +/*
55 + * Compute the worst-case trampoline table size, in bytes, for either the
56 + * init (is_init true) or the non-init sections: 4 * sizeof(int) for each
57 + * counted R_MIPS_26 relocation, i.e. one trampoline slot per candidate.
58 + */
59 +static unsigned int
60 +get_plt_size(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
61 +             const char *secstrings, unsigned int symindex, bool is_init)
62 +{
63 +       unsigned long ret = 0;
64 +       unsigned int i, j;
65 +       Elf_Sym *syms;
66 +
67 +       /* Walk all relocation sections that target an allocated section */
68 +       for (i = 1; i < hdr->e_shnum; ++i) {
69 +               unsigned int info = sechdrs[i].sh_info; /* index of the section being relocated */
70 +
71 +               if (sechdrs[i].sh_type != SHT_REL
72 +                   && sechdrs[i].sh_type != SHT_RELA)
73 +                       continue;
74 +
75 +               /* Not a valid relocation section? */
76 +               if (info >= hdr->e_shnum)
77 +                       continue;
78 +
79 +               /* Don't bother with non-allocated sections */
80 +               if (!(sechdrs[info].sh_flags & SHF_ALLOC))
81 +                       continue;
82 +
83 +               /* Keep only sections whose .init naming matches
84 +                   is_init; skip the rest */
85 +               if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0)
86 +                   != is_init)
87 +                       continue;
88 +
89 +               syms = (Elf_Sym *) sechdrs[symindex].sh_addr;
90 +               if (sechdrs[i].sh_type == SHT_REL) {
91 +                       Elf_Mips_Rel *rel = (void *) sechdrs[i].sh_addr;
92 +                       unsigned int size = sechdrs[i].sh_size / sizeof(*rel); /* number of entries */
93 +
94 +                       for (j = 0; j < size; ++j) {
95 +                               Elf_Sym *sym;
96 +
97 +                               if (ELF_MIPS_R_TYPE(rel[j]) != R_MIPS_26)
98 +                                       continue;
99 +
100 +                               sym = syms + ELF_MIPS_R_SYM(rel[j]);
101 +                               if (!is_init && sym->st_shndx != SHN_UNDEF)
102 +                                       continue; /* non-init pass: count SHN_UNDEF symbols only */
103 +
104 +                               ret += 4 * sizeof(int); /* one four-word trampoline */
105 +                       }
106 +               } else {
107 +                       Elf_Mips_Rela *rela = (void *) sechdrs[i].sh_addr;
108 +                       unsigned int size = sechdrs[i].sh_size / sizeof(*rela); /* number of entries */
109 +
110 +                       for (j = 0; j < size; ++j) {
111 +                               Elf_Sym *sym;
112 +
113 +                               if (ELF_MIPS_R_TYPE(rela[j]) != R_MIPS_26)
114 +                                       continue;
115 +
116 +                               sym = syms + ELF_MIPS_R_SYM(rela[j]);
117 +                               if (!is_init && sym->st_shndx != SHN_UNDEF)
118 +                                       continue; /* non-init pass: count SHN_UNDEF symbols only */
119 +
120 +                               ret += 4 * sizeof(int); /* one four-word trampoline */
121 +                       }
122 +               }
123 +       }
124 +
125 +       return ret;
126 +}
127 +
128 +#ifndef MODULE_START
129 +static void *alloc_phys(unsigned long size) /* page-backed allocation; NULL on failure */
130 +{
131 +       unsigned order;
132 +       struct page *page;
133 +       struct page *p;
134 +
135 +       size = PAGE_ALIGN(size);
136 +       order = get_order(size);
137 +
138 +       page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN |
139 +                       __GFP_THISNODE, order);
140 +       if (!page)
141 +               return NULL;
142 +
143 +       split_page(page, order); /* pages are released one at a time below and in free_phys() */
144 +
145 +       /* flag every page but the last; free_phys() stops at the unflagged one */
146 +       for (p = page; p + 1 < page + (size >> PAGE_SHIFT); ++p)
147 +               set_bit(PG_owner_priv_1, &p->flags);
148 +
149 +       for (p = page + (size >> PAGE_SHIFT); p < page + (1 << order); ++p)
150 +               __free_page(p); /* give back the unused tail of the 2^order block */
151 +
152 +       return page_address(page);
153 +}
154 +#endif
155 +
156 +static void free_phys(void *ptr) /* frees the page run starting at ptr, as flagged by alloc_phys() */
157 +{
158 +       struct page *page;
159 +       bool free; /* true while the page just freed carried PG_owner_priv_1 */
160 +
161 +       page = virt_to_page(ptr);
162 +       do {
163 +               free = test_and_clear_bit(PG_owner_priv_1, &page->flags);
164 +               __free_page(page);
165 +               page++;
166 +       } while (free); /* the unflagged page is freed too, then the loop ends */
167 +}
168 +
169 +
168 +
169 +
170  void *module_alloc(unsigned long size)
171  {
172 +#ifdef MODULE_START
173         return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END,
174                                 GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
175                                 __builtin_return_address(0));
176 +#else
177 +       void *ptr;
178 +
179 +       if (size == 0)
180 +               return NULL;
181 +
182 +       ptr = alloc_phys(size); /* try page-backed memory first */
183 +
184 +       /* No physically contiguous memory available: fall back to
185 +        * regular vmalloc. Calls that end up out of direct jump range
186 +        * are then routed through jump tables by the module loader */
187 +       if (!ptr)
188 +               return vmalloc(size);
189 +
190 +       return ptr;
191 +#endif
192  }
193 +
194 +static inline bool is_phys_addr(void *ptr) /* true when KSEGX(ptr) selects (C)KSEG0 */
195 +{
196 +#ifdef CONFIG_64BIT
197 +       return (KSEGX((unsigned long)ptr) == CKSEG0);
198 +#else
199 +       return (KSEGX(ptr) == KSEG0);
200  #endif
201 +}
202 +
203 +/* Free memory returned from module_alloc(), whichever way it was obtained */
204 +void module_memfree(void *module_region)
205 +{
206 +       if (is_phys_addr(module_region))
207 +               free_phys(module_region); /* page run from alloc_phys() */
208 +       else
209 +               vfree(module_region); /* vmalloc-backed region */
210 +}
211 +
212 +static void *__module_alloc(int size, bool phys) /* allocator for the trampoline tables */
213 +{
214 +       void *ptr;
215 +
216 +       if (phys)
217 +               ptr = kmalloc(size, GFP_KERNEL); /* used for phys_plt_tbl */
218 +       else
219 +               ptr = vmalloc(size); /* used for virt_plt_tbl */
220 +       return ptr; /* NULL when the chosen allocator fails */
221 +}
222 +
223 +static void __module_free(void *ptr) /* counterpart of __module_alloc() */
224 +{
225 +       if (is_phys_addr(ptr)) /* the address range of ptr selects the release routine */
226 +               kfree(ptr);
227 +       else
228 +               vfree(ptr);
229 +}
230 +
231 +int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
232 +                             char *secstrings, struct module *mod) /* returns 0 or -ENOMEM */
233 +{
234 +       unsigned int symindex = 0;
235 +       unsigned int core_size, init_size; /* worst-case trampoline bytes per pass */
236 +       int i;
237 +
238 +       mod->arch.phys_plt_offset = 0;
239 +       mod->arch.virt_plt_offset = 0;
240 +       mod->arch.phys_plt_tbl = NULL;
241 +       mod->arch.virt_plt_tbl = NULL;
242 +
243 +       if (IS_ENABLED(CONFIG_64BIT))
244 +               return 0; /* no trampoline tables are set up on 64-bit */
245 +
246 +       for (i = 1; i < hdr->e_shnum; i++)
247 +               if (sechdrs[i].sh_type == SHT_SYMTAB)
248 +                       symindex = i; /* the last SHT_SYMTAB section wins */
249 +
250 +       core_size = get_plt_size(hdr, sechdrs, secstrings, symindex, false);
251 +       init_size = get_plt_size(hdr, sechdrs, secstrings, symindex, true);
252 +
253 +       if ((core_size + init_size) == 0)
254 +               return 0; /* no candidate relocations: nothing to allocate */
255 +
256 +       mod->arch.phys_plt_tbl = __module_alloc(core_size + init_size, 1); /* kmalloc-backed table */
257 +       if (!mod->arch.phys_plt_tbl)
258 +               return -ENOMEM;
259 +
260 +       mod->arch.virt_plt_tbl = __module_alloc(core_size + init_size, 0); /* vmalloc-backed table, same size */
261 +       if (!mod->arch.virt_plt_tbl) {
262 +               __module_free(mod->arch.phys_plt_tbl); /* undo the first allocation */
263 +               mod->arch.phys_plt_tbl = NULL;
264 +               return -ENOMEM;
265 +       }
266 +
267 +       return 0;
268 +}
269  
270  static int apply_r_mips_none(struct module *me, u32 *location,
271                              u32 base, Elf_Addr v, bool rela)
272 @@ -55,9 +262,40 @@ static int apply_r_mips_32(struct module
273         return 0;
274  }
275  
276 +static Elf_Addr add_plt_entry_to(unsigned *plt_offset,
277 +                                void *start, Elf_Addr v) /* returns the address of the new trampoline */
278 +{
279 +       unsigned *tramp = start + *plt_offset; /* NOTE(review): start is not checked for NULL or table overflow */
280 +       *plt_offset += 4 * sizeof(int); /* reserve four instruction words */
281 +
282 +       /* addiu sign-extends lo16: bump hi16 when bit 15 of v is set */
283 +       if (v & 0x00008000)
284 +               v += 0x10000;
285 +
286 +       tramp[0] = 0x3c190000 | (v >> 16);      /* lui t9, hi16 */
287 +       tramp[1] = 0x27390000 | (v & 0xffff);   /* addiu t9, t9, lo16 */
288 +       tramp[2] = 0x03200008;                  /* jr t9 */
289 +       tramp[3] = 0x00000000;                  /* nop (fills the slot after jr) */
290 +
291 +       return (Elf_Addr) tramp;
292 +}
293 +
294 +static Elf_Addr add_plt_entry(struct module *me, void *location, Elf_Addr v)
295 +{
296 +       if (is_phys_addr(location)) /* choose the table by the address range of the call site */
297 +               return add_plt_entry_to(&me->arch.phys_plt_offset,
298 +                               me->arch.phys_plt_tbl, v);
299 +       else
300 +               return add_plt_entry_to(&me->arch.virt_plt_offset,
301 +                               me->arch.virt_plt_tbl, v);
302 +
303 +}
304 +
305  static int apply_r_mips_26(struct module *me, u32 *location,
306                            u32 base, Elf_Addr v, bool rela)
307  {
308 +       u32 ofs = base & 0x03ffffff;
309 +
310         if (v % 4) {
311                 pr_err("module %s: dangerous R_MIPS_26 relocation\n",
312                        me->name);
313 @@ -65,13 +303,17 @@ static int apply_r_mips_26(struct module
314         }
315  
316         if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
317 -               pr_err("module %s: relocation overflow\n",
318 -                      me->name);
319 -               return -ENOEXEC;
320 +               v = add_plt_entry(me, location, v + (ofs << 2));
321 +               if (!v) {
322 +                       pr_err("module %s: relocation overflow\n",
323 +                              me->name);
324 +                       return -ENOEXEC;
325 +               }
326 +               ofs = 0;
327         }
328  
329         *location = (*location & ~0x03ffffff) |
330 -                   ((base + (v >> 2)) & 0x03ffffff);
331 +                   ((ofs + (v >> 2)) & 0x03ffffff);
332  
333         return 0;
334  }
335 @@ -447,9 +689,36 @@ int module_finalize(const Elf_Ehdr *hdr,
336                 list_add(&me->arch.dbe_list, &dbe_list);
337                 spin_unlock_irq(&dbe_lock);
338         }
339 +
340 +       /* Release each trampoline table that ended up with no entries
341 +        * (its offset is still zero) */
342 +       if (me->arch.phys_plt_offset == 0) {
343 +               __module_free(me->arch.phys_plt_tbl);
344 +               me->arch.phys_plt_tbl = NULL;
345 +       }
346 +       if (me->arch.virt_plt_offset == 0) {
347 +               __module_free(me->arch.virt_plt_tbl);
348 +               me->arch.virt_plt_tbl = NULL;
349 +       }
350 +
351         return 0;
352  }
353  
354 +void module_arch_freeing_init(struct module *mod)
355 +{
356 +       if (mod->state == MODULE_STATE_LIVE)
357 +               return; /* live module: keep its trampoline tables */
358 +
359 +       if (mod->arch.phys_plt_tbl) {
360 +               __module_free(mod->arch.phys_plt_tbl);
361 +               mod->arch.phys_plt_tbl = NULL; /* guard against a second free */
362 +       }
363 +       if (mod->arch.virt_plt_tbl) {
364 +               __module_free(mod->arch.virt_plt_tbl);
365 +               mod->arch.virt_plt_tbl = NULL; /* guard against a second free */
366 +       }
367 +}
368 +
369  void module_arch_cleanup(struct module *mod)
370  {
371         spin_lock_irq(&dbe_lock);