kernel: add missing patch
target/linux/generic/pending-4.9/305-mips_module_reloc.patch
From: Felix Fietkau <nbd@nbd.name>
Subject: mips: replace -mlong-calls with -mno-long-calls to make function
 calls faster in kernel modules

To achieve this, try to allocate modules in physically contiguous (KSEG0)
memory first, where direct j/jal calls can normally reach kernel code. If
that fails, fall back to vmalloc() and fix up out-of-range R_MIPS_26 calls
with generated trampolines.

lede-commit: 3b3d64743ba2a874df9d70cd19e242205b0a788c
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
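Note: each trampoline emitted by add_plt_entry_to() below is four
instructions long, which is where the "4 * sizeof(int)" accounting in
get_plt_size() comes from. Decoded, an entry looks like this (t9 is a
caller-saved scratch register; because addiu sign-extends its immediate,
targets with bit 15 set are pre-adjusted by 0x10000 first):

    lui   t9, %hi(target)       /* 0x3c190000 | (v >> 16) */
    addiu t9, t9, %lo(target)   /* 0x27390000 | (v & 0xffff) */
    jr    t9                    /* 0x03200008 */
    nop                         /* 0x00000000 */
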
 arch/mips/Makefile             |   5 +
 arch/mips/include/asm/module.h |   5 +
 arch/mips/kernel/module.c      | 279 ++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 284 insertions(+), 5 deletions(-)

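Background on why a fallback is needed at all: a MIPS j/jal instruction
encodes a 26-bit word index and can only reach targets within the 256 MB
(0xf0000000-aligned) segment of the instruction in its delay slot, so a
vmalloc'ed module cannot directly call kernel code in KSEG0. The check in
apply_r_mips_26_rel() below routes exactly those calls through a PLT
entry; as a sketch:

    /* reachable only if the delay-slot address and the target
     * share the same 256 MB segment */
    if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000))
            v = add_plt_entry(me, location, v + (ofs << 2));
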
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 48dc1a9c3e42..77bf5db20d65 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -93,8 +93,13 @@ all-$(CONFIG_SYS_SUPPORTS_ZBOOT) += vmlinuz
 cflags-y                       += -G 0 -mno-abicalls -fno-pic -pipe -mno-branch-likely
 cflags-y                       += -msoft-float
 LDFLAGS_vmlinux                        += -G 0 -static -n -nostdlib
+ifdef CONFIG_64BIT
 KBUILD_AFLAGS_MODULE           += -mlong-calls
 KBUILD_CFLAGS_MODULE           += -mlong-calls
+else
+KBUILD_AFLAGS_MODULE           += -mno-long-calls
+KBUILD_CFLAGS_MODULE           += -mno-long-calls
+endif
 
 ifeq ($(CONFIG_RELOCATABLE),y)
 LDFLAGS_vmlinux                        += --emit-relocs
diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h
index 702c273e67a9..1d4f3b37cefe 100644
--- a/arch/mips/include/asm/module.h
+++ b/arch/mips/include/asm/module.h
@@ -11,6 +11,11 @@ struct mod_arch_specific {
        const struct exception_table_entry *dbe_start;
        const struct exception_table_entry *dbe_end;
        struct mips_hi16 *r_mips_hi16_list;
+
+       void *phys_plt_tbl;
+       void *virt_plt_tbl;
+       unsigned int phys_plt_offset;
+       unsigned int virt_plt_offset;
 };
 
 typedef uint8_t Elf64_Byte;            /* Type for a 8-bit quantity.  */
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 94627a3a6a0d..947981a9aa72 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -44,14 +44,221 @@ struct mips_hi16 {
 static LIST_HEAD(dbe_list);
 static DEFINE_SPINLOCK(dbe_lock);
 
-#ifdef MODULE_START
+/*
+ * Get the maximum potential size of the trampoline tables needed for
+ * the init and non-init sections. Only used if we cannot find enough
+ * contiguous, physically mapped memory to put the module into.
+ */
+static unsigned int
+get_plt_size(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
+             const char *secstrings, unsigned int symindex, bool is_init)
+{
+       unsigned long ret = 0;
+       unsigned int i, j;
+       Elf_Sym *syms;
+
+       /* Everything marked ALLOC (this includes the exported symbols) */
+       for (i = 1; i < hdr->e_shnum; ++i) {
+               unsigned int info = sechdrs[i].sh_info;
+
+               if (sechdrs[i].sh_type != SHT_REL
+                   && sechdrs[i].sh_type != SHT_RELA)
+                       continue;
+
+               /* Not a valid relocation section? */
+               if (info >= hdr->e_shnum)
+                       continue;
+
+               /* Don't bother with non-allocated sections */
+               if (!(sechdrs[info].sh_flags & SHF_ALLOC))
+                       continue;
+
+               /* If it's called *.init*, and we're not init, we're
+                * not interested */
+               if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0)
+                   != is_init)
+                       continue;
+
+               syms = (Elf_Sym *) sechdrs[symindex].sh_addr;
+               if (sechdrs[i].sh_type == SHT_REL) {
+                       Elf_Mips_Rel *rel = (void *) sechdrs[i].sh_addr;
+                       unsigned int size = sechdrs[i].sh_size / sizeof(*rel);
+
+                       for (j = 0; j < size; ++j) {
+                               Elf_Sym *sym;
+
+                               if (ELF_MIPS_R_TYPE(rel[j]) != R_MIPS_26)
+                                       continue;
+
+                               sym = syms + ELF_MIPS_R_SYM(rel[j]);
+                               if (!is_init && sym->st_shndx != SHN_UNDEF)
+                                       continue;
+
+                               ret += 4 * sizeof(int);
+                       }
+               } else {
+                       Elf_Mips_Rela *rela = (void *) sechdrs[i].sh_addr;
+                       unsigned int size = sechdrs[i].sh_size / sizeof(*rela);
+
+                       for (j = 0; j < size; ++j) {
+                               Elf_Sym *sym;
+
+                               if (ELF_MIPS_R_TYPE(rela[j]) != R_MIPS_26)
+                                       continue;
+
+                               sym = syms + ELF_MIPS_R_SYM(rela[j]);
+                               if (!is_init && sym->st_shndx != SHN_UNDEF)
+                                       continue;
+
+                               ret += 4 * sizeof(int);
+                       }
+               }
+       }
+
+       return ret;
+}
+
+#ifndef MODULE_START
+static void *alloc_phys(unsigned long size)
+{
+       unsigned order;
+       struct page *page;
+       struct page *p;
+
+       size = PAGE_ALIGN(size);
+       order = get_order(size);
+
+       page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN |
+                       __GFP_THISNODE, order);
+       if (!page)
+               return NULL;
+
+       split_page(page, order);
+
+       /* mark all pages except for the last one */
+       for (p = page; p + 1 < page + (size >> PAGE_SHIFT); ++p)
+               set_bit(PG_owner_priv_1, &p->flags);
+
+       for (p = page + (size >> PAGE_SHIFT); p < page + (1 << order); ++p)
+               __free_page(p);
+
+       return page_address(page);
+}
+#endif
+
+static void free_phys(void *ptr)
+{
+       struct page *page;
+       bool free;
+
+       page = virt_to_page(ptr);
+       do {
+               free = test_and_clear_bit(PG_owner_priv_1, &page->flags);
+               __free_page(page);
+               page++;
+       } while (free);
+}
+
+
 void *module_alloc(unsigned long size)
 {
+#ifdef MODULE_START
        return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END,
                                GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
                                __builtin_return_address(0));
+#else
+       void *ptr;
+
+       if (size == 0)
+               return NULL;
+
+       ptr = alloc_phys(size);
+
+       /* If we failed to allocate physically contiguous memory,
+        * fall back to regular vmalloc. The module loader code will
+        * create jump tables to handle long jumps */
+       if (!ptr)
+               return vmalloc(size);
+
+       return ptr;
+#endif
 }
+
+static inline bool is_phys_addr(void *ptr)
+{
+#ifdef CONFIG_64BIT
+       return (KSEGX((unsigned long)ptr) == CKSEG0);
+#else
+       return (KSEGX(ptr) == KSEG0);
 #endif
+}
+
+/* Free memory returned from module_alloc */
+void module_memfree(void *module_region)
+{
+       if (is_phys_addr(module_region))
+               free_phys(module_region);
+       else
+               vfree(module_region);
+}
+
+static void *__module_alloc(int size, bool phys)
+{
+       void *ptr;
+
+       if (phys)
+               ptr = kmalloc(size, GFP_KERNEL);
+       else
+               ptr = vmalloc(size);
+       return ptr;
+}
+
+static void __module_free(void *ptr)
+{
+       if (is_phys_addr(ptr))
+               kfree(ptr);
+       else
+               vfree(ptr);
+}
+
+int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
+                             char *secstrings, struct module *mod)
+{
+       unsigned int symindex = 0;
+       unsigned int core_size, init_size;
+       int i;
+
+       mod->arch.phys_plt_offset = 0;
+       mod->arch.virt_plt_offset = 0;
+       mod->arch.phys_plt_tbl = NULL;
+       mod->arch.virt_plt_tbl = NULL;
+
+       if (IS_ENABLED(CONFIG_64BIT))
+               return 0;
+
+       for (i = 1; i < hdr->e_shnum; i++)
+               if (sechdrs[i].sh_type == SHT_SYMTAB)
+                       symindex = i;
+
+       core_size = get_plt_size(hdr, sechdrs, secstrings, symindex, false);
+       init_size = get_plt_size(hdr, sechdrs, secstrings, symindex, true);
+
+       if ((core_size + init_size) == 0)
+               return 0;
+
+       mod->arch.phys_plt_tbl = __module_alloc(core_size + init_size, 1);
+       if (!mod->arch.phys_plt_tbl)
+               return -ENOMEM;
+
+       mod->arch.virt_plt_tbl = __module_alloc(core_size + init_size, 0);
+       if (!mod->arch.virt_plt_tbl) {
+               __module_free(mod->arch.phys_plt_tbl);
+               mod->arch.phys_plt_tbl = NULL;
+               return -ENOMEM;
+       }
+
+       return 0;
+}
 
 int apply_r_mips_none(struct module *me, u32 *location, Elf_Addr v)
 {
@@ -65,8 +272,39 @@ static int apply_r_mips_32_rel(struct module *me, u32 *location, Elf_Addr v)
        return 0;
 }
 
+static Elf_Addr add_plt_entry_to(unsigned *plt_offset,
+                                void *start, Elf_Addr v)
+{
+       unsigned *tramp = start + *plt_offset;
+       *plt_offset += 4 * sizeof(int);
+
+       /* adjust carry for addiu */
+       if (v & 0x00008000)
+               v += 0x10000;
+
+       tramp[0] = 0x3c190000 | (v >> 16);      /* lui t9, hi16 */
+       tramp[1] = 0x27390000 | (v & 0xffff);   /* addiu t9, t9, lo16 */
+       tramp[2] = 0x03200008;                  /* jr t9 */
+       tramp[3] = 0x00000000;                  /* nop */
+
+       return (Elf_Addr) tramp;
+}
+
+static Elf_Addr add_plt_entry(struct module *me, void *location, Elf_Addr v)
+{
+       if (is_phys_addr(location))
+               return add_plt_entry_to(&me->arch.phys_plt_offset,
+                               me->arch.phys_plt_tbl, v);
+       else
+               return add_plt_entry_to(&me->arch.virt_plt_offset,
+                               me->arch.virt_plt_tbl, v);
+
+}
+
 static int apply_r_mips_26_rel(struct module *me, u32 *location, Elf_Addr v)
 {
+       u32 ofs = *location & 0x03ffffff;
+
        if (v % 4) {
                pr_err("module %s: dangerous R_MIPS_26 REL relocation\n",
                       me->name);
@@ -74,13 +312,17 @@ static int apply_r_mips_26_rel(struct module *me, u32 *location, Elf_Addr v)
        }
 
        if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
-               pr_err("module %s: relocation overflow\n",
-                      me->name);
-               return -ENOEXEC;
+               v = add_plt_entry(me, location, v + (ofs << 2));
+               if (!v) {
+                       pr_err("module %s: relocation overflow\n",
+                              me->name);
+                       return -ENOEXEC;
+               }
+               ofs = 0;
        }
 
        *location = (*location & ~0x03ffffff) |
-                   ((*location + (v >> 2)) & 0x03ffffff);
+                   ((ofs + (v >> 2)) & 0x03ffffff);
 
        return 0;
 }
@@ -349,9 +591,36 @@ int module_finalize(const Elf_Ehdr *hdr,
                list_add(&me->arch.dbe_list, &dbe_list);
                spin_unlock_irq(&dbe_lock);
        }
+
+       /* Get rid of the fixup trampolines if we're running the module
+        * from physically mapped address space */
+       if (me->arch.phys_plt_offset == 0) {
+               __module_free(me->arch.phys_plt_tbl);
+               me->arch.phys_plt_tbl = NULL;
+       }
+       if (me->arch.virt_plt_offset == 0) {
+               __module_free(me->arch.virt_plt_tbl);
+               me->arch.virt_plt_tbl = NULL;
+       }
+
        return 0;
 }
 
+void module_arch_freeing_init(struct module *mod)
+{
+       if (mod->state == MODULE_STATE_LIVE)
+               return;
+
+       if (mod->arch.phys_plt_tbl) {
+               __module_free(mod->arch.phys_plt_tbl);
+               mod->arch.phys_plt_tbl = NULL;
+       }
+       if (mod->arch.virt_plt_tbl) {
+               __module_free(mod->arch.virt_plt_tbl);
+               mod->arch.virt_plt_tbl = NULL;
+       }
+}
+
 void module_arch_cleanup(struct module *mod)
 {
        spin_lock_irq(&dbe_lock);
-- 
2.11.0
