kernel: update 4.9 to 4.9.44
[oweals/openwrt.git] / target / linux / generic / pending-4.9 / 305-mips_module_reloc.patch
1 From: Felix Fietkau <nbd@nbd.name>
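2 Subject: mips: replace -mlong-calls with -mno-long-calls to make function calls faster in kernel modules; to achieve this, try to load modules into physically contiguous memory and, if that fails, fall back to vmalloc and route long jumps through jump tables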
3
4 lede-commit: 3b3d64743ba2a874df9d70cd19e242205b0a788c
5 Signed-off-by: Felix Fietkau <nbd@nbd.name>
6 ---
7  arch/mips/Makefile             |   5 +
8  arch/mips/include/asm/module.h |   5 +
9  arch/mips/kernel/module.c      | 279 ++++++++++++++++++++++++++++++++++++++++-
10  3 files changed, 284 insertions(+), 5 deletions(-)
11
12 --- a/arch/mips/Makefile
13 +++ b/arch/mips/Makefile
14 @@ -93,8 +93,13 @@ all-$(CONFIG_SYS_SUPPORTS_ZBOOT)+= vmlin
15  cflags-y                       += -G 0 -mno-abicalls -fno-pic -pipe -mno-branch-likely
16  cflags-y                       += -msoft-float
17  LDFLAGS_vmlinux                        += -G 0 -static -n -nostdlib
18 +ifdef CONFIG_64BIT
19  KBUILD_AFLAGS_MODULE           += -mlong-calls
20  KBUILD_CFLAGS_MODULE           += -mlong-calls
21 +else # !CONFIG_64BIT: modules are built with -mno-long-calls instead
22 +KBUILD_AFLAGS_MODULE           += -mno-long-calls
23 +KBUILD_CFLAGS_MODULE           += -mno-long-calls
24 +endif # CONFIG_64BIT
25  
26  ifeq ($(CONFIG_RELOCATABLE),y)
27  LDFLAGS_vmlinux                        += --emit-relocs
28 --- a/arch/mips/include/asm/module.h
29 +++ b/arch/mips/include/asm/module.h
30 @@ -11,6 +11,11 @@ struct mod_arch_specific {
31         const struct exception_table_entry *dbe_start;
32         const struct exception_table_entry *dbe_end;
33         struct mips_hi16 *r_mips_hi16_list;
34 +
35 +       void *phys_plt_tbl;
36 +       void *virt_plt_tbl;
37 +       unsigned int phys_plt_offset;
38 +       unsigned int virt_plt_offset;
39  };
40  
41  typedef uint8_t Elf64_Byte;            /* Type for a 8-bit quantity.  */
42 --- a/arch/mips/kernel/module.c
43 +++ b/arch/mips/kernel/module.c
44 @@ -44,14 +44,221 @@ struct mips_hi16 {
45  static LIST_HEAD(dbe_list);
46  static DEFINE_SPINLOCK(dbe_lock);
47  
48 -#ifdef MODULE_START
49 +/*
50 + * Return the potential maximum number of bytes of trampolines needed by
51 + * the init (is_init) or non-init sections: 4 words for every R_MIPS_26
52 + * relocation counted; the non-init pass counts undefined symbols only.
53 + */
54 +static unsigned int
55 +get_plt_size(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
56 +             const char *secstrings, unsigned int symindex, bool is_init)
57 +{
58 +       unsigned long ret = 0;
59 +       unsigned int i, j;
60 +       Elf_Sym *syms;
61 +
62 +       /* Visit every section header, keeping only REL/RELA sections */
63 +       for (i = 1; i < hdr->e_shnum; ++i) {
64 +               unsigned int info = sechdrs[i].sh_info;
65 +
66 +               if (sechdrs[i].sh_type != SHT_REL
67 +                   && sechdrs[i].sh_type != SHT_RELA)
68 +                       continue;
69 +
70 +               /* Not a valid relocation section? */
71 +               if (info >= hdr->e_shnum)
72 +                       continue;
73 +
74 +               /* Don't bother with non-allocated sections */
75 +               if (!(sechdrs[info].sh_flags & SHF_ALLOC))
76 +                       continue;
77 +
78 +               /* Skip relocation sections whose name contains ".init"
79 +                   when is_init is false, and all the others when it is true */
80 +               if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0)
81 +                   != is_init)
82 +                       continue;
83 +
84 +               syms = (Elf_Sym *) sechdrs[symindex].sh_addr;
85 +               if (sechdrs[i].sh_type == SHT_REL) {
86 +                       Elf_Mips_Rel *rel = (void *) sechdrs[i].sh_addr;
87 +                       unsigned int size = sechdrs[i].sh_size / sizeof(*rel);
88 +
89 +                       for (j = 0; j < size; ++j) {
90 +                               Elf_Sym *sym;
91 +
92 +                               if (ELF_MIPS_R_TYPE(rel[j]) != R_MIPS_26)
93 +                                       continue;
94 +                               /* non-init pass: only undefined symbols get a slot */
95 +                               sym = syms + ELF_MIPS_R_SYM(rel[j]);
96 +                               if (!is_init && sym->st_shndx != SHN_UNDEF)
97 +                                       continue;
98 +
99 +                               ret += 4 * sizeof(int); /* one 4-word entry */
100 +                       }
101 +               } else {
102 +                       Elf_Mips_Rela *rela = (void *) sechdrs[i].sh_addr;
103 +                       unsigned int size = sechdrs[i].sh_size / sizeof(*rela);
104 +
105 +                       for (j = 0; j < size; ++j) {
106 +                               Elf_Sym *sym;
107 +
108 +                               if (ELF_MIPS_R_TYPE(rela[j]) != R_MIPS_26)
109 +                                       continue;
110 +                               /* same counting rule as the REL branch */
111 +                               sym = syms + ELF_MIPS_R_SYM(rela[j]);
112 +                               if (!is_init && sym->st_shndx != SHN_UNDEF)
113 +                                       continue;
114 +
115 +                               ret += 4 * sizeof(int); /* one 4-word entry */
116 +                       }
117 +               }
118 +       }
119 +
120 +       return ret;
121 +}
122 +/* Allocate size bytes, rounded up to whole pages, from alloc_pages(); NULL on failure */
123 +#ifndef MODULE_START
124 +static void *alloc_phys(unsigned long size)
125 +{
126 +       unsigned order;
127 +       struct page *page;
128 +       struct page *p;
129 +
130 +       size = PAGE_ALIGN(size);
131 +       order = get_order(size);
132 +
133 +       page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN |
134 +                       __GFP_THISNODE, order);
135 +       if (!page)
136 +               return NULL;
137 +       /* split the block so its pages can be freed one by one below */
138 +       split_page(page, order);
139 +
140 +       /* flag every used page but the last; free_phys() stops at the unflagged one */
141 +       for (p = page; p + 1 < page + (size >> PAGE_SHIFT); ++p)
142 +               set_bit(PG_owner_priv_1, &p->flags);
143 +       /* hand back the pages of the 2^order block that lie beyond size */
144 +       for (p = page + (size >> PAGE_SHIFT); p < page + (1 << order); ++p)
145 +               __free_page(p);
146 +
147 +       return page_address(page);
148 +}
149 +#endif
150 +/* Free consecutive pages starting at ptr, stopping after the first unflagged page */
151 +static void free_phys(void *ptr)
152 +{
153 +       struct page *page;
154 +       bool free;
155 +
156 +       page = virt_to_page(ptr);
157 +       do {
158 +               free = test_and_clear_bit(PG_owner_priv_1, &page->flags); /* was this page flagged? */
159 +               __free_page(page);
160 +               page++;
161 +       } while (free); /* the page without the flag was the final one */
162 +}
163 +/* Allocate module memory: from the MODULE_START..MODULE_END range when that is
164 + * defined, else contiguous pages from alloc_phys() with a vmalloc() fallback. */
165  void *module_alloc(unsigned long size)
166  {
167 +#ifdef MODULE_START
168         return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END,
169                                 GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
170                                 __builtin_return_address(0));
171 +#else
172 +       void *ptr;
173 +
174 +       if (size == 0)
175 +               return NULL;
176 +
177 +       ptr = alloc_phys(size);
178 +
179 +       /* If we failed to allocate physically contiguous memory,
180 +        * fall back to regular vmalloc. The module loader code will
181 +        * create jump tables to handle long jumps */
182 +       if (!ptr)
183 +               return vmalloc(size);
184 +
185 +       return ptr;
186 +#endif
187  }
188 +/* True when KSEGX() places ptr in CKSEG0 (64-bit build) or KSEG0 (otherwise) */
189 +static inline bool is_phys_addr(void *ptr)
190 +{
191 +#ifdef CONFIG_64BIT
192 +       return (KSEGX((unsigned long)ptr) == CKSEG0);
193 +#else
194 +       return (KSEGX(ptr) == KSEG0);
195  #endif
196 +}
197 +
198 +/* Release a region obtained from module_alloc(), picking the path by address */
199 +void module_memfree(void *module_region)
200 +{
201 +       if (!is_phys_addr(module_region))
202 +               vfree(module_region);
203 +       else
204 +               free_phys(module_region);
205 +}
206 +
207 +static void *__module_alloc(int size, bool phys)
208 +{
209 +       /*
210 +        * Trampoline table allocator: kmalloc() when phys is set,
211 +        * vmalloc() otherwise. Returns whatever the allocator returned.
212 +        */
213 +       if (!phys)
214 +               return vmalloc(size);
215 +       return kmalloc(size, GFP_KERNEL);
216 +}
217 +/* Counterpart of __module_alloc(): the address decides between vfree() and kfree() */
218 +static void __module_free(void *ptr)
219 +{
220 +       if (!is_phys_addr(ptr))
221 +               vfree(ptr);
222 +       else
223 +               kfree(ptr);
224 +}
225 +/* Count the trampolines a module may need and allocate both tables; 0 or -ENOMEM */
226 +int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
227 +                             char *secstrings, struct module *mod)
228 +{
229 +       unsigned int symindex = 0;
230 +       unsigned int core_size, init_size;
231 +       int i;
232 +
233 +       mod->arch.phys_plt_offset = 0;
234 +       mod->arch.virt_plt_offset = 0;
235 +       mod->arch.phys_plt_tbl = NULL;
236 +       mod->arch.virt_plt_tbl = NULL;
237 +       /* 64-bit kernels return here, leaving both tables NULL */
238 +       if (IS_ENABLED(CONFIG_64BIT))
239 +               return 0;
240 +       /* the last SHT_SYMTAB section header found is used as the symbol table */
241 +       for (i = 1; i < hdr->e_shnum; i++)
242 +               if (sechdrs[i].sh_type == SHT_SYMTAB)
243 +                       symindex = i;
244 +
245 +       core_size = get_plt_size(hdr, sechdrs, secstrings, symindex, false);
246 +       init_size = get_plt_size(hdr, sechdrs, secstrings, symindex, true);
247 +       /* nothing was counted, so no tables are allocated */
248 +       if ((core_size + init_size) == 0)
249 +               return 0;
250 +       /* one kmalloc()ed and one vmalloc()ed table, each sized for both counts */
251 +       mod->arch.phys_plt_tbl = __module_alloc(core_size + init_size, 1);
252 +       if (!mod->arch.phys_plt_tbl)
253 +               return -ENOMEM;
254 +
255 +       mod->arch.virt_plt_tbl = __module_alloc(core_size + init_size, 0);
256 +       if (!mod->arch.virt_plt_tbl) {
257 +               __module_free(mod->arch.phys_plt_tbl);
258 +               mod->arch.phys_plt_tbl = NULL;
259 +               return -ENOMEM;
260 +       }
261 +
262 +       return 0;
263 +}
264  
265  int apply_r_mips_none(struct module *me, u32 *location, Elf_Addr v)
266  {
267 @@ -65,8 +272,39 @@ static int apply_r_mips_32_rel(struct mo
268         return 0;
269  }
270  
271 +static Elf_Addr add_plt_entry_to(unsigned *plt_offset,
272 +                                void *start, Elf_Addr v)
273 +{
274 +       /* Write a 4-word "load v into t9, jr t9" stub at start + *plt_offset;
275 +        * return its address, or 0 when start is NULL (no table allocated). */
276 +       unsigned *tramp = start + *plt_offset;
277 +       if (!start)
278 +               return 0;
279 +       *plt_offset += 4 * sizeof(int);
280 +       if (v & 0x00008000)     /* adjust carry for addiu */
281 +               v += 0x10000;
282 +       tramp[0] = 0x3c190000 | (v >> 16);      /* lui t9, hi16 */
283 +       tramp[1] = 0x27390000 | (v & 0xffff);   /* addiu t9, t9, lo16 */
284 +       tramp[2] = 0x03200008;                  /* jr t9 */
285 +       tramp[3] = 0x00000000;                  /* nop */
286 +       return (Elf_Addr) tramp;
287 +}
288 +
289 +static Elf_Addr add_plt_entry(struct module *me, void *location, Elf_Addr v)
290 +{
291 +       /* Use the table (and its fill offset) matching the address being patched */
292 +       bool phys = is_phys_addr(location);
293 +       unsigned *used = phys ? &me->arch.phys_plt_offset :
294 +                               &me->arch.virt_plt_offset;
295 +       void *tbl = phys ? me->arch.phys_plt_tbl : me->arch.virt_plt_tbl;
296 +
297 +       return add_plt_entry_to(used, tbl, v);
298 +}
299 +
300  static int apply_r_mips_26_rel(struct module *me, u32 *location, Elf_Addr v)
301  {
302 +       u32 ofs = *location & 0x03ffffff;
303 +
304         if (v % 4) {
305                 pr_err("module %s: dangerous R_MIPS_26 REL relocation\n",
306                        me->name);
307 @@ -74,13 +312,17 @@ static int apply_r_mips_26_rel(struct mo
308         }
309  
310         if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
311 -               pr_err("module %s: relocation overflow\n",
312 -                      me->name);
313 -               return -ENOEXEC;
314 +               v = add_plt_entry(me, location, v + (ofs << 2));
315 +               if (!v) {
316 +                       pr_err("module %s: relocation overflow\n",
317 +                              me->name);
318 +                       return -ENOEXEC;
319 +               }
320 +               ofs = 0;
321         }
322  
323         *location = (*location & ~0x03ffffff) |
324 -                   ((*location + (v >> 2)) & 0x03ffffff);
325 +                   ((ofs + (v >> 2)) & 0x03ffffff);
326  
327         return 0;
328  }
329 @@ -349,9 +591,36 @@ int module_finalize(const Elf_Ehdr *hdr,
330                 list_add(&me->arch.dbe_list, &dbe_list);
331                 spin_unlock_irq(&dbe_lock);
332         }
333 +
334 +       /* Free any trampoline table left unused (offset 0), e.g. when the
335 +        * module runs from physically mapped address space */
336 +       if (me->arch.phys_plt_offset == 0) {
337 +               __module_free(me->arch.phys_plt_tbl);
338 +               me->arch.phys_plt_tbl = NULL;
339 +       }
340 +       if (me->arch.virt_plt_offset == 0) {
341 +               __module_free(me->arch.virt_plt_tbl);
342 +               me->arch.virt_plt_tbl = NULL;
343 +       }
344 +
345         return 0;
346  }
347  
348 +void module_arch_freeing_init(struct module *mod)
349 +{
350 +       if (mod->state == MODULE_STATE_LIVE) /* a live module keeps its tables */
351 +               return;
352 +       /* otherwise free whichever trampoline tables are still allocated */
353 +       if (mod->arch.phys_plt_tbl) {
354 +               __module_free(mod->arch.phys_plt_tbl);
355 +               mod->arch.phys_plt_tbl = NULL; /* guards against a second free */
356 +       }
357 +       if (mod->arch.virt_plt_tbl) {
358 +               __module_free(mod->arch.virt_plt_tbl);
359 +               mod->arch.virt_plt_tbl = NULL;
360 +       }
361 +}
362 +
363  void module_arch_cleanup(struct module *mod)
364  {
365         spin_lock_irq(&dbe_lock);