1 From: Felix Fietkau <nbd@nbd.name>
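2 Subject: mips: replace -mlong-calls with -mno-long-calls to make function calls faster in kernel modules; to achieve this, try to place modules in physically mapped memory, falling back to vmalloc with jump trampolines for out-of-range calls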
4 lede-commit: 3b3d64743ba2a874df9d70cd19e242205b0a788c
5 Signed-off-by: Felix Fietkau <nbd@nbd.name>
7 arch/mips/Makefile | 5 +
8 arch/mips/include/asm/module.h | 5 +
9 arch/mips/kernel/module.c | 279 ++++++++++++++++++++++++++++++++++++++++-
10 3 files changed, 284 insertions(+), 5 deletions(-)
12 --- a/arch/mips/Makefile
13 +++ b/arch/mips/Makefile
14 @@ -93,8 +93,13 @@ all-$(CONFIG_SYS_SUPPORTS_ZBOOT)+= vmlin
15 cflags-y += -G 0 -mno-abicalls -fno-pic -pipe -mno-branch-likely
16 cflags-y += -msoft-float
17 LDFLAGS_vmlinux += -G 0 -static -n -nostdlib
19 KBUILD_AFLAGS_MODULE += -mlong-calls
20 KBUILD_CFLAGS_MODULE += -mlong-calls
22 +KBUILD_AFLAGS_MODULE += -mno-long-calls
23 +KBUILD_CFLAGS_MODULE += -mno-long-calls
26 ifeq ($(CONFIG_RELOCATABLE),y)
27 LDFLAGS_vmlinux += --emit-relocs
28 --- a/arch/mips/include/asm/module.h
29 +++ b/arch/mips/include/asm/module.h
30 @@ -11,6 +11,11 @@ struct mod_arch_specific {
31 const struct exception_table_entry *dbe_start;
32 const struct exception_table_entry *dbe_end;
33 struct mips_hi16 *r_mips_hi16_list;
37 + unsigned int phys_plt_offset;
38 + unsigned int virt_plt_offset;
41 typedef uint8_t Elf64_Byte; /* Type for a 8-bit quantity. */
42 --- a/arch/mips/kernel/module.c
43 +++ b/arch/mips/kernel/module.c
44 @@ -44,14 +44,221 @@ struct mips_hi16 {
45 static LIST_HEAD(dbe_list);
46 static DEFINE_SPINLOCK(dbe_lock);
50 + * Get the potential maximum trampoline size required for the init and
51 + * non-init sections. Only used if we cannot find enough contiguous
52 + * physically mapped memory to put the module into.
55 +get_plt_size(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
56 + const char *secstrings, unsigned int symindex, bool is_init)
58 + unsigned long ret = 0;
62 + /* Everything marked ALLOC (this includes the exported symbols) */
63 + for (i = 1; i < hdr->e_shnum; ++i) {
64 + unsigned int info = sechdrs[i].sh_info;
66 + if (sechdrs[i].sh_type != SHT_REL
67 + && sechdrs[i].sh_type != SHT_RELA)
70 + /* Not a valid relocation section? */
71 + if (info >= hdr->e_shnum)
74 + /* Don't bother with non-allocated sections */
75 + if (!(sechdrs[info].sh_flags & SHF_ALLOC))
78 + /* If it's called *.init*, and we're not init, we're
80 + if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0)
84 + syms = (Elf_Sym *) sechdrs[symindex].sh_addr;
85 + if (sechdrs[i].sh_type == SHT_REL) {
86 + Elf_Mips_Rel *rel = (void *) sechdrs[i].sh_addr;
87 + unsigned int size = sechdrs[i].sh_size / sizeof(*rel);
89 + for (j = 0; j < size; ++j) {
92 + if (ELF_MIPS_R_TYPE(rel[j]) != R_MIPS_26)
95 + sym = syms + ELF_MIPS_R_SYM(rel[j]);
96 + if (!is_init && sym->st_shndx != SHN_UNDEF)
99 + ret += 4 * sizeof(int);
102 + Elf_Mips_Rela *rela = (void *) sechdrs[i].sh_addr;
103 + unsigned int size = sechdrs[i].sh_size / sizeof(*rela);
105 + for (j = 0; j < size; ++j) {
108 + if (ELF_MIPS_R_TYPE(rela[j]) != R_MIPS_26)
111 + sym = syms + ELF_MIPS_R_SYM(rela[j]);
112 + if (!is_init && sym->st_shndx != SHN_UNDEF)
115 + ret += 4 * sizeof(int);
123 +#ifndef MODULE_START
124 +static void *alloc_phys(unsigned long size)
130 + size = PAGE_ALIGN(size);
131 + order = get_order(size);
133 + page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN |
134 + __GFP_THISNODE, order);
138 + split_page(page, order);
140 + /* mark all pages except for the last one */
141 + for (p = page; p + 1 < page + (size >> PAGE_SHIFT); ++p)
142 + set_bit(PG_owner_priv_1, &p->flags);
144 + for (p = page + (size >> PAGE_SHIFT); p < page + (1 << order); ++p)
147 + return page_address(page);
151 +static void free_phys(void *ptr)
156 + page = virt_to_page(ptr);
158 + free = test_and_clear_bit(PG_owner_priv_1, &page->flags);
165 void *module_alloc(unsigned long size)
168 return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END,
169 GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
170 __builtin_return_address(0));
177 + ptr = alloc_phys(size);
179 + /* If we failed to allocate physically contiguous memory,
180 + * fall back to regular vmalloc. The module loader code will
181 + * create jump tables to handle long jumps */
183 + return vmalloc(size);
189 +static inline bool is_phys_addr(void *ptr)
192 + return (KSEGX((unsigned long)ptr) == CKSEG0);
194 + return (KSEGX(ptr) == KSEG0);
198 +/* Free memory returned from module_alloc */
199 +void module_memfree(void *module_region)
201 + if (is_phys_addr(module_region))
202 + free_phys(module_region);
204 + vfree(module_region);
207 +static void *__module_alloc(int size, bool phys)
212 + ptr = kmalloc(size, GFP_KERNEL);
214 + ptr = vmalloc(size);
218 +static void __module_free(void *ptr)
220 + if (is_phys_addr(ptr))
226 +int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
227 + char *secstrings, struct module *mod)
229 + unsigned int symindex = 0;
230 + unsigned int core_size, init_size;
233 + mod->arch.phys_plt_offset = 0;
234 + mod->arch.virt_plt_offset = 0;
235 + mod->arch.phys_plt_tbl = NULL;
236 + mod->arch.virt_plt_tbl = NULL;
238 + if (IS_ENABLED(CONFIG_64BIT))
241 + for (i = 1; i < hdr->e_shnum; i++)
242 + if (sechdrs[i].sh_type == SHT_SYMTAB)
245 + core_size = get_plt_size(hdr, sechdrs, secstrings, symindex, false);
246 + init_size = get_plt_size(hdr, sechdrs, secstrings, symindex, true);
248 + if ((core_size + init_size) == 0)
251 + mod->arch.phys_plt_tbl = __module_alloc(core_size + init_size, 1);
252 + if (!mod->arch.phys_plt_tbl)
255 + mod->arch.virt_plt_tbl = __module_alloc(core_size + init_size, 0);
256 + if (!mod->arch.virt_plt_tbl) {
257 + __module_free(mod->arch.phys_plt_tbl);
258 + mod->arch.phys_plt_tbl = NULL;
265 int apply_r_mips_none(struct module *me, u32 *location, Elf_Addr v)
267 @@ -65,8 +272,39 @@ static int apply_r_mips_32_rel(struct mo
271 +static Elf_Addr add_plt_entry_to(unsigned *plt_offset,
272 + void *start, Elf_Addr v)
274 + unsigned *tramp = start + *plt_offset;
275 + *plt_offset += 4 * sizeof(int);
277 + /* adjust carry for addiu */
278 + if (v & 0x00008000)
281 + tramp[0] = 0x3c190000 | (v >> 16); /* lui t9, hi16 */
282 + tramp[1] = 0x27390000 | (v & 0xffff); /* addiu t9, t9, lo16 */
283 + tramp[2] = 0x03200008; /* jr t9 */
284 + tramp[3] = 0x00000000; /* nop */
286 + return (Elf_Addr) tramp;
289 +static Elf_Addr add_plt_entry(struct module *me, void *location, Elf_Addr v)
291 + if (is_phys_addr(location))
292 + return add_plt_entry_to(&me->arch.phys_plt_offset,
293 + me->arch.phys_plt_tbl, v);
295 + return add_plt_entry_to(&me->arch.virt_plt_offset,
296 + me->arch.virt_plt_tbl, v);
300 static int apply_r_mips_26_rel(struct module *me, u32 *location, Elf_Addr v)
302 + u32 ofs = *location & 0x03ffffff;
305 pr_err("module %s: dangerous R_MIPS_26 REL relocation\n",
307 @@ -74,13 +312,17 @@ static int apply_r_mips_26_rel(struct mo
310 if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
311 - pr_err("module %s: relocation overflow\n",
314 + v = add_plt_entry(me, location, v + (ofs << 2));
316 + pr_err("module %s: relocation overflow\n",
323 *location = (*location & ~0x03ffffff) |
324 - ((*location + (v >> 2)) & 0x03ffffff);
325 + ((ofs + (v >> 2)) & 0x03ffffff);
329 @@ -349,9 +591,36 @@ int module_finalize(const Elf_Ehdr *hdr,
330 list_add(&me->arch.dbe_list, &dbe_list);
331 spin_unlock_irq(&dbe_lock);
334 + /* Get rid of the fixup trampoline if we're running the module
335 + * from physically mapped address space */
336 + if (me->arch.phys_plt_offset == 0) {
337 + __module_free(me->arch.phys_plt_tbl);
338 + me->arch.phys_plt_tbl = NULL;
340 + if (me->arch.virt_plt_offset == 0) {
341 + __module_free(me->arch.virt_plt_tbl);
342 + me->arch.virt_plt_tbl = NULL;
348 +void module_arch_freeing_init(struct module *mod)
350 + if (mod->state == MODULE_STATE_LIVE)
353 + if (mod->arch.phys_plt_tbl) {
354 + __module_free(mod->arch.phys_plt_tbl);
355 + mod->arch.phys_plt_tbl = NULL;
357 + if (mod->arch.virt_plt_tbl) {
358 + __module_free(mod->arch.virt_plt_tbl);
359 + mod->arch.virt_plt_tbl = NULL;
363 void module_arch_cleanup(struct module *mod)
365 spin_lock_irq(&dbe_lock);