rewrite bump allocator to fix corner cases, decouple from expand_heap
author: Rich Felker <dalias@aerifal.cx>
Wed, 3 Jun 2020 22:13:18 +0000 (18:13 -0400)
committer: Rich Felker <dalias@aerifal.cx>
Wed, 3 Jun 2020 22:13:18 +0000 (18:13 -0400)
this affects the bump allocator used when static linking in programs
that don't need allocation metadata due to not using realloc, free,
etc.

commit e3bc22f1eff87b8f029a6ab31f1a269d69e4b053 refactored the bump
allocator to share code with __expand_heap, used by malloc, for the
purpose of fixing the case (mainly nommu) where brk doesn't work.
however, the geometric growth behavior of __expand_heap is not
actually well-suited to the bump allocator, and can produce
significant excessive memory usage. in particular, by repeatedly
requesting just over the remaining free space in the current
mmap-allocated area, the total mapped memory will be roughly double
the nominal usage. and since the main user of the no-brk mmap fallback
in the bump allocator is nommu, this excessive usage is not just
virtual address space but physical memory.

in addition, even on systems with brk, having a unified size request
to __expand_heap without knowing whether the brk or mmap backend would
get used made it so the brk could be expanded twice as far as needed.
for example, with malloc(n) and n-1 bytes available before the current
brk, the brk would be expanded by n bytes rounded up to page size,
when expansion by just one page would have sufficed.

the new implementation computes request size separately for the cases
where brk expansion is being attempted vs using mmap, and also
performs individual mmap of large allocations without moving to a new
bump area and throwing away the rest of the old one. this greatly
reduces the need for geometric area size growth and limits the extent
to which free space at the end of one bump area might be unusable for
future allocations.

as a bonus, the resulting code size is somewhat smaller than the
combined old version plus __expand_heap.

src/malloc/lite_malloc.c

index 050d84f648bcc690f97d704ce7232089b9a256f0..c3f0c129acbcb88e58a59a9231c0650f602753c3 100644 (file)
@@ -2,44 +2,99 @@
 #include <stdint.h>
 #include <limits.h>
 #include <errno.h>
+#include <sys/mman.h>
+#include "libc.h"
 #include "lock.h"
-#include "malloc_impl.h"
+#include "syscall.h"
 
 #define ALIGN 16
 
+/* This function returns true if the interval [old,new]
+ * intersects the 'len'-sized interval below &libc.auxv
+ * (interpreted as the main-thread stack) or below &b
+ * (the current stack). It is used to defend against
+ * buggy brk implementations that can cross the stack. */
+
+static int traverses_stack_p(uintptr_t old, uintptr_t new)
+{
+       const uintptr_t len = 8<<20;
+       uintptr_t a, b;
+
+       b = (uintptr_t)libc.auxv;
+       a = b > len ? b-len : 0;
+       if (new>a && old<b) return 1;
+
+       b = (uintptr_t)&b;
+       a = b > len ? b-len : 0;
+       if (new>a && old<b) return 1;
+
+       return 0;
+}
+
 static void *__simple_malloc(size_t n)
 {
-       static char *cur, *end;
+       static uintptr_t brk, cur, end;
        static volatile int lock[1];
-       size_t align=1, pad;
+       static unsigned mmap_step;
+       size_t align=1;
        void *p;
 
+       if (n > SIZE_MAX/2) {
+               errno = ENOMEM;
+               return 0;
+       }
+
        if (!n) n++;
        while (align<n && align<ALIGN)
                align += align;
 
        LOCK(lock);
 
-       pad = -(uintptr_t)cur & align-1;
-
-       if (n <= SIZE_MAX/2 + ALIGN) n += pad;
+       cur += -cur & align-1;
 
        if (n > end-cur) {
-               size_t m = n;
-               char *new = __expand_heap(&m);
-               if (!new) {
-                       UNLOCK(lock);
-                       return 0;
+               size_t req = n - (end-cur) + PAGE_SIZE-1 & -PAGE_SIZE;
+
+               if (!cur) {
+                       brk = __syscall(SYS_brk, 0);
+                       brk += -brk & PAGE_SIZE-1;
+                       cur = end = brk;
                }
-               if (new != end) {
-                       cur = new;
-                       n -= pad;
-                       pad = 0;
+
+               if (brk == end && req < SIZE_MAX-brk
+                   && !traverses_stack_p(brk, brk+req)
+                   && __syscall(SYS_brk, brk+req)==brk+req) {
+                       brk = end += req;
+               } else {
+                       int new_area = 0;
+                       req = n + PAGE_SIZE-1 & -PAGE_SIZE;
+                       /* Only make a new area rather than individual mmap
+                        * if wasted space would be over 1/8 of the map. */
+                       if (req-n > req/8) {
+                               /* Geometric area size growth up to 64 pages,
+                                * bounding waste by 1/8 of the area. */
+                               size_t min = PAGE_SIZE<<(mmap_step/2);
+                               if (min-n > end-cur) {
+                                       if (req < min) {
+                                               req = min;
+                                               if (mmap_step < 12)
+                                                       mmap_step++;
+                                       }
+                                       new_area = 1;
+                               }
+                       }
+                       void *mem = __mmap(0, req, PROT_READ|PROT_WRITE,
+                               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+                       if (mem == MAP_FAILED || !new_area) {
+                               UNLOCK(lock);
+                               return mem==MAP_FAILED ? 0 : mem;
+                       }
+                       cur = (uintptr_t)mem;
+                       end = cur + req;
                }
-               end = new + m;
        }
 
-       p = cur + pad;
+       p = (void *)cur;
        cur += n;
        UNLOCK(lock);
        return p;