From: Rich Felker <dalias@aerifal.cx>
Date: Sat, 15 Sep 2012 03:52:51 +0000 (-0400)
Subject: work around gcc GOT-register-reload performance problems in malloc
X-Git-Tag: v0.9.5~5
X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=afd209deb7d3bfc9cc31713e2cb8f22693ca6fae;p=oweals%2Fmusl.git

work around gcc GOT-register-reload performance problems in malloc

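When building with GCC as position-independent code, this patch
redefines `inline` as always_inline and marks the small lock helpers
in malloc.c as `static inline`. With this patch, the malloc in libc.so
built with -Os is nearly as fast as the one built with -O3. This
resolves the performance regression that resulted from removing the
forced -O3 when building libc.so; libc.so can now be both small and
fast.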
---

diff --git a/src/malloc/malloc.c b/src/malloc/malloc.c
index 39c7d051..88a31ae4 100644
--- a/src/malloc/malloc.c
+++ b/src/malloc/malloc.c
@@ -9,6 +9,10 @@
 #include "atomic.h"
 #include "pthread_impl.h"
 
+#if defined(__GNUC__) && defined(__PIC__)
+#define inline inline __attribute__((always_inline))
+#endif
+
 uintptr_t __brk(uintptr_t);
 void *__mmap(void *, size_t, int, int, int, off_t);
 int __munmap(void *, size_t);
@@ -58,20 +62,20 @@ static struct {
 
 /* Synchronization tools */
 
-static void lock(volatile int *lk)
+static inline void lock(volatile int *lk)
 {
 	if (!libc.threads_minus_1) return;
 	while(a_swap(lk, 1)) __wait(lk, lk+1, 1, 1);
 }
 
-static void unlock(volatile int *lk)
+static inline void unlock(volatile int *lk)
 {
 	if (!libc.threads_minus_1) return;
 	a_store(lk, 0);
 	if (lk[1]) __wake(lk, 1, 1);
 }
 
-static void lock_bin(int i)
+static inline void lock_bin(int i)
 {
 	if (libc.threads_minus_1)
 		lock(mal.bins[i].lock);
@@ -79,7 +83,7 @@ static void lock_bin(int i)
 		mal.bins[i].head = mal.bins[i].tail = BIN_TO_CHUNK(i);
 }
 
-static void unlock_bin(int i)
+static inline void unlock_bin(int i)
 {
 	if (!libc.threads_minus_1) return;
 	unlock(mal.bins[i].lock);