gzip: speed up and shrink put_16bit()

author Denys Vlasenko <vda.linux@googlemail.com>

Mon, 2 Feb 2015 15:07:07 +0000 (16:07 +0100)

committer Denys Vlasenko <vda.linux@googlemail.com>

Mon, 2 Feb 2015 15:07:07 +0000 (16:07 +0100)
author Denys Vlasenko <vda.linux@googlemail.com>
Mon, 2 Feb 2015 15:07:07 +0000 (16:07 +0100)
committer Denys Vlasenko <vda.linux@googlemail.com>
Mon, 2 Feb 2015 15:07:07 +0000 (16:07 +0100)
diff --git a/archival/gzip.c b/archival/gzip.c

index 46367f9e6879dc1fc4077946ce161139f46cb012..18d795996555ebdd699d953f779172edf2f8e84c 100644 (file)
--- a/archival/gzip.c
+++ b/archival/gzip.c
@@ -417,19 +417,46 @@ static void flush_outbuf(void)
  #define put_8bit(c) \
  do { \
         G1.outbuf[G1.outcnt++] = (c); \
-       if (G1.outcnt == OUTBUFSIZ) flush_outbuf(); \
+       if (G1.outcnt == OUTBUFSIZ) \
+               flush_outbuf(); \
  } while (0)
  
  /* Output a 16 bit value, lsb first */
  static void put_16bit(ush w)
  {
-       if (G1.outcnt < OUTBUFSIZ - 2) {
-               G1.outbuf[G1.outcnt++] = w;
-               G1.outbuf[G1.outcnt++] = w >> 8;
-       } else {
-               put_8bit(w);
-               put_8bit(w >> 8);
+       /* GCC 4.2.1 won't optimize out redundant loads of G1.outcnt
+        * (probably because of fear of aliasing with G1.outbuf[]
+        * stores), do it explicitly:
+        */
+       unsigned outcnt = G1.outcnt;
+       uch *dst = &G1.outbuf[outcnt];
+
+#if BB_UNALIGNED_MEMACCESS_OK && BB_LITTLE_ENDIAN
+       if (outcnt < OUTBUFSIZ-2) {
+               /* Common case */
+               ush *dst16 = (void*) dst;
+               *dst16 = w; /* unaligned LSB 16-bit store */
+               G1.outcnt = outcnt + 2;
+               return;
+       }
+       *dst = (uch)w;
+       w >>= 8;
+#else
+       *dst++ = (uch)w;
+       w >>= 8;
+       if (outcnt < OUTBUFSIZ-2) {
+               /* Common case */
+               *dst = w;
+               G1.outcnt = outcnt + 2;
+               return;
         }
+#endif
+
+       /* Slowpath: we will need to do flush_outbuf() */
+       G1.outcnt++;
+       if (G1.outcnt == OUTBUFSIZ)
+               flush_outbuf();
+       put_8bit(w);
  }
  
  static void put_32bit(ulg n)
diff --git a/include/platform.h b/include/platform.h

index 0b0fce1822b517791d2ac7792ba8d74bf7a6852a..df959450736cbc445af7be20da18ac24d9b52d0b 100644 (file)
--- a/include/platform.h
+++ b/include/platform.h
@@ -217,6 +217,7 @@ typedef uint64_t bb__aliased_uint64_t FIX_ALIASING;
   * a lvalue. This makes it more likely to not swap them by mistake
   */
  #if defined(i386) || defined(__x86_64__) || defined(__powerpc__)
+# define BB_UNALIGNED_MEMACCESS_OK 1
  # define move_from_unaligned_int(v, intp)  ((v) = *(bb__aliased_int*)(intp))
  # define move_from_unaligned_long(v, longp) ((v) = *(bb__aliased_long*)(longp))
  # define move_from_unaligned16(v, u16p) ((v) = *(bb__aliased_uint16_t*)(u16p))
@@ -225,6 +226,7 @@ typedef uint64_t bb__aliased_uint64_t FIX_ALIASING;
  # define move_to_unaligned32(u32p, v)   (*(bb__aliased_uint32_t*)(u32p) = (v))
  /* #elif ... - add your favorite arch today! */
  #else
+# define BB_UNALIGNED_MEMACCESS_OK 0
  /* performs reasonably well (gcc usually inlines memcpy here) */
  # define move_from_unaligned_int(v, intp) (memcpy(&(v), (intp), sizeof(int)))
  # define move_from_unaligned_long(v, longp) (memcpy(&(v), (longp), sizeof(long)))
author	Denys Vlasenko <vda.linux@googlemail.com>
	Mon, 2 Feb 2015 15:07:07 +0000 (16:07 +0100)
committer	Denys Vlasenko <vda.linux@googlemail.com>
	Mon, 2 Feb 2015 15:07:07 +0000 (16:07 +0100)
archival/gzip.c		patch \| blob \| history
include/platform.h		patch \| blob \| history