libs/icu/patches/CVE-2017-7867_CVE-2017-7868.patch

   1 Index: source/test/intltest/utxttest.h
   2 ===================================================================
   3 --- source/test/intltest/utxttest.h     (revision 39670)
   4 +++ source/test/intltest/utxttest.h     (revision 39671)
   5 @@ -38,6 +38,7 @@
   6      void Ticket10562();
   7      void Ticket10983();
   8      void Ticket12130();
   9 +    void Ticket12888();
  10
  11  private:
  12      struct m {                              // Map between native indices & code points.
  13 Index: source/test/intltest/utxttest.cpp
  14 ===================================================================
  15 --- source/test/intltest/utxttest.cpp   (revision 39670)
  16 +++ source/test/intltest/utxttest.cpp   (revision 39671)
  17 @@ -67,6 +67,8 @@
  18              if (exec) Ticket10983();  break;
  19          case 7: name = "Ticket12130";
  20              if (exec) Ticket12130(); break;
  21 +        case 8: name = "Ticket12888";
  22 +            if (exec) Ticket12888(); break;
  23          default: name = "";          break;
  24      }
  25  }
  26 @@ -1583,3 +1585,63 @@
  27      }
  28      utext_close(&ut);
  29  }
  30 +
  31 +// Ticket 12888: bad handling of illegal utf-8 containing many instances of the archaic, now illegal,
  32 +//               six byte utf-8 forms. Original implementation had an assumption that
  33 +//               there would be at most three utf-8 bytes per UTF-16 code unit.
  34 +//               The five and six byte sequences map to a single replacement character.
  35 +
  36 +void UTextTest::Ticket12888() {
  37 +    const char *badString =
  38 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  39 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  40 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  41 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  42 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  43 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  44 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  45 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  46 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  47 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  48 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  49 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  50 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  51 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  52 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  53 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  54 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  55 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  56 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
  57 +            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80";
  58 +
  59 +    UErrorCode status = U_ZERO_ERROR;
  60 +    LocalUTextPointer ut(utext_openUTF8(NULL, badString, -1, &status));
  61 +    TEST_SUCCESS(status);
  62 +    for (;;) {
  63 +        UChar32 c = utext_next32(ut.getAlias());
  64 +        if (c == U_SENTINEL) {
  65 +            break;
  66 +        }
  67 +    }
  68 +    int32_t endIdx = utext_getNativeIndex(ut.getAlias());
  69 +    if (endIdx != (int32_t)strlen(badString)) {
  70 +        errln("%s:%d expected=%d, actual=%d", __FILE__, __LINE__, strlen(badString), endIdx);
  71 +        return;
  72 +    }
  73 +
  74 +    for (int32_t prevIndex = endIdx; prevIndex>0;) {
  75 +        UChar32 c = utext_previous32(ut.getAlias());
  76 +        int32_t currentIndex = utext_getNativeIndex(ut.getAlias());
  77 +        if (c != 0xfffd) {
  78 +            errln("%s:%d (expected, actual, index) = (%d, %d, %d)\n",
  79 +                    __FILE__, __LINE__, 0xfffd, c, currentIndex);
  80 +            break;
  81 +        }
  82 +        if (currentIndex != prevIndex - 6) {
  83 +            errln("%s:%d: wrong index. Expected, actual = %d, %d",
  84 +                    __FILE__, __LINE__, prevIndex - 6, currentIndex);
  85 +            break;
  86 +        }
  87 +        prevIndex = currentIndex;
  88 +    }
  89 +}
  90 Index: source/common/utext.cpp
  91 ===================================================================
  92 --- source/common/utext.cpp     (revision 39670)
  93 +++ source/common/utext.cpp     (revision 39671)
  94 @@ -847,9 +847,15 @@
  95  //------------------------------------------------------------------------------
  96
  97  // Chunk size.
  98 -//     Must be less than 85, because of byte mapping from UChar indexes to native indexes.
  99 -//     Worst case is three native bytes to one UChar.  (Supplemenaries are 4 native bytes
 100 -//     to two UChars.)
 101 +//     Must be less than 42  (256/6), because of byte mapping from UChar indexes to native indexes.
 102 +//     Worst case there are six UTF-8 bytes per UChar.
 103 +//         obsolete 6 byte form fd + 5 trails maps to fffd
 104 +//         obsolete 5 byte form fc + 4 trails maps to fffd
 105 +//         non-shortest 4 byte forms map to fffd
 106 +//         normal supplementaries map to a pair of utf-16, two utf8 bytes per utf-16 unit
 107 +//     mapToUChars array size must allow for the worst case, 6.
 108 +//     This could be brought down to 4, by treating fd and fc as pure illegal,
 109 +//     rather than obsolete lead bytes. But that is not compatible with the utf-8 access macros.
 110  //
 111  enum { UTF8_TEXT_CHUNK_SIZE=32 };
 112
 113 @@ -889,7 +895,7 @@
 114                                                       //  Requires two extra slots,
 115                                                       //    one for a supplementary starting in the last normal position,
 116                                                       //    and one for an entry for the buffer limit position.
 117 -    uint8_t   mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to
 118 +    uint8_t   mapToUChars[UTF8_TEXT_CHUNK_SIZE*6+6]; // Map native offset from bufNativeStart to
 119                                                       //   correspoding offset in filled part of buf.
 120      int32_t   align;
 121  };
 122 @@ -1032,6 +1038,7 @@
 123              // Requested index is in this buffer.
 124              u8b = (UTF8Buf *)ut->p;   // the current buffer
 125              mapIndex = ix - u8b->toUCharsMapStart;
 126 +            U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
 127              ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
 128              return TRUE;
 129
 130 @@ -1298,6 +1305,10 @@
 131          // Can only do this if the incoming index is somewhere in the interior of the string.
 132          //   If index is at the end, there is no character there to look at.
 133          if (ix != ut->b) {
 134 +            // Note: this function will only move the index back if it is on a trail byte
 135 +            //       and there is a preceding lead byte and the sequence from the lead
 136 +            //       through this trail could be part of a valid UTF-8 sequence
 137 +            //       Otherwise the index remains unchanged.
 138              U8_SET_CP_START(s8, 0, ix);
 139          }
 140
 141 @@ -1311,7 +1322,10 @@
 142          UChar   *buf = u8b->buf;
 143          uint8_t *mapToNative = u8b->mapToNative;
 144          uint8_t *mapToUChars = u8b->mapToUChars;
 145 -        int32_t  toUCharsMapStart = ix - (UTF8_TEXT_CHUNK_SIZE*3 + 1);
 146 +        int32_t  toUCharsMapStart = ix - sizeof(UTF8Buf::mapToUChars) + 1;
 147 +        // Note that toUCharsMapStart can be negative. Happens when the remaining
 148 +        // text from current position to the beginning is less than the buffer size.
 149 +        // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry.
 150          int32_t  destIx = UTF8_TEXT_CHUNK_SIZE+2;   // Start in the overflow region
 151                                                      //   at end of buffer to leave room
 152                                                      //   for a surrogate pair at the
 153 @@ -1338,6 +1352,7 @@
 154              if (c<0x80) {
 155                  // Special case ASCII range for speed.
 156                  buf[destIx] = (UChar)c;
 157 +                U_ASSERT(toUCharsMapStart <= srcIx);
 158                  mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
 159                  mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
 160              } else {
 161 @@ -1367,6 +1382,7 @@
 162                  do {
 163                      mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx;
 164                  } while (sIx >= srcIx);
 165 +                U_ASSERT(toUCharsMapStart <= (srcIx+1));
 166
 167                  // Set native indexing limit to be the current position.
 168                  //   We are processing a non-ascii, non-native-indexing char now;
 169 @@ -1541,6 +1557,7 @@
 170      U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit);
 171      U_ASSERT(index<=ut->chunkNativeLimit);
 172      int32_t mapIndex = index - u8b->toUCharsMapStart;
 173 +    U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
 174      int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
 175      U_ASSERT(offset>=0 && offset<=ut->chunkLength);
 176      return offset;