Merge git://git.denx.de/u-boot-usb

[oweals/u-boot.git] / lib / efi_loader / efi_unicode_collation.c
diff --git a/lib/efi_loader/efi_unicode_collation.c b/lib/efi_loader/efi_unicode_collation.c

index 06fddca1c47b5da3a670525736aa735082c5156d..6655c68092e410dc226da389fd035a310b00d5c1 100644 (file)
--- a/lib/efi_loader/efi_unicode_collation.c
+++ b/lib/efi_loader/efi_unicode_collation.c
@@ -11,8 +11,8 @@
  #include <cp437.h>
  #include <efi_loader.h>
  
-/* Characters that may not be used in file names */
-static const char illegal[] = "<>:\"/\\|?*";
+/* Characters that may not be used in FAT 8.3 file names */
+static const char illegal[] = "+,<=>:;\"/\\|?*[]\x7f";
  
  /*
   * EDK2 assumes codepage 1250 when creating FAT 8.3 file names.
@@ -43,11 +43,6 @@ const efi_guid_t efi_guid_unicode_collation_protocol2 =
   * See the Unified Extensible Firmware Interface (UEFI) specification for
   * details.
   *
- * TODO:
- * The implementation does not follow the Unicode collation algorithm.
- * For ASCII characters it results in the same sort order as EDK2.
- * We could use table UNICODE_CAPITALIZATION_TABLE for better results.
- *
   * Return:     0: s1 == s2, > 0: s1 > s2, < 0: s1 < s2
   */
  static efi_intn_t EFIAPI efi_stri_coll(
@@ -73,11 +68,22 @@ out:
         return ret;
  }
  
+/**
+ * next_lower() - get next codepoint converted to lower case
+ *
+ * @string:    pointer to u16 string, on return advanced by one codepoint
+ * Return:     first codepoint of string converted to lower case
+ */
+static s32 next_lower(const u16 **string)
+{
+       return utf_to_lower(utf16_get(string));
+}
+
  /**
   * metai_match() - compare utf-16 string with a pattern string case-insensitively
   *
- * @s:         string to compare
- * @p:         pattern string
+ * @string:    string to compare
+ * @pattern:   pattern string
   *
   * The pattern string may use these:
   *     - * matches >= 0 characters
@@ -93,61 +99,67 @@ out:
   *
   * Return:     true if the string is matched.
   */
-static bool metai_match(const u16 *s, const u16 *p)
+static bool metai_match(const u16 *string, const u16 *pattern)
  {
-       u16 first;
+       s32 first, s, p;
+
+       for (; *string && *pattern;) {
+               const u16 *string_old = string;
+
+               s = next_lower(&string);
+               p = next_lower(&pattern);
  
-       for (; *s && *p; ++s, ++p) {
-               switch (*p) {
+               switch (p) {
                 case '*':
                         /* Match 0 or more characters */
-                       ++p;
-                       for (;; ++s) {
-                               if (metai_match(s, p))
+                       for (;; s = next_lower(&string)) {
+                               if (metai_match(string_old, pattern))
                                         return true;
-                               if (!*s)
+                               if (!s)
                                         return false;
+                               string_old = string;
                         }
                 case '?':
                         /* Match any one character */
                         break;
                 case '[':
                         /* Match any character in the set */
-                       ++p;
-                       first = *p;
+                       p = next_lower(&pattern);
+                       first = p;
                         if (first == ']')
                                 /* Empty set */
                                 return false;
-                       ++p;
-                       if (*p == '-') {
+                       p = next_lower(&pattern);
+                       if (p == '-') {
                                 /* Range */
-                               ++p;
-                               if (*s < first || *s > *p)
+                               p = next_lower(&pattern);
+                               if (s < first || s > p)
                                         return false;
-                               ++p;
-                               if (*p != ']')
+                               p = next_lower(&pattern);
+                               if (p != ']')
                                         return false;
                         } else {
                                 /* Set */
                                 bool hit = false;
  
-                               if (*s == first)
+                               if (s == first)
                                         hit = true;
-                               for (; *p && *p != ']'; ++p) {
-                                       if (*p == *s)
+                               for (; p && p != ']';
+                                    p = next_lower(&pattern)) {
+                                       if (p == s)
                                                 hit = true;
                                 }
-                               if (!hit || *p != ']')
+                               if (!hit || p != ']')
                                         return false;
                         }
                         break;
                 default:
                         /* Match one character */
-                       if (*p != *s)
+                       if (p != s)
                                 return false;
                 }
         }
-       if (!*p && !*s)
+       if (!*pattern && !*string)
                 return true;
         return false;
  }
@@ -157,8 +169,8 @@ static bool metai_match(const u16 *s, const u16 *p)
   *                    case-insensitively
   *
   * @this:      unicode collation protocol instance
- * @s:         string to compare
- * @p:         pattern string
+ * @string:    string to compare
+ * @pattern:   pattern string
   *
   * The pattern string may use these:
   *     - * matches >= 0 characters
@@ -187,7 +199,6 @@ static bool EFIAPI efi_metai_match(struct efi_unicode_collation_protocol *this,
   *
   * @this:      unicode collation protocol instance
   * @string:    string to convert
- * @p:         pattern string
   *
   * The conversion is done in place. As long as upper and lower letters use the
   * same number of words this does not pose a problem.
@@ -209,7 +220,6 @@ static void EFIAPI efi_str_lwr(struct efi_unicode_collation_protocol *this,
   *
   * @this:      unicode collation protocol instance
   * @string:    string to convert
- * @p:         pattern string
   *
   * The conversion is done in place. As long as upper and lower letters use the
   * same number of words this does not pose a problem.