2 * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
4 * Licensed under the OpenSSL license (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
19 static char **newargv;
21 static void cleanup(void)
25 SetConsoleOutputCP(saved_cp);
27 for (i = 0; i < newargc; i++)
34 * Incrementally [re]allocate newargv and keep it NULL-terminated.
36 static int validate_argv(int argc)
46 ptr = realloc(newargv, size * sizeof(newargv[0]));
50 (newargv = ptr)[argc] = NULL;
58 static int process_glob(WCHAR *wstr, int wlen)
62 WIN32_FIND_DATAW data;
66 * Note that we support wildcard characters only in filename part
67 * of the path, and not in directories. Windows users are used to
68 * this, that's why recursive glob processing is not implemented.
71 * Start by looking for last slash or backslash, ...
73 for (slash = 0, i = 0; i < wlen; i++)
74 if (wstr[i] == L'/' || wstr[i] == L'\\')
77 * ... then look for asterisk or question mark in the file name.
79 for (i = slash; i < wlen; i++)
80 if (wstr[i] == L'*' || wstr[i] == L'?')
84 return 0; /* definitely not a glob */
86 saved_char = wstr[wlen];
88 h = FindFirstFileW(wstr, &data);
89 wstr[wlen] = saved_char;
90 if (h == INVALID_HANDLE_VALUE)
91 return 0; /* not a valid glob, just pass... */
94 udlen = WideCharToMultiByte(CP_UTF8, 0, wstr, slash,
106 if (data.cFileName[0] == L'.') {
107 if ((data.cFileName[1] == L'\0') ||
108 (data.cFileName[1] == L'.' && data.cFileName[2] == L'\0'))
112 if (!validate_argv(newargc + 1))
116 * -1 below means "scan for trailing '\0' *and* count it",
117 * so that |uflen| covers even trailing '\0'.
119 uflen = WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1,
120 NULL, 0, NULL, NULL);
122 arg = malloc(udlen + uflen);
127 WideCharToMultiByte(CP_UTF8, 0, wstr, slash,
128 arg, udlen, NULL, NULL);
130 WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1,
131 arg + udlen, uflen, NULL, NULL);
133 newargv[newargc++] = arg;
134 } while (FindNextFileW(h, &data));
141 void win32_utf8argv(int *argc, char **argv[])
143 const WCHAR *wcmdline;
144 WCHAR *warg, *wend, *p;
145 int wlen, ulen, valid = 1;
150 if (!validate_argv(newargc))
153 wcmdline = GetCommandLineW();
154 if (wcmdline == NULL) return;
157 * make a copy of the command line, since we might have to modify it...
159 wlen = wcslen(wcmdline);
160 p = _alloca((wlen + 1) * sizeof(WCHAR));
163 while (*p != L'\0') {
166 if (*p == L' ' || *p == L'\t') {
167 p++; /* skip over white spaces */
172 * Note: because we may need to fiddle with the number of backslashes,
173 * the argument string is copied into itself. This is safe because
174 * the number of characters will never expand.
178 && (in_quote || (*p != L' ' && *p != L'\t'))) {
182 * Microsoft documentation on how backslashes are treated
185 * + Backslashes are interpreted literally, unless they
186 * immediately precede a double quotation mark.
187 * + If an even number of backslashes is followed by a double
188 * quotation mark, one backslash is placed in the argv array
189 * for every pair of backslashes, and the double quotation
190 * mark is interpreted as a string delimiter.
191 * + If an odd number of backslashes is followed by a double
192 * quotation mark, one backslash is placed in the argv array
193 * for every pair of backslashes, and the double quotation
194 * mark is "escaped" by the remaining backslash, causing a
195 * literal double quotation mark (") to be placed in argv.
197 * Ref: https://msdn.microsoft.com/en-us/library/17w5ykft.aspx
199 * Though the referred page doesn't mention it, multiple double
200 * quotes are also special. A pair of double quotes in a quoted
201 * string is counted as a single double quote.
213 for (i = (p - q) / 2; i > 0; i--)
217 * if there is an odd number of backslashes before the quote,
218 * said quote is part of the argument, not a delimiter
220 if ((p - q) % 2 == 1)
223 for (i = p - q; i > 0; i--)
230 * Without the preceding backslash (or when preceded with an
231 * even number of backslashes), the double quote is a simple
232 * string delimiter and just slightly changes the parsing state
234 if (in_quote && p[1] == L'"')
237 in_quote = !in_quote;
242 * Any other non-delimiter character is just taken verbatim
250 if (wlen == 0 || !process_glob(warg, wlen)) {
251 if (!validate_argv(newargc + 1)) {
258 ulen = WideCharToMultiByte(CP_UTF8, 0, warg, wlen,
259 NULL, 0, NULL, NULL);
264 arg = malloc(ulen + 1);
271 WideCharToMultiByte(CP_UTF8, 0, warg, wlen,
272 arg, ulen, NULL, NULL);
275 newargv[newargc++] = arg;
280 saved_cp = GetConsoleOutputCP();
281 SetConsoleOutputCP(CP_UTF8);
287 } else if (newargv != NULL) {
290 for (i = 0; i < newargc; i++)
302 void win32_utf8argv(int &argc, char **argv[])