2 This file is part of GNUnet.
3 (C) 2005, 2006 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
22 * @file util/strings.c
23 * @brief string functions
25 * @author Christian Grothoff
32 #include "gnunet_common.h"
33 #include "gnunet_strings_lib.h"
35 #define LOG(kind,...) GNUNET_log_from (kind, "util", __VA_ARGS__)
37 #define LOG_STRERROR(kind,syscall) GNUNET_log_from_strerror (kind, "util", syscall)
41 * Fill a buffer of the given size with
42 * count 0-terminated strings (given as varargs).
43 * If "buffer" is NULL, only compute the amount of
44 * space required (sum of "strlen(arg)+1").
46 * Unlike using "snprintf" with "%s", this function
47 * will add 0-terminators after each string. The
48 * "GNUNET_string_buffer_tokenize" function can be
49 * used to parse the buffer back into individual
52 * @param buffer the buffer to fill with strings, can
53 * be NULL in which case only the necessary
54 * amount of space will be calculated
55 * @param size number of bytes available in buffer
56 * @param count number of strings that follow
57 * @param ... count 0-terminated strings to copy to buffer
58 * @return number of bytes written to the buffer
59 * (or number of bytes that would have been written)
62 GNUNET_STRINGS_buffer_fill (char *buffer, size_t size, unsigned int count, ...)
73 s = va_arg (ap, const char *); /* fetch the next string from the varargs */
75 slen = strlen (s) + 1; /* bytes to copy, including the 0-terminator */
78 GNUNET_assert (needed + slen <= size); /* string plus terminator must still fit within 'size' */
79 memcpy (&buffer[needed], s, slen); /* copy string and its terminator to offset 'needed' */
90 * Given a buffer of a given size, find "count"
91 * 0-terminated strings in the buffer and assign
92 * the count (varargs) of type "const char**" to the
93 * locations of the respective strings in the
96 * @param buffer the buffer to parse
97 * @param size size of the buffer
98 * @param count number of strings to locate
99 * @return offset of the character after the last 0-termination
100 * in the buffer, or 0 on error.
103 GNUNET_STRINGS_buffer_tokenize (const char *buffer, size_t size,
104 unsigned int count, ...)
112 va_start (ap, count);
115 r = va_arg (ap, const char **); /* next caller-supplied location that receives a string pointer */
118 while ((needed < size) && (buffer[needed] != '\0')) /* scan to the 0-terminator without leaving the buffer */
123 return 0; /* error */
126 needed++; /* skip 0-termination */
135 * Convert a given filesize into a fancy human-readable format.
137 * @param size number of bytes
138 * @return fancy representation of the size (possibly rounded) for humans
141 GNUNET_STRINGS_byte_size_fancy (unsigned long long size)
143 const char *unit = _( /* size unit */ "b"); /* initial unit is "b"; passed through _() (presumably the translation macro) */
166 ret = GNUNET_malloc (32); /* result buffer of 32 bytes */
167 GNUNET_snprintf (ret, 32, "%llu %s", size, unit); /* render as "<number> <unit>" */
173 * Convert a given fancy human-readable size to bytes.
175 * @param fancy_size human readable string (e.g. 1 MB)
176 * @param size set to the size in bytes
177 * @return GNUNET_OK on success, GNUNET_SYSERR on error
180 GNUNET_STRINGS_fancy_size_to_bytes (const char *fancy_size,
181 unsigned long long *size)
186 unsigned long long value; /* number of bytes that one unit of this name stands for */
200 "GiB", 1024 * 1024 * 1024},
202 "GB", 1000 * 1000 * 1000},
204 "TiB", 1024LL * 1024LL * 1024LL * 1024LL},
206 "TB", 1000LL * 1000LL * 1000LL * 1000LL}, /* 10^12: decimal units use powers of 1000 only */
208 "PiB", 1024LL * 1024LL * 1024LL * 1024LL * 1024LL},
210 "PB", 1000LL * 1000LL * 1000LL * 1000LL * 1000LL}, /* 10^15 */
212 "EiB", 1024LL * 1024LL * 1024LL * 1024LL * 1024LL * 1024LL},
214 "EB", 1000LL * 1000LL * 1000LL * 1000LL * 1000LL * 1000LL}, /* 10^18 */
218 unsigned long long ret;
221 unsigned long long last;
226 in = GNUNET_strdup (fancy_size); /* private copy: strtok() writes into the string it splits */
227 for (tok = strtok (in, " "); tok != NULL; tok = strtok (NULL, " ")) /* walk the space-separated tokens */
230 while ((table[i].name != NULL) && (0 != strcasecmp (table[i].name, tok))) /* case-insensitive search of the unit table */
232 if (table[i].name != NULL) /* token is a known unit name */
233 last *= table[i].value; /* scale the pending number by the unit */
238 if (1 != sscanf (tok, "%llu", &last)) /* not a unit, so the token has to parse as a number */
241 return GNUNET_SYSERR; /* expected number */
253 * Convert a given fancy human-readable time to our internal
256 * @param fancy_size human readable string (e.g. 1 minute)
257 * @param rtime set to the relative time
258 * @return GNUNET_OK on success, GNUNET_SYSERR on error
261 GNUNET_STRINGS_fancy_time_to_relative (const char *fancy_size,
262 struct GNUNET_TIME_Relative *rtime)
267 unsigned long long value; /* length of one unit; the entries below are expressed in milliseconds */
279 "minutes", 60 * 1000},
283 "h", 60 * 60 * 1000},
285 "d", 24 * 60 * 60 * 1000},
287 "a", 31557600LL * 1000LL /* year: 31557600 seconds, converted to ms like the other entries */ },
291 unsigned long long ret;
294 unsigned long long last;
297 if ((0 == strcasecmp (fancy_size, "infinity")) || /* both spellings select the "forever" value */
298 (0 == strcasecmp (fancy_size, "forever")))
300 *rtime = GNUNET_TIME_UNIT_FOREVER_REL;
305 in = GNUNET_strdup (fancy_size); /* private copy: strtok() writes into the string it splits */
306 for (tok = strtok (in, " "); tok != NULL; tok = strtok (NULL, " ")) /* walk the space-separated tokens */
309 while ((table[i].name != NULL) && (0 != strcasecmp (table[i].name, tok))) /* case-insensitive search of the unit table */
311 if (table[i].name != NULL) /* token is a known unit name */
312 last *= table[i].value; /* scale the pending number by the unit */
317 if (1 != sscanf (tok, "%llu", &last)) /* not a unit, so the token has to parse as a number */
320 return GNUNET_SYSERR; /* expected number */
325 rtime->rel_value = (uint64_t) ret; /* hand the accumulated total back to the caller */
331 * Convert the len characters long character sequence
332 * given in input that is in the given input charset
333 * to a string in given output charset.
334 * @return the converted string (0-terminated),
335 * if conversion fails, a copy of the original
336 * string is returned.
339 GNUNET_STRINGS_conv (const char *input, size_t len, const char *input_charset, const char *output_charset)
343 #if ENABLE_NLS && HAVE_ICONV
350 cd = iconv_open (output_charset, input_charset); /* converter from input_charset to output_charset */
351 if (cd == (iconv_t) - 1) /* iconv_open() failed */
353 LOG_STRERROR (GNUNET_ERROR_TYPE_WARNING, "iconv_open");
354 LOG (GNUNET_ERROR_TYPE_WARNING, _("Character sets requested were `%s'->`%s'\n"),
355 input_charset, output_charset);
356 ret = GNUNET_malloc (len + 1); /* fallback: hand back a copy of the input bytes */
357 memcpy (ret, input, len);
361 tmpSize = 3 * len + 4; /* NOTE(review): presumably a worst-case size for the converted text - confirm */
362 tmp = GNUNET_malloc (tmpSize); /* scratch buffer that receives the conversion output */
366 #if FREEBSD || DARWIN || WINDOWS
367 (const char **) &input,
371 &len, &itmp, &finSize) == SIZE_MAX)
/* NOTE(review): '&input' and '&len' are handed to the conversion call; iconv()
 * advances the input pointer and decrements the remaining-bytes counter, so the
 * fallback copy below may see only the unconverted tail rather than the whole
 * original string - confirm and, if so, save the original values beforehand. */
373 LOG_STRERROR (GNUNET_ERROR_TYPE_WARNING, "iconv");
376 ret = GNUNET_malloc (len + 1); /* fallback copy after a failed conversion */
377 memcpy (ret, input, len);
381 ret = GNUNET_malloc (tmpSize - finSize + 1); /* presumably finSize is the output space left unused - confirm */
382 memcpy (ret, tmp, tmpSize - finSize); /* keep only the bytes that were produced */
383 ret[tmpSize - finSize] = '\0'; /* 0-terminate the result */
385 if (0 != iconv_close (cd))
386 LOG_STRERROR (GNUNET_ERROR_TYPE_WARNING, "iconv_close");
389 ret = GNUNET_malloc (len + 1); /* plain copy of the input bytes */
390 memcpy (ret, input, len);
398 * Convert the len characters long character sequence
399 * given in input that is in the given charset
401 * @return the converted string (0-terminated),
402 * if conversion fails, a copy of the original
403 * string is returned.
406 GNUNET_STRINGS_to_utf8 (const char *input, size_t len, const char *charset)
408 return GNUNET_STRINGS_conv (input, len, charset, "UTF-8"); /* 'charset' is the source encoding, "UTF-8" the target */
412 * Convert the len bytes-long UTF-8 string
413 * given in input to the given charset.
415 * @return the converted string (0-terminated),
416 * if conversion fails, a copy of the original
417 * string is returned.
420 GNUNET_STRINGS_from_utf8 (const char *input, size_t len, const char *charset)
422 return GNUNET_STRINGS_conv (input, len, "UTF-8", charset); /* "UTF-8" is the source encoding, 'charset' the target */
428 * Complete filename (a la shell) from abbreviation.
429 * @param fil the name of the file, may contain ~/ or
430 * be relative to the current directory
431 * @returns the full file name,
432 * NULL is returned on error
435 GNUNET_STRINGS_filename_expand (const char *fil)
453 if (fil[0] == DIR_SEPARATOR)
454 /* absolute path, just copy */
455 return GNUNET_strdup (fil);
458 fm = getenv ("HOME"); /* directory prefix taken from the HOME environment variable */
461 LOG (GNUNET_ERROR_TYPE_WARNING,
462 _("Failed to expand `$HOME': environment variable `HOME' not set"));
465 fm = GNUNET_strdup (fm); /* own a copy of the getenv() result */
466 /* do not copy '~' */
469 /* skip over dir separator to be consistent */
470 if (fil_ptr[0] == DIR_SEPARATOR)
481 buffer = GNUNET_malloc (len);
482 if (getcwd (buffer, len) != NULL) /* current working directory fit into 'len' bytes */
487 if ((errno == ERANGE) && (len < 1024 * 1024 * 4)) /* buffer too small and still below the 4 MiB limit */
490 GNUNET_free (buffer);
493 GNUNET_free (buffer);
498 LOG_STRERROR (GNUNET_ERROR_TYPE_WARNING, "getcwd");
499 buffer = getenv ("PWD"); /* alternative */
501 fm = GNUNET_strdup (buffer);
504 fm = GNUNET_strdup ("./"); /* give up */
506 n = strlen (fm) + 1 + strlen (fil_ptr) + 1; /* prefix + one separator + remainder + 0-terminator */
507 buffer = GNUNET_malloc (n);
508 GNUNET_snprintf (buffer, n, "%s%s%s", fm,
509 (fm[strlen (fm) - 1] == /* NOTE(review): reads before the string if 'fm' is empty - confirm that cannot happen */
510 DIR_SEPARATOR) ? "" : DIR_SEPARATOR_STR, fil_ptr); /* add a separator only if 'fm' does not already end in one */
514 fn = GNUNET_malloc (MAX_PATH + 1);
516 if ((lRet = plibc_conv_to_win_path (fil, fn)) != ERROR_SUCCESS)
518 SetErrnoFromWinError (lRet);
519 LOG_STRERROR (GNUNET_ERROR_TYPE_WARNING, "plibc_conv_to_win_path");
522 /* is the path relative? */
523 if ((strncmp (fn + 1, ":\\", 2) != 0) && (strncmp (fn, "\\\\", 2) != 0)) /* neither "X:\..." nor a path starting with two backslashes */
525 char szCurDir[MAX_PATH + 1];
527 lRet = GetCurrentDirectory (MAX_PATH + 1, szCurDir);
528 if (lRet + strlen (fn) + 1 > (MAX_PATH + 1)) /* NOTE(review): the "%s\\%s" output below needs lRet + strlen (fn) + 2 bytes (separator and terminator); this bound looks one short - confirm */
530 SetErrnoFromWinError (ERROR_BUFFER_OVERFLOW);
531 LOG_STRERROR (GNUNET_ERROR_TYPE_WARNING, "GetCurrentDirectory");
534 buffer = GNUNET_malloc (MAX_PATH + 1);
535 GNUNET_snprintf (buffer, MAX_PATH + 1, "%s\\%s", szCurDir, fn); /* current directory, backslash, then the converted path */
546 * Give relative time in human-readable fancy format.
548 * @param delta time in milliseconds
549 * @return time as human-readable string
552 GNUNET_STRINGS_relative_time_to_string (struct GNUNET_TIME_Relative delta)
554 const char *unit = _( /* time unit */ "ms"); /* initial unit; replaced below when a larger unit is chosen */
556 uint64_t dval = delta.rel_value; /* numeric value that ends up in the output string */
558 if (delta.rel_value == GNUNET_TIME_UNIT_FOREVER_REL.rel_value) /* the "forever" value gets a fixed text */
559 return GNUNET_strdup (_("eternity"));
563 unit = _( /* time unit */ "s");
567 unit = _( /* time unit */ "m");
571 unit = _( /* time unit */ "h");
575 unit = _( /* time unit */ " days"); /* NOTE(review): the leading blank plus the "%llu %s" format gives two spaces before "days" */
580 GNUNET_asprintf (&ret, "%llu %s", (unsigned long long) dval, unit); /* uint64_t need not be unsigned long long; cast so the argument matches %llu */
586 * "man ctime_r", except for GNUnet time; also, unlike ctime, the
587 * return value does not include the newline character.
589 * @param t time to convert
590 * @return absolute time in human-readable format
593 GNUNET_STRINGS_absolute_time_to_string (struct GNUNET_TIME_Absolute t)
598 if (t.abs_value == GNUNET_TIME_UNIT_FOREVER_ABS.abs_value) /* the "forever" value gets a fixed text */
599 return GNUNET_strdup (_("end of time"));
600 tt = t.abs_value / 1000; /* presumably milliseconds to the seconds expected by ctime - confirm */
602 ret = ctime_r (&tt, GNUNET_malloc (32)); /* format into a freshly allocated 32-byte buffer */
604 ret = GNUNET_strdup (ctime (&tt)); /* variant using ctime(): duplicate its result */
606 ret[strlen (ret) - 1] = '\0'; /* drop the last character (the newline that ctime output ends with) */
613 * Returns a pointer to a part of filename (allocates nothing)!
615 * @param filename filename to extract basename from
616 * @return short (base) name of the file (that is, everything following the
617 * last directory separator in filename. If filename ends with a
618 * directory separator, the result will be a zero-length string.
619 * If filename has no directory separators, the result is filename
623 GNUNET_STRINGS_get_short_name (const char *filename)
625 const char *short_fn = filename; /* candidate result; starts as the whole name */
627 while (NULL != (ss = strstr (short_fn, DIR_SEPARATOR_STR)) /* look for the next directory separator in the candidate */
635 * Get the numeric value corresponding to a character.
637 * @param a a character
638 * @return corresponding numeric value
641 getValue__ (unsigned char a)
643 if ((a >= '0') && (a <= '9')) /* decimal digit */
645 if ((a >= 'A') && (a <= 'V')) /* upper-case letter of the encoding alphabet */
646 return (a - 'A' + 10); /* 'A'..'V' map to 10..31 */
652 * Convert binary data to ASCII encoding. The ASCII encoding is rather
653 * GNUnet specific. It was chosen such that it only uses characters
654 * in [0-9A-V], can be produced without complex arithmetics and uses a
655 * small number of characters.
656 * Does not append 0-terminator, but returns a pointer to the place where
657 * it should be placed, if needed.
659 * @param data data to encode
660 * @param size size of data (in bytes)
661 * @param out buffer to fill
662 * @param out_size size of the buffer. Must be large enough to hold
663 * ((size*8) + (((size*8) % 5) > 0 ? 5 - ((size*8) % 5) : 0)) / 5 bytes
664 * @return pointer to the next byte in 'out' or NULL on error.
667 GNUNET_STRINGS_data_to_string (unsigned char *data, size_t size, char *out, size_t out_size)
670 * 32 characters for encoding (GNUNET_CRYPTO_hash => 32 characters)
672 static char *encTable__ = "0123456789ABCDEFGHIJKLMNOPQRSTUV";
678 GNUNET_assert (data != NULL);
679 GNUNET_assert (out != NULL);
680 GNUNET_assert (out_size >= (((size*8) + ((size*8) % 5)) % 5));
/* NOTE(review): the right-hand side above is a value modulo 5 (0..4), so the
 * assertion does not verify the minimum output size documented for 'out_size';
 * the 'wpos >= out_size' test before each write appears to be the effective
 * bound - confirm before tightening. */
685 while ((rpos < size) || (vbit > 0)) /* continue while input bytes or pending bits remain */
687 if ((rpos < size) && (vbit < 5)) /* fewer than 5 bits pending and more input available */
689 bits = (bits << 8) | data[rpos++]; /* eat 8 more bits */
694 bits <<= (5 - vbit); /* zero-padding */
695 GNUNET_assert (vbit == ((size * 8) % 5));
698 if (wpos >= out_size) /* no room left for another output character */
700 out[wpos++] = encTable__[(bits >> (vbit - 5)) & 31]; /* emit the top 5 pending bits as one alphabet character */
703 if (wpos != out_size)
705 GNUNET_assert (vbit == 0);
711 * Convert ASCII encoding back to data
712 * out_size must match exactly the size of the data before it was encoded.
714 * @param enc the encoding
715 * @param enclen number of characters in 'enc' (without 0-terminator, which can be missing)
716 * @param out location where to store the decoded data
717 * @param out_size sizeof the output buffer
718 * @return GNUNET_OK on success, GNUNET_SYSERR if result has the wrong encoding
721 GNUNET_STRINGS_string_to_data (const char *enc, size_t enclen,
722 unsigned char *out, size_t out_size)
730 int encoded_len = out_size * 8; /* payload size in bits; NOTE(review): held in an int, so a very large out_size may not fit - confirm callers */
731 if (encoded_len % 5 > 0) /* bit count is not a multiple of 5 */
733 vbit = encoded_len % 5; /* padding! */
741 if ((encoded_len + shift) / 5 != enclen) /* input length must match the length expected for out_size */
742 return GNUNET_SYSERR;
746 bits = (ret = getValue__ (enc[--rpos])) >> (5 - encoded_len % 5); /* start with the last character and discard its padding bits */
748 return GNUNET_SYSERR;
751 GNUNET_assert (rpos > 0);
752 bits = ((ret = getValue__ (enc[--rpos])) << vbit) | bits; /* place the previous character's value above the bits already held */
754 return GNUNET_SYSERR;
758 out[--wpos] = (unsigned char) bits; /* store the low 8 bits; the output is filled from its end */
763 GNUNET_assert (rpos == 0);
764 GNUNET_assert (vbit == 0);
770 * Parse a path that might be a URI.
772 * @param path path to parse. Must be 0-terminated.
773 * @param scheme_part a pointer to 'char *' where a pointer to a string that
774 * represents the URI scheme will be stored. Can be NULL. The string is
775 * allocated by the function, and should be freed by GNUNET_free() when
776 * it is no longer needed.
777 * @param path_part a pointer to 'const char *' where a pointer to the path
778 * part of the URI will be stored. Can be NULL. Points to the same block
779 * of memory as 'path', and thus must not be freed. Might point to '\0',
780 * if path part is zero-length.
781 * @return GNUNET_YES if it's a URI, GNUNET_NO otherwise. If 'path' is not
782 * a URI, '*scheme_part' and '*path_part' will remain unchanged
783 * (if they weren't NULL).
786 GNUNET_STRINGS_parse_uri (const char *path, char **scheme_part,
787 const char **path_part)
792 const char *post_scheme_part = NULL; /* stays NULL until a position after the scheme has been found */
794 for (end = 0, i = 0; !end && i < len; i++) /* scan characters until 'end' is set or the string is exhausted */
799 if (path[i] == ':' && i > 0) /* a colon that is not the very first character */
804 if (!((path[i] >= 'A' && path[i] <= 'Z') || (path[i] >= 'a' && path[i] <= 'z') /* anything other than a letter, digit, '+', '-' or '.' */
805 || (path[i] >= '0' && path[i] <= '9') || path[i] == '+' || path[i] == '-'
806 || (path[i] == '.')))
819 post_scheme_part = &path[i];
826 if (post_scheme_part == NULL)
830 *scheme_part = GNUNET_malloc (post_scheme_part - path + 1); /* room for the bytes before post_scheme_part plus a terminator */
831 memcpy (*scheme_part, path, post_scheme_part - path);
832 (*scheme_part)[post_scheme_part - path] = '\0';
835 *path_part = post_scheme_part; /* points into 'path'; nothing is allocated for it */
841 * Check whether @filename is absolute or not, and if it's a URI
843 * @param filename filename to check
844 * @param can_be_uri GNUNET_YES to check for being URI, GNUNET_NO - to
845 * assume it's not URI
846 * @param r_is_uri a pointer to an int that is set to GNUNET_YES if @filename
847 * is URI and to GNUNET_NO otherwise. Can be NULL. If @can_be_uri is
848 * not GNUNET_YES, *r_is_uri is set to GNUNET_NO.
849 * @param r_uri_scheme a pointer to a char * that is set to a pointer to the URI scheme.
850 * The string is allocated by the function, and should be freed with
851 * GNUNET_free (). Can be NULL.
852 * @return GNUNET_YES if @filename is absolute, GNUNET_NO otherwise.
855 GNUNET_STRINGS_path_is_absolute (const char *filename, int can_be_uri,
856 int *r_is_uri, char **r_uri_scheme)
861 const char *post_scheme_path;
864 /* consider POSIX paths to be absolute too, even on W32,
865 * as plibc expansion will fix them for us.
867 if (filename[0] == '/')
871 is_uri = GNUNET_STRINGS_parse_uri (filename, &uri, &post_scheme_path); /* split into scheme and the part following it */
879 GNUNET_free_non_null (uri);
881 len = strlen(post_scheme_path);
882 /* Special check for file:///c:/blah
883 * We want to parse 'c:/', not '/c:/'
885 if (post_scheme_path[0] == '/' && len >= 3 && post_scheme_path[2] == ':')
886 post_scheme_path = &post_scheme_path[1]; /* step over the leading '/' */
888 return GNUNET_STRINGS_path_is_absolute (post_scheme_path, GNUNET_NO, NULL, NULL); /* judge the path part alone, with URI detection switched off */
895 *r_is_uri = GNUNET_NO;
898 len = strlen (filename);
900 ((filename[0] >= 'A' && filename[0] <= 'Z') /* a letter, then ':', then '/' or a backslash */
901 || (filename[0] >= 'a' && filename[0] <= 'z'))
902 && filename[1] == ':' && (filename[2] == '/' || filename[2] == '\\'))
909 #define _IFMT 0170000 /* type of file */
910 #define _IFLNK 0120000 /* symbolic link */
911 #define S_ISLNK(m) (((m)&_IFMT) == _IFLNK) /* true when the file-type bits of m equal _IFLNK */
915 * Perform @checks on @filename
917 * @param filename file to check
918 * @param checks checks to perform
919 * @return GNUNET_YES if all @checks pass, GNUNET_NO if at least one of them
920 * fails, GNUNET_SYSERR when a check can't be performed
923 GNUNET_STRINGS_check_filename (const char *filename,
924 enum GNUNET_STRINGS_FilenameCheck checks)
927 if (filename == NULL || filename[0] == '\0') /* a missing or empty name cannot be checked */
928 return GNUNET_SYSERR;
929 if (checks & GNUNET_STRINGS_CHECK_IS_ABSOLUTE)
930 if (!GNUNET_STRINGS_path_is_absolute (filename, GNUNET_NO, NULL, NULL)) /* URI detection is switched off for this test */
932 if (checks & (GNUNET_STRINGS_CHECK_EXISTS /* any of these three checks needs the file's status */
933 | GNUNET_STRINGS_CHECK_IS_DIRECTORY
934 | GNUNET_STRINGS_CHECK_IS_LINK))
936 if (STAT (filename, &st)) /* non-zero result: the status could not be obtained */
/* NOTE(review): if STAT follows symbolic links the way stat() does, st_mode
 * describes the link target and the S_ISLNK test below can never succeed -
 * confirm whether lstat-style behaviour is intended for the IS_LINK check. */
938 if (checks & GNUNET_STRINGS_CHECK_EXISTS)
941 return GNUNET_SYSERR;
944 if (checks & GNUNET_STRINGS_CHECK_IS_DIRECTORY)
945 if (!S_ISDIR (st.st_mode))
947 if (checks & GNUNET_STRINGS_CHECK_IS_LINK)
948 if (!S_ISLNK (st.st_mode))
953 /* end of strings.c */