2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 /* $TOG: StringFuncs.c /main/15 1998/03/19 09:49:44 mgreess $ */
24 /************************************<+>*************************************
25 ****************************************************************************
27 ** File: StringFuncs.c
29 ** Project: Cde DtHelp
31 ** Description: Semi private string functions - can be platform dependent.
34 ** (c) Copyright 1987, 1988, 1989, 1990, 1991, 1992 Hewlett-Packard Company
36 ** (c) Copyright 1993, 1994 Hewlett-Packard Company
37 ** (c) Copyright 1993, 1994 International Business Machines Corp.
38 ** (c) Copyright 1993, 1994 Sun Microsystems, Inc.
39 ** (c) Copyright 1993, 1994 Novell, Inc.
43 ****************************************************************************
44 ************************************<+>*************************************/
52 #include <locale.h> /* getlocale(), LOCALE_STATUS, LC_xxx */
54 #if defined(_AIX) || defined(USL) || defined(__uxp__) || defined(CSRG_BASED)
58 #include "CvStringI.h" /* for string functions used by Canvas Engine */
59 #include "StringFuncsI.h" /* for _CEStrcollProc */
65 # define iconv_open(a, b) ((iconv_t) -1)
66 # define iconv(a, b, c, d, e) ((size_t) 0)
67 # define iconv_close(a) (0)
70 #ifdef ICONV_INBUF_CONST
71 # define ICONV_INBUF_TYPE const char **
73 # define ICONV_INBUF_TYPE char **
77 /* Private structure of the IconvContext. Public
78 structure doesn't reveal implementation.
Besides the translation table, the conversion routines in this file
use the members cd (the iconv descriptor) and fromCodeset / toCodeset
(the codeset names the context was opened with). */
79 typedef struct _DtHelpCeIconvContextRec
83 unsigned char * table; /* translation table */
86 } _DtHelpCeIconvContextRec;
102 /******************************************************************************
104 * Private variables and defines.
106 *****************************************************************************/
109 /******************************************************************************
111 * Semi Public Functions
113 *****************************************************************************/
114 /******************************************************************************
115 * Function: _DtHelpCeStrcspn (const char *s1, const char *s2, max_len, &ret_len)
117 * Returns in 'ret_len' the length of the initial segment of string
118 * s1 which consists entirely of characters not found in string s2.
121 * -1 If found an invalid character.
122 * 0 If found a character in string s2
123 * 1 If found the null byte character.
124 *****************************************************************************/
/* A NULL or empty s2 contains no characters to stop on, so the whole of
 * s1 is the answer.
 * NOTE(review): s1 reaches strlen() with no NULL test on this line, whereas
 * _DtHelpCeStrspn tests !s1 first -- confirm callers never pass NULL. */
149 if (!s2 || *s2 == '\0')
151 *ret_len = strlen (s1);
158 * no need to go through any hassle, just use the 3C function
160 *ret_len = strcspn (s1, s2);
161 if (s1[*ret_len] == '\0')
/* mblen() yields the byte length of the character at p1, inspecting at
 * most max_len bytes; a negative result marks an invalid character. */
169 len = mblen (p1, max_len);
173 * we found an invalid character
174 * return the length found so far and the flag.
183 len2 = mblen (p2, max_len);
/* Two characters are equal when they occupy the same number of bytes and
 * those bytes are identical. */
187 if (len2 == len && strncmp (p1, p2, len) == 0)
201 /******************************************************************************
202 * Function: _DtHelpCeStrspn (const char *s1, const char *s2, max_len, &ret_len)
204 * Returns in 'ret_len' the length of the initial segment of string
205 * s1 which consists entirely of characters found in string s2.
208 * -1 If found an invalid character.
209 * 0 If found a character not in string s2
210 * 1 If found the null byte character.
211 *****************************************************************************/
/* Reject a NULL s1 and a NULL or empty s2 before any scanning is done. */
225 if (!s1 || !s2 || *s2 == '\0')
239 * no need to go through any hassle, just use the 3C function
241 *ret_len = strspn (s1, s2);
242 if (s1[*ret_len] == '\0')
/* Outer loop: keep going while every character of s1 seen so far was
 * found in s2 ('found' still set). */
249 while (*p1 != '\0' && found)
251 len = mblen (p1, max_len);
255 * we found an invalid character
256 * return the length found so far and the flag.
/* Inner loop: walk s2 until the current s1 character is matched or s2 is
 * exhausted. */
264 while (*p2 != '\0' && !found)
266 len2 = mblen (p2, max_len);
/* Same byte length and same bytes means the same character. */
270 if (len2 == len && strncmp (p1, p2, len) == 0)
287 /******************************************************************************
288 * Function: _DtHelpCeStrchr (char *s1, char *value, max_len, ret_ptr)
290 * Returns in 'ret_ptr' the address of the first occurrence of 'value'
294 * -1 If found an invalid character.
295 * 0 If found value in string s1
296 * 1 If found the null byte character without finding 'value'.
297 * 'ret_ptr' will also be null in this case.
298 *****************************************************************************/
/* A NULL or empty s1 cannot contain the value. */
312 if (s1 == NULL || *s1 == '\0')
/* strchr() compares single bytes, so only the first byte of 'value' is
 * used on this path. */
317 *ret_ptr = strchr (s1, ((int)*value));
/* Byte length of the character being searched for (mblen() inspects at
 * most max_len bytes). */
324 valLen = mblen(value, max_len);
330 len = mblen (p1, max_len);
/* A hit needs the same byte length and identical bytes; the address of
 * the matching character is handed back through ret_ptr. */
333 if (len == valLen && strncmp(p1, value, len) == 0)
335 *ret_ptr = (char *)p1;
344 /******************************************************************************
345 * Function: _DtHelpCeStrrchr (char *s1, char *value, max_len, ret_ptr)
347 * Returns in 'ret_ptr' the address of the last occurrence of 'value'
351 * -1 If found an invalid character.
353 * 0 If found value in string s1
353 * 1 If found the null byte character without finding 'value'.
354 * 'ret_ptr' will also be null in this case.
355 *****************************************************************************/
/* A NULL or empty s1 cannot contain the value. */
369 if (s1 == NULL || *s1 == '\0')
/* strrchr() compares single bytes, so only the first byte of 'value' is
 * used on this path. */
374 *ret_ptr = strrchr (s1, ((int)*value));
375 if (*ret_ptr != NULL)
/* Byte length of the character being searched for. */
381 valLen = mblen(value, max_len);
387 len = mblen (p1, max_len);
/* Every match overwrites *ret_ptr, so the pointer ends up on the last
 * match recorded by the scan. */
390 if (len == valLen && strncmp(p1, value, len) == 0)
391 *ret_ptr = (char *)p1;
/* A non-NULL *ret_ptr means at least one match was recorded. */
396 if (*ret_ptr != NULL)
402 /******************************************************************************
403 * Function: _DtHelpCeCountChars (char *s1, max_len, ret_len)
405 * Returns in 'ret_len' the number of characters (not bytes)
409 * -1 If found an invalid character. 'ret_len' contains the
410 * number of 'good' characters found.
412 *****************************************************************************/
/* Counts characters (not bytes) of s1 into *ret_len. */
414 _DtHelpCeCountChars (
/* Byte count taken as the character count -- presumably the single-byte
 * case (max_len == 1); confirm against the guarding condition. */
426 *ret_len = strlen (s1);
/* Byte length of the current character, inspecting at most max_len bytes. */
431 len = mblen (s1, max_len);
/* One more character counted. */
434 *ret_len = *ret_len + 1;
442 /*****************************************************************************
443 * Function: _DtHelpCeUpperCase
445 * Parameters: string Specifies the string to change into
448 * Return Value: Nothing
450 * Purpose: To change all lower case characters into upper case.
452 *****************************************************************************/
/* Upper-cases 'string' in place, walking it one character at a time. */
454 _DtHelpCeUpperCase ( char *string )
460 while (*string != '\0')
/* Byte length of the current character in the current locale. */
462 len = mblen (string, MB_CUR_MAX);
/* The byte is read as unsigned char so that toupper() is never handed a
 * negative value. */
465 *string = (unsigned char) toupper (*(unsigned char *)string);
476 /************************************************************************
477 * Function: _DtHelpCeStrHashToKey
479 * Parameters: C-format string to hash
482 * Does a very simple hash operation on the string and returns the value
484 * Returns: hash value
486 ************************************************************************/
/* Hashes a NUL-terminated string to an int key; a NULL string hashes to 0. */
487 int _DtHelpCeStrHashToKey(
491 register const char * tstr;
492 register int key = 0;
494 if (!str) return 0; /* RETURN */
/* For every byte up to (not including) the terminator: key = key * 2 + byte.
 * NOTE(review): key is a signed int, so the shift/add can overflow for
 * long strings, which is undefined behaviour in C -- consider an unsigned
 * accumulator if callers may pass long strings. */
496 for (tstr = str; (c = *tstr++); )
497 key = (key << 1) + c;
504 /******************************************************************************
505 * Function: _DtHelpCeGetStrcollProc
509 * Returns: Ptr to the proper collation function to use
510 * If the codeset of the locale is "C", then it is
511 * strcasecmp(). If it's not, then it is strcoll().
513 * Purpose: When the codeset of the locale is "C", strcoll()
514 * performs collation identical to strcmp(), which is
517 * To get case-insensitive collation, you need to use
518 * strcasecmp() instead. If codeset != "C", then
519 * strcoll() collates according to the language
522 * Warning: This code is not multi-thread safe. The multi-thread
523 * safe setlocale must be used instead to make it so.
525 *****************************************************************************/
/* Selects the collation routine appropriate for the current LC_COLLATE
 * setting and returns a pointer to it. */
526 _CEStrcollProc _DtHelpCeGetStrcollProc(void)
530 struct locale_data * li;
/* Local prototypes for the candidate comparison routines. */
535 extern int strcoll(const char *,const char *);
536 #if defined(_AIX) || defined(USL) || defined(__uxp__)
537 extern int _DtHelpCeStrCaseCmp(const char *,const char *);
539 extern int strcasecmp(const char *,const char *);
544 /* if locale is C, use the explicit case insensitive compare */
/* Variant using getlocale(): a missing collate entry is treated the same
 * as the C locale. */
546 li = getlocale(LOCALE_STATUS);
547 if ( NULL == li->LC_COLLATE_D || strcmp(C_LANG,li->LC_COLLATE_D) == 0 )
/* Variant using setlocale(): a NULL locale argument only queries the
 * current setting.
 * NOTE(review): the result goes to strcmp() without a NULL check. */
550 locale = setlocale(LC_COLLATE,NULL); /* put locale in buf */
551 if (strcmp(locale,C_LANG) == 0)
/* These platforms use the local case-insensitive compare. */
556 #if defined(_AIX) || defined(USL) || defined(__uxp__)
557 return _DtHelpCeStrCaseCmp;
564 /*****************************************************************************
565 * Function: String _DtHelpCeStripSpaces (string)
568 * Parameters: String to process
570 * Return Value: Processed string
572 * Purpose: Strip all leading and trailing spaces.
573 * Processing is in place
575 *****************************************************************************/
/* Removes leading and trailing white space from 'string' in place. */
576 char * _DtHelpCeStripSpaces (
/* Maximum bytes per character in the current locale; 1 means the cheaper
 * single-byte paths can be used. */
580 int multiLen = MB_CUR_MAX;
586 /* Strip off leading spaces first */
/* Only a one-byte character can be a space, so multibyte characters stop
 * the scan. */
588 while ((multiLen == 1 || (mblen(string + i, MB_LEN_MAX) == 1)) &&
589 isspace((unsigned char) string[i]))
593 /* Copy over the leading spaces */
/* NOTE(review): source and destination overlap whenever i > 0, which
 * strcpy() does not permit (undefined behaviour);
 * memmove(string, string + i, strlen(string + i) + 1) is the defined
 * equivalent. */
594 strcpy(string, string + i);
596 /* Drop out, if the string is now empty */
/* i becomes the index of the last byte; the < 0 test relies on i being a
 * signed type. */
597 if ((i = strlen(string) - 1) < 0)
600 /* Strip off trailing spaces */
603 /* No multibyte; simply work back through the string */
604 while ((i >= 0) && (isspace((unsigned char) string[i])))
/* Terminate just after the last non-space byte. */
606 string[i + 1] = '\0';
610 /* Work forward, looking for a trailing space of spaces */
618 if ( ((len =mblen(string + i, MB_LEN_MAX)) == 1)
619 && isspace((unsigned char) string[i]))
628 /* if there is an invalid character, treat as a valid one-byte */
643 /*****************************************************************************
644 * Function: void _DtHelpCeCompressSpace (string)
646 * Parameters: string to process
648 * Return Value: none (the string is modified in place)
650 * Purpose: This function strips all leading and trailing spaces
651 * from the string; it also compresses any intervening
652 * spaces into a single space. This is useful when
653 * comparing name strings. For instance, the string:
654 * " First Middle Last "
658 * "First Middle Last"
660 * Processing is in place.
662 *****************************************************************************/
/* Trims 'string' and squeezes interior runs of spaces, all in place. */
663 void _DtHelpCeCompressSpace (
669 /* Strip leading and trailing spaces */
670 _DtHelpCeStripSpaces(string);
672 /* Compress intervening spaces */
/* Locate the first space; the max_len argument of 1 makes this a
 * single-byte search. */
673 _DtHelpCeStrchr(string, " ", 1, &ptr);
676 /* Skip over the one space we plan to keep */
/* Stripping the remainder removes any further spaces that directly follow
 * the kept one. */
678 _DtHelpCeStripSpaces(ptr);
/* Move on to the next space in the remainder. */
679 _DtHelpCeStrchr(ptr, " ", 1, &ptr);
683 /*****************************************************************************
684 * Function: int _DtHelpCeIconvStr1Step (fromCode, fromStr, toCode, ret_toStr, dflt1, dflt2)
687 * fromCode; codeset name
688 * fromStr; string to convert
689 * toCode; codeset name
690 * ret_toStr; converted str; this string is malloc'd by this routine
691 * and the CALLER MUST FREE it when no longer needed.
692 * dflt1; 1-byte default char
693 * dflt2; 2-byte default char
697 * -1: missing (NULL) argument
698 * -2: no translation available from fromCode to toCode
699 * -3: couldn't allocate memory
700 * -4: couldn't start conversion
701 * -5: incomplete multibyte char
702 * -6: invalid char found
705 * Converts string from fromCode to toCode using iconv(3)
706 * It expects the codeset strings to be iconv(3) compatible.
707 * Generally, compatible strings can be retrieved using
708 * the _DtHelpCeXlateStdToOpLocale() call.
711 * iconv(3) is standardized in XPG4, which is just starting
712 * to be supported. Below are several different implementations
713 * of _DtHelpCeIconvStr, each using what is available on different
714 * platforms. If no code is #ifdef'd, the XPG4 code defaults.
715 *****************************************************************************/
/* One-shot conversion: opens a context, converts fromStr, then closes the
 * context again. */
717 int _DtHelpCeIconvStr1Step(
718 const char * fromCode, /* codeset name */
719 const char * fromStr, /* string to convert */
720 const char * toCode, /* codeset name */
721 char * * ret_toStr, /* converted str */
722 int dflt1, /* 1-byte default char */
723 int dflt2) /* 2-byte default char */
726 _DtHelpCeIconvContextRec * iconvContext;
729 ret = _DtHelpCeIconvOpen(&iconvContext,fromCode,toCode,dflt1,dflt2);
/* No caller buffer is supplied (NULL, NULL, 0), so the converted string
 * is allocated by the callee and handed back through ret_toStr. */
731 ret = _DtHelpCeIconvStr(iconvContext,fromStr, ret_toStr,NULL,NULL,0);
732 _DtHelpCeIconvClose(&iconvContext);
737 /*****************************************************************************
738 * Function: int _DtHelpCeIconvOpen (ret_iconvContext, fromCode, toCode, dflt1, dflt2)
741 * ret_iconvContext; receives the newly allocated conversion context
742 * fromCode; codeset name of the strings to be converted
743 * toCode; codeset name to convert to
744 * (release the context with _DtHelpCeIconvClose() when done)
745 * dflt1; 1-byte default char
746 * dflt2; 2-byte default char
750 * -1: missing (NULL) argument
751 * -2: no translation available from fromCode to toCode
752 * -3: couldn't allocate memory
753 * -4: couldn't start conversion
754 * -5: incomplete multibyte char
755 * -6: invalid char found
758 * Opens an iconv table/algorithm to convert string from
759 * fromCode to toCode using iconv(3)
760 * It expects the codeset strings to be iconv(3) compatible.
761 * Generally, compatible strings can be retrieved using
762 * the _DtHelpCeXlateStdToOpLocale() call.
765 * iconv(3) is standardized in XPG4, which is just starting
766 * to be supported. Below are several different implementations
767 * of _DtHelpCeIconvStr, each using what is available on different
768 * platforms. If no code is #ifdef'd, the XPG4 code defaults.
769 *****************************************************************************/
/* Allocates a conversion context for fromCode -> toCode and stores it in
 * *ret_iconvContext. */
770 int _DtHelpCeIconvOpen(
771 _DtHelpCeIconvContext * ret_iconvContext, /* iconv */
772 const char * fromCode, /* codeset name */
773 const char * toCode, /* codeset name */
774 int dflt1, /* 1-byte default char */
775 int dflt2) /* 2-byte default char */
777 { /* XPG4-compliant code (args dflt1 & dflt2 are ignored in this code) */
778 int err; /* error code of function */
779 _DtHelpCeIconvContextRec * ic;
787 if (!ret_iconvContext || !fromCode || !toCode )
788 return -1; /* RETURN error */
/* calloc() zero-fills the record, so members not set below start out
 * as zero. */
791 ic = *ret_iconvContext = calloc(1,sizeof(_DtHelpCeIconvContextRec));
792 if (NULL == ic) return -3; /* RETURN error */
/* Identical codesets need no iconv descriptor at all. */
794 if ( strcmp(fromCode,toCode) == 0 )
796 ic->cd = (iconv_t) BAD; /* BAD means use strdup() */
/* iconv_open() takes the target codeset first, then the source. */
800 ic->cd = iconv_open(toCode,fromCode);
801 if ( ic->cd == (iconv_t) BAD )
803 err = -4; /* error */
/* Keep private copies of the codeset names in the context.
 * NOTE(review): the strdup() results are not checked here. */
808 ic->fromCodeset = strdup(fromCode);
809 ic->toCodeset = strdup(toCode);
810 return err; /* RETURN status */
813 /*****************************************************************************
814 * Function: int _DtHelpCeIconvStr (iconvContext, fromStr, ret_toStr, ret_toStrLen, toStrBuf, toStrBufLen)
817 * iconvContext context for the conversion
818 * fromStr; string to convert
819 * ret_toStr; converted str; this string is malloc'd by this routine
820 * and the CALLER MUST FREE it when no longer needed.
821 * toStrBuf; for efficiency, can pass in a buf
822 * toStrBufLen; length of buf
826 * -1: missing (NULL) argument
827 * -2: no translation available from fromCode to toCode
828 * -3: couldn't allocate memory
829 * -4: couldn't start conversion
830 * -5: incomplete multibyte char
831 * -6: invalid char found
834 * Converts string from fromCode to toCode using iconv(3)
835 * If toStrBuf is NULL, memory for the converted string
836 * will be malloced as needed.
837 * If toStrBuf is not NULL, the conversion will use up
838 * to toStrBufLen bytes of the buffer and then realloc
839 * more memory if needed.
840 * If toStrBuf is not NULL, the size of the buf is
841 * returned in ret_toStrLen; otherwise, the value is
843 * ret_toStr receives the pointer to the buf, which may
844 * be different from toStrBuf if memory was allocated
845 * or NULL if an error occurred. If toStrBuf is
846 * not NULL and memory must be allocated, a realloc()
847 * call is used, possibly invalidating the toStrBuf ptr.
848 * ret_toStrLen receives the length of the buffer if
849 * toStrBuf is not NULL. If it is NULL, the length
852 * iconv(3) is standardized in XPG4, which is just starting
853 * to be supported. Below are several different implementations
854 * of _DtHelpCeIconvStr, each using what is available on different
855 * platforms. If no code is #ifdef'd, the XPG4 code defaults.
856 *****************************************************************************/
/* Converts fromStr with the given context, using the caller's buffer when
 * one is supplied and growing/allocating memory as needed. */
857 int _DtHelpCeIconvStr(
858 _DtHelpCeIconvContext iconvContext, /* iconv */
859 const char * fromStr, /* string to convert */
860 char * * ret_toStr, /* converted str */
861 size_t * ret_toStrLen, /* receives the size of the returned buffer */
862 char * toStrBuf, /* for efficiency, can pass in a buf */
863 size_t toStrBufLen) /* length of buf */
864 { /* XPG4-compliant code */
865 char * toStr; /* ptr to tostr memory */
866 int toStrSize; /* size of mallocd string */
867 size_t inBytesLeft; /* bytes left to use from input buf */
868 const char * inChar; /* ptr into fromstr */
869 char * outChar; /* ptr into tostr */
870 size_t outBytesLeft; /* bytes left in the output str */
871 int err; /* error code of function */
876 /* init ret values; allows function to be called nicely in a loop. */
877 if (ret_toStr) *ret_toStr = toStrBuf;
878 if (ret_toStrLen) *ret_toStrLen = toStrBufLen;
/* A caller-supplied buffer also requires ret_toStrLen, because the buffer
 * size is reported back through it. */
881 if ( !iconvContext || !fromStr || !ret_toStr
882 || (!ret_toStrLen && toStrBuf))
883 return -1; /* RETURN error */
885 /* just do a straight copy if codesets the same or invalid context */
886 if ( iconvContext->cd == (iconv_t) BAD )
888 if (NULL == toStrBuf)
890 *ret_toStr = strdup(fromStr);
892 else /* reuse the buffer */
/* NOTE(review): len is a signed int compared against the size_t
 * toStrBufLen below. */
894 int len = strlen(fromStr) + 1;
895 if (len > toStrBufLen)
/* NOTE(review): no NULL check on this realloc() result is visible before
 * the strcpy() further down -- confirm. */
896 *ret_toStr = realloc(toStrBuf,len);
899 *ret_toStr = toStrBuf;
902 /* set return values */
903 strcpy(*ret_toStr,fromStr);
906 return (NULL != *ret_toStr ? 0 : -3); /* RETURN result */
/* Without a caller buffer the output starts with zero bytes of capacity. */
912 toStrSize = (NULL == toStrBuf ? 0 : toStrBufLen);
914 inBytesLeft = strlen(fromStr);
916 outBytesLeft = toStrSize;
918 /* translate the string */
920 while ( inBytesLeft > 0 )
922 /* convert a character */
/* iconv() reports failure as (size_t) -1; the cases below distinguish
 * the reasons. */
923 if(iconv(iconvContext->cd, (ICONV_INBUF_TYPE)&inChar,
924 &inBytesLeft,&outChar,&outBytesLeft) == -1)
928 case 0: /* no error */
930 case EINVAL: /* inChar pts to incomplete multibyte char */
931 inBytesLeft = 0; /* end the translation */
934 case EILSEQ: /* invalid char at inChar */
935 inBytesLeft = 0; /* end the translation */
938 case E2BIG: /* no room in toStr */
939 /* provide enough mem in the toStr */
/* Grow only when fewer than sizeof(wchar_t) bytes remain. */
940 if (outBytesLeft < sizeof(wchar_t))
/* Remember the write position as an offset, since realloc() may move
 * the buffer. */
942 size_t offset = outChar - (char *) toStr;
944 outBytesLeft += MEM_INC;
945 toStrSize += MEM_INC;
/* NOTE(review): assigning the realloc() result straight to toStr loses
 * the old buffer's address if the call fails. */
946 toStr = realloc(toStr,toStrSize * sizeof(char));
949 inBytesLeft = 0; /* end the translation */
953 outChar = (char *) (toStr + offset); /* recalc ptr */
957 inBytesLeft = 0; /* breakout of loop */
959 } /* switch on conversion result */
961 } /* while chars left to translate */
/* Terminate the converted string at the current write position. */
964 if(outChar) *outChar = EOS;
966 /* set return values */
967 *ret_toStr = (char *) toStr;
968 if (toStrBuf) *ret_toStrLen = toStrSize;
/* NOTE(review): 0 is returned whenever toStr is non-NULL, so err is only
 * reported when no buffer exists -- confirm this is intended. */
970 return (NULL != toStr ? 0 : err); /* RETURN result */
974 /*****************************************************************************
975 * Function: void _DtHelpCeIconvClose()
978 * io_iconvContext; context
983 * Closes an iconv context used to convert
984 * fromCode to toCode using iconv(3)
987 * iconv(3) is standardized in XPG4, which is just starting
988 * to be supported. Below are several different implementations
989 * of _DtHelpCeIconvStr, each using what is available on different
990 * platforms. If no code is #ifdef'd, the XPG4 code defaults.
991 *****************************************************************************/
/* Releases the resources held by a conversion context and clears the
 * caller's handle. */
992 void _DtHelpCeIconvClose(
993 _DtHelpCeIconvContext * io_iconvContext)
994 { /* XPG4-compliant code */
995 _DtHelpCeIconvContextRec * ic;
/* Nothing to do for a NULL handle or an already-cleared context. */
997 if (!io_iconvContext || NULL == *io_iconvContext) return;
998 ic = *io_iconvContext;
/* A descriptor equal to BAD was never opened by iconv_open(), so it must
 * not be closed. */
1000 if ( ic->cd != (iconv_t) BAD )
1001 iconv_close(ic->cd);
/* Free the codeset names duplicated with strdup() in _DtHelpCeIconvOpen. */
1002 if (ic->fromCodeset) free(ic->fromCodeset);
1003 if (ic->toCodeset) free(ic->toCodeset);
/* Clear the caller's handle so a repeated close returns early. */
1005 *io_iconvContext = NULL;
1009 /*****************************************************************************
1010 * Function: int _DtHelpCeIconvContextSuitable (iconvContext, fromCode, toCode)
1013 * iconvContext: context
1014 * fromCode: proposed fromCodeset
1015 * toCode: proposed toCodeset
1017 * Return Value: True: proposed conversion compatible with this context
1018 * False: proposed conversion is not compatible
1021 * Checks whether the proposed conversion from
1022 * fromCodeset to toCodeset can be handled by
1023 * the iconv context that already exists.
1026 * This function is designed to allow a context
1027 * to stay open as long as possible and avoid
1028 * closing and then reopening the contexts for
1029 * the same conversion.
1030 *****************************************************************************/
/* Tests whether an existing context already converts fromCode -> toCode. */
1031 int _DtHelpCeIconvContextSuitable(
1032 _DtHelpCeIconvContext iconvContext,
1033 const char * fromCode,
1034 const char * toCode)
/* The checks below test whether the context has both codeset names
 * recorded. */
1037 || !iconvContext->fromCodeset
1038 || !iconvContext->toCodeset
/* Both recorded names must equal the proposed ones exactly (strcmp is
 * case-sensitive). */
1043 if ( strcmp(iconvContext->fromCodeset,fromCode) == 0
1044 && strcmp(iconvContext->toCodeset,toCode) == 0)
1051 #if defined(_AIX) || defined (USL) || defined(__uxp__)
1052 /*****************************************************************************
1053 * Function: _DtHelpCeStrCaseCmp
1059 * Purpose: IBM and USL do not support the 'strcasecmp' routine. This takes its
1061 *****************************************************************************/
/* Thin wrapper: forwards both strings to _DtCvStrCaseCmp and returns its
 * result unchanged. */
1063 _DtHelpCeStrCaseCmp (
1067 return (_DtCvStrCaseCmp(s1, s2));
1069 #endif /* _AIX or USL */
1071 /******************************************************************************
1072 * Function: int _DtHelpCeFreeStringArray (char **array)
1074 * Parameters: array A pointer to the NULL-terminated
1075 * string array which is to be freed.
1077 * Return Value: 0 if successful, -1 if a failure occurs
1079 * Purpose: Free the memory used for a NULL-terminated string array.
1081 ******************************************************************************/
/* Releases a NULL-terminated array of strings. */
1083 _DtHelpCeFreeStringArray (char **array)
/* Visit each element in order, stopping at the NULL entry that terminates
 * the array. */
1090 for (next = array; *next != NULL; next++)
1097 /****************************************************************************
1098 * Function: void **_DtHelpCeAddPtrToArray (void **array, void *ptr)
1100 * Parameters: array A pointer to a NULL-terminated array
1102 * ptr The pointer which is to be added to
1103 * the end of the array.
1105 * Returns: A pointer to the NULL-terminated array created
1106 * by adding 'ptr' to the end of 'array'.
1108 * Purpose: Add a new element to a NULL-terminated array of pointers.
1109 * These are typed as "void *" so that they can be used with
1110 * pointers to any type of data.
1112 ****************************************************************************/
/* Thin wrapper: forwards the array and the new pointer to
 * _DtCvAddPtrToArray and returns whatever array pointer it yields. */
1114 _DtHelpCeAddPtrToArray (
1118 return (_DtCvAddPtrToArray(array, ptr));
1121 /*****************************************************************************
1122 * Function: _DtHelpCeStrCaseCmpLatin1
1128 * Purpose: Use a version of CaseCmp that does not go through tolower().
1129 * This routine should be used only for compares that know the
1130 * strings are in English (iso8859-1) and do not want setlocale
1132 *****************************************************************************/
/* Thin wrapper: forwards both strings to _DtCvStrCaseCmpLatin1 and returns
 * its result unchanged. */
1134 _DtHelpCeStrCaseCmpLatin1 (
1138 return (_DtCvStrCaseCmpLatin1(s1, s2));
1141 /*****************************************************************************
1142 * Function: _DtHelpCeStrNCaseCmpLatin1
1148 * Purpose: Use a version of CaseCmp that does not go through tolower().
1149 * This routine should be used only for compares that know the
1150 * strings are in English (iso8859-1) and do not want setlocale
1152 *****************************************************************************/
/* Thin wrapper: forwards both strings and the length limit n to
 * _DtCvStrNCaseCmpLatin1 and returns its result unchanged. */
1154 _DtHelpCeStrNCaseCmpLatin1 (
1159 return (_DtCvStrNCaseCmpLatin1(s1, s2, n));