2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 /* $TOG: StringFuncs.c /main/15 1998/03/19 09:49:44 mgreess $ */
24 /************************************<+>*************************************
25 ****************************************************************************
27 ** File: StringFuncs.c
29 ** Project: Cde DtHelp
31 ** Description: Semi private string functions - can be platform dependent.
34 ** (c) Copyright 1987, 1988, 1989, 1990, 1991, 1992 Hewlett-Packard Company
36 ** (c) Copyright 1993, 1994 Hewlett-Packard Company
37 ** (c) Copyright 1993, 1994 International Business Machines Corp.
38 ** (c) Copyright 1993, 1994 Sun Microsystems, Inc.
39 ** (c) Copyright 1993, 1994 Novell, Inc.
43 ****************************************************************************
44 ************************************<+>*************************************/
52 #include <locale.h> /* getlocale(), LOCALE_STATUS, LC_xxx */
54 #if defined(_AIX) || defined(CSRG_BASED) || defined(__linux__)
58 #include "CvStringI.h" /* for string functions used by Canvas Engine */
59 #include "StringFuncsI.h" /* for _CEStrcollProc */
61 /* JET: This was if !defined(linux), which is wrong. We should use the
62 * real deal on linux too */
68 # define iconv_open(a, b) ((iconv_t) -1)
69 # define iconv(a, b, c, d, e) ((size_t) 0)
70 # define iconv_close(a) (0)
73 #ifdef ICONV_INBUF_CONST
74 # define ICONV_INBUF_TYPE const char **
76 # define ICONV_INBUF_TYPE char **
80 /* Private structure of the IconvContext. Public
81 structure doesn't reveal implementation. */
82 typedef struct _DtHelpCeIconvContextRec
86 unsigned char * table; /* translation table */
89 } _DtHelpCeIconvContextRec;
105 /******************************************************************************
107 * Private variables and defines.
109 *****************************************************************************/
112 /******************************************************************************
114 * Semi Public Functions
116 *****************************************************************************/
117 /******************************************************************************
118 * Function: _DtHelpCeStrcspn (const char *s1, const char *s2, max_len, &ret_len)
120 * Returns in 'ret_len' the length of the initial segment of string
121 * s1 which consists entirely of characters not found in string s2.
124 * -1 If found an invalid character.
125 * 0 If found a character in string s2
126 * 1 If found the null byte character.
127 *****************************************************************************/
152 if (!s2 || *s2 == '\0')
154 *ret_len = strlen (s1);
161 * no need to go through any hassle, just use the 3C function
163 *ret_len = strcspn (s1, s2);
164 if (s1[*ret_len] == '\0')
172 len = mblen (p1, max_len);
176 * we found an invalid character
177 * return the length found so far and the flag.
186 len2 = mblen (p2, max_len);
190 if (len2 == len && strncmp (p1, p2, len) == 0)
204 /******************************************************************************
205 * Function: _DtHelpCeStrspn (const char *s1, const char *s2, max_len, &ret_len)
207 * Returns in 'ret_len' the length of the initial segment of string
208 * s1 which consists entirely of characters found in string s2.
211 * -1 If found an invalid character.
212 * 0 If found a character not in string s2
213 * 1 If found the null byte character.
214 *****************************************************************************/
228 if (!s1 || !s2 || *s2 == '\0')
242 * no need to go through any hassle, just use the 3C function
244 *ret_len = strspn (s1, s2);
245 if (s1[*ret_len] == '\0')
252 while (*p1 != '\0' && found)
254 len = mblen (p1, max_len);
258 * we found an invalid character
259 * return the length found so far and the flag.
267 while (*p2 != '\0' && !found)
269 len2 = mblen (p2, max_len);
273 if (len2 == len && strncmp (p1, p2, len) == 0)
290 /******************************************************************************
291 * Function: _DtHelpCeStrchr (char *s1, char *value, max_len, ret_ptr)
293 * Returns in 'ret_ptr' the address of the first occurrence of 'value'
297 * -1 If found an invalid character.
298 * 0 If found value in string s1
299 * 1 If found the null byte character without finding 'value'.
300 * 'ret_ptr' will also be null in this case.
301 *****************************************************************************/
315 if (s1 == NULL || *s1 == '\0')
320 *ret_ptr = strchr (s1, ((int)*value));
327 valLen = mblen(value, max_len);
333 len = mblen (p1, max_len);
336 if (len == valLen && strncmp(p1, value, len) == 0)
338 *ret_ptr = (char *)p1;
347 /******************************************************************************
348 * Function: _DtHelpCeStrrchr (char *s1, char *value, max_len, ret_ptr)
350 * Returns in 'ret_ptr' the address of the last occurrence of 'value'
354 * -1 If found an invalid character.
355 * 0 If found value in string s1
356 * 1 If found the null byte character without finding 'value'.
357 * 'ret_ptr' will also be null in this case.
358 *****************************************************************************/
372 if (s1 == NULL || *s1 == '\0')
377 *ret_ptr = strrchr (s1, ((int)*value));
378 if (*ret_ptr != NULL)
384 valLen = mblen(value, max_len);
390 len = mblen (p1, max_len);
393 if (len == valLen && strncmp(p1, value, len) == 0)
394 *ret_ptr = (char *)p1;
399 if (*ret_ptr != NULL)
405 /******************************************************************************
406 * Function: _DtHelpCeCountChars (char *s1, max_len, ret_len)
408 * Returns in 'ret_len' the number of characters (not bytes)
412 * -1 If found an invalid character. 'ret_len' contains the
413 * number of 'good' characters found.
415 *****************************************************************************/
417 _DtHelpCeCountChars (
429 *ret_len = strlen (s1);
434 len = mblen (s1, max_len);
437 *ret_len = *ret_len + 1;
445 /*****************************************************************************
446 * Function: _DtHelpCeUpperCase
448 * Parameters: string Specifies the string to change into
451 * Return Value: Nothing
453 * Purpose: To change all lower case characters into upper case.
455 *****************************************************************************/
457 _DtHelpCeUpperCase ( char *string )
463 while (*string != '\0')
465 len = mblen (string, MB_CUR_MAX);
468 *string = (unsigned char) toupper (*(unsigned char *)string);
479 /************************************************************************
480 * Function: _DtHelpCeStrHashToKey
482 * Parameters: C-format string to hash
485 * Does a very simple hash operation on the string and returns the value
487 * Returns: hash value
489 ************************************************************************/
490 int _DtHelpCeStrHashToKey(
497 if (!str) return 0; /* RETURN */
499 for (tstr = str; (c = *tstr++); )
500 key = (key << 1) + c;
507 /******************************************************************************
508 * Function: _DtHelpCeGetStrcollProc
512 * Returns: Ptr to the proper collation function to use
513 * If the codeset of the locale is "C", then it is
514 * strcasecmp(). If it's not, then it is strcoll().
516 * Purpose: When the codeset of the locale is "C", strcoll()
517 * performs collation identical to strcmp(), which is
520 * To get case-insensitive collation, you need to use
521 * strcasecmp() instead. If codeset != "C", then
522 * strcoll() collates according to the language
525 * Warning: This code is not multi-thread safe. The multi-thread
526 * safe setlocale must be used instead to make it so.
528 *****************************************************************************/
529 _CEStrcollProc _DtHelpCeGetStrcollProc(void)
533 struct locale_data * li;
538 extern int strcoll(const char *,const char *);
540 extern int _DtHelpCeStrCaseCmp(const char *,const char *);
542 extern int strcasecmp(const char *,const char *);
547 /* if locale is C, use the explicit case insensitive compare */
549 li = getlocale(LOCALE_STATUS);
550 if ( NULL == li->LC_COLLATE_D || strcmp(C_LANG,li->LC_COLLATE_D) == 0 )
553 locale = setlocale(LC_COLLATE,NULL); /* put locale in buf */
554 if (strcmp(locale,C_LANG) == 0)
560 return _DtHelpCeStrCaseCmp;
567 /*****************************************************************************
568 * Function: String _DtHelpCeStripSpaces (string)
571 * Parameters: String to process
573 * Return Value: Processed string
575 * Purpose: Strip all leading and trailing spaces.
576 * Processing is in place
578 *****************************************************************************/
579 char * _DtHelpCeStripSpaces (
583 int multiLen = MB_CUR_MAX;
589 /* Strip off leading spaces first */
591 while ((multiLen == 1 || (mblen(string + i, MB_LEN_MAX) == 1)) &&
592 isspace((unsigned char) string[i]))
596 /* Copy over the leading spaces */
597 strcpy(string, string + i);
599 /* Drop out, if the string is now empty */
600 if ((i = strlen(string) - 1) < 0)
603 /* Strip off trailing spaces */
606 /* No multibyte; simply work back through the string */
607 while ((i >= 0) && (isspace((unsigned char) string[i])))
609 string[i + 1] = '\0';
613 /* Work forward, looking for a trailing run of spaces */
621 if ( ((len =mblen(string + i, MB_LEN_MAX)) == 1)
622 && isspace((unsigned char) string[i]))
631 /* if there is an invalid character, treat as a valid one-byte */
646 /*****************************************************************************
647 * Function: void _DtHelpCeCompressSpace (string)
649 * Parameters: string to process
651 * Return Value: Nothing (the string is modified in place)
653 * Purpose: This function strips all leading and trailing spaces
654 * from the string; it also compresses any intervening
655 * spaces into a single space. This is useful when
656 * comparing name strings. For instance, the string:
657 * " First Middle Last "
661 * "First Middle Last"
663 * Processing is in place.
665 *****************************************************************************/
666 void _DtHelpCeCompressSpace (
672 /* Strip leading and trailing spaces */
673 _DtHelpCeStripSpaces(string);
675 /* Compress intervening spaces */
676 _DtHelpCeStrchr(string, " ", 1, &ptr);
679 /* Skip over the one space we plan to keep */
681 _DtHelpCeStripSpaces(ptr);
682 _DtHelpCeStrchr(ptr, " ", 1, &ptr);
686 /*****************************************************************************
687 * Function: int _DtHelpCeIconvStr1Step (fromCode, fromStr, toCode, ret_toStr, dflt1, dflt2)
690 * fromCode; codeset name
691 * fromStr; string to convert
692 * toCode; codeset name
693 * ret_toStr; converted str; this string is malloc'd by this routine
694 * and the CALLER MUST FREE it when no longer needed.
695 * dflt1; 1-byte default char
696 * dflt2; 2-byte default char
700 * -1: missing (NULL) argument
701 * -2: no translation available from fromCode to toCode
702 * -3: couldn't allocate memory
703 * -4: couldn't start conversion
704 * -5: incomplete multibyte char
705 * -6: invalid char found
708 * Converts string from fromCode to toCode using iconv(3)
709 * It expects the codeset strings to be iconv(3) compatible.
710 * Generally, compatible strings can be retrieved using
711 * the _DtHelpCeXlateStdToOpLocale() call.
714 * iconv(3) is standardized in XPG4, which is just starting
715 * to be supported. Below are several different implementations
716 * of _DtHelpCeIconvStr, each using what is available on different
717 * platforms. If no code is #ifdef'd, the XPG4 code defaults.
718 *****************************************************************************/
720 int _DtHelpCeIconvStr1Step(
721 const char * fromCode, /* codeset name */
722 const char * fromStr, /* string to convert */
723 const char * toCode, /* codeset name */
724 char * * ret_toStr, /* converted str */
725 int dflt1, /* 1-byte default char */
726 int dflt2) /* 2-byte default char */
729 _DtHelpCeIconvContextRec * iconvContext;
732 ret = _DtHelpCeIconvOpen(&iconvContext,fromCode,toCode,dflt1,dflt2);
734 ret = _DtHelpCeIconvStr(iconvContext,fromStr, ret_toStr,NULL,NULL,0);
735 _DtHelpCeIconvClose(&iconvContext);
740 /*****************************************************************************
741 * Function: int _DtHelpCeIconvOpen (ret_iconvContext, fromCode, toCode, dflt1, dflt2)
744 * ret_iconvContext; receives the newly opened context; it is allocated
745 * by this routine; close it with _DtHelpCeIconvClose() when done.
746 * fromCode; codeset name
747 * toCode; codeset name
748 * dflt1; 1-byte default char
749 * dflt2; 2-byte default char
753 * -1: missing (NULL) argument
754 * -2: no translation available from fromCode to toCode
755 * -3: couldn't allocate memory
756 * -4: couldn't start conversion
757 * -5: incomplete multibyte char
758 * -6: invalid char found
761 * Opens an iconv table/algorithm to convert string from
762 * fromCode to toCode using iconv(3)
763 * It expects the codeset strings to be iconv(3) compatible.
764 * Generally, compatible strings can be retrieved using
765 * the _DtHelpCeXlateStdToOpLocale() call.
768 * iconv(3) is standardized in XPG4, which is just starting
769 * to be supported. Below are several different implementations
770 * of _DtHelpCeIconvStr, each using what is available on different
771 * platforms. If no code is #ifdef'd, the XPG4 code defaults.
772 *****************************************************************************/
773 int _DtHelpCeIconvOpen(
774 _DtHelpCeIconvContext * ret_iconvContext, /* iconv */
775 const char * fromCode, /* codeset name */
776 const char * toCode, /* codeset name */
777 int dflt1, /* 1-byte default char */
778 int dflt2) /* 2-byte default char */
780 { /* XPG4-compliant code (args dflt1 & dflt2 are ignored in this code) */
781 int err; /* error code of function */
782 _DtHelpCeIconvContextRec * ic;
790 if (!ret_iconvContext || !fromCode || !toCode )
791 return -1; /* RETURN error */
794 ic = *ret_iconvContext = calloc(1,sizeof(_DtHelpCeIconvContextRec));
795 if (NULL == ic) return -3; /* RETURN error */
797 if ( strcmp(fromCode,toCode) == 0 )
799 ic->cd = (iconv_t) BAD; /* BAD means use strdup() */
803 ic->cd = iconv_open(toCode,fromCode);
804 if ( ic->cd == (iconv_t) BAD )
806 err = -4; /* error */
811 ic->fromCodeset = strdup(fromCode);
812 ic->toCodeset = strdup(toCode);
813 return err; /* RETURN status */
816 /*****************************************************************************
817 * Function: int _DtHelpCeIconvStr (iconvContext, fromStr, ret_toStr, ret_toStrLen, toStrBuf, toStrBufLen)
820 * iconvContext context for the conversion
821 * fromStr; string to convert
822 * ret_toStr; converted str; this string is malloc'd by this routine
823 * and the CALLER MUST FREE it when no longer needed.
824 * toStrBuf; for efficiency, can pass in a buf
825 * toStrBufLen; length of buf
829 * -1: missing (NULL) argument
830 * -2: no translation available from fromCode to toCode
831 * -3: couldn't allocate memory
832 * -4: couldn't start conversion
833 * -5: incomplete multibyte char
834 * -6: invalid char found
837 * Converts string from fromCode to toCode using iconv(3)
838 * If toStrBuf is NULL, memory for the converted string
839 * will be malloced as needed.
840 * If toStrBuf is not NULL, the conversion will use up
841 * to toStrBufLen bytes of the buffer and then realloc
842 * more memory if needed.
843 * If toStrBuf is not NULL, the size of the buf is
844 * returned in ret_toStrLen; otherwise, the value is
846 * ret_toStr receives the pointer to the buf, which may
847 * be different from toStrBuf if memory was allocated
848 * or NULL if an error occurred. If toStrBuf is
849 * not NULL and memory must be allocated, a realloc()
850 * call is used, possibly invalidating the toStrBuf ptr.
851 * ret_toStrLen receives the length of the buffer if
852 * toStrBuf is not NULL. If it is NULL, the length
855 * iconv(3) is standardized in XPG4, which is just starting
856 * to be supported. Below are several different implementations
857 * of _DtHelpCeIconvStr, each using what is available on different
858 * platforms. If no code is #ifdef'd, the XPG4 code defaults.
859 *****************************************************************************/
860 int _DtHelpCeIconvStr(
861 _DtHelpCeIconvContext iconvContext, /* iconv */
862 const char * fromStr, /* string to convert */
863 char * * ret_toStr, /* converted str */
864 size_t * ret_toStrLen, /* converted str */
865 char * toStrBuf, /* for efficiency, can pass in a buf */
866 size_t toStrBufLen) /* length of buf */
867 { /* XPG4-compliant code (args dflt1 & dflt2 are ignored in this code) */
868 char * toStr; /* ptr to tostr memory */
869 int toStrSize; /* size of mallocd string */
870 size_t inBytesLeft; /* bytes left to use from input buf */
871 const char * inChar; /* ptr into fromstr */
872 char * outChar; /* ptr into tostr */
873 size_t outBytesLeft; /* bytes left in the output str */
874 int err; /* error code of function */
879 /* init ret values; allows function to be called nicely in a loop. */
880 if (ret_toStr) *ret_toStr = toStrBuf;
881 if (ret_toStrLen) *ret_toStrLen = toStrBufLen;
884 if ( !iconvContext || !fromStr || !ret_toStr
885 || (!ret_toStrLen && toStrBuf))
886 return -1; /* RETURN error */
888 /* just do a straight copy if codesets the same or invalid context */
889 if ( iconvContext->cd == (iconv_t) BAD )
891 if (NULL == toStrBuf)
893 *ret_toStr = strdup(fromStr);
895 else /* reuse the buffer */
897 int len = strlen(fromStr) + 1;
898 if (len > toStrBufLen)
899 *ret_toStr = realloc(toStrBuf,len);
902 *ret_toStr = toStrBuf;
905 /* set return values */
906 strcpy(*ret_toStr,fromStr);
909 return (NULL != *ret_toStr ? 0 : -3); /* RETURN result */
915 toStrSize = (NULL == toStrBuf ? 0 : toStrBufLen);
917 inBytesLeft = strlen(fromStr);
919 outBytesLeft = toStrSize;
921 /* translate the string */
923 while ( inBytesLeft > 0 )
925 /* convert a character */
926 if(iconv(iconvContext->cd, (ICONV_INBUF_TYPE)&inChar,
927 &inBytesLeft,&outChar,&outBytesLeft) == -1)
931 case 0: /* no error */
933 case EINVAL: /* inChar pts to incomplete multibyte char */
934 inBytesLeft = 0; /* end the translation */
937 case EILSEQ: /* invalid char at inChar */
938 inBytesLeft = 0; /* end the translation */
941 case E2BIG: /* no room in toStr */
942 /* provide enough mem in the toStr */
943 if (outBytesLeft < sizeof(wchar_t))
945 size_t offset = outChar - (char *) toStr;
947 outBytesLeft += MEM_INC;
948 toStrSize += MEM_INC;
949 toStr = realloc(toStr,toStrSize * sizeof(char));
952 inBytesLeft = 0; /* end the translation */
956 outChar = (char *) (toStr + offset); /* recalc ptr */
960 inBytesLeft = 0; /* breakout of loop */
962 } /* switch on conversion result */
964 } /* while chars left to translate */
967 if(outChar) *outChar = EOS;
969 /* set return values */
970 *ret_toStr = (char *) toStr;
971 if (toStrBuf) *ret_toStrLen = toStrSize;
973 return (NULL != toStr ? 0 : err); /* RETURN result */
977 /*****************************************************************************
978 * Function: void _DtHelpCeIconvClose()
981 * io_iconvContext; context
986 * Closes an iconv context used to convert
987 * fromCode to toCode using iconv(3)
990 * iconv(3) is standardized in XPG4, which is just starting
991 * to be supported. Below are several different implementations
992 * of _DtHelpCeIconvStr, each using what is available on different
993 * platforms. If no code is #ifdef'd, the XPG4 code defaults.
994 *****************************************************************************/
995 void _DtHelpCeIconvClose(
996 _DtHelpCeIconvContext * io_iconvContext)
997 { /* XPG4-compliant code (args dflt1 & dflt2 are ignored in this code) */
998 _DtHelpCeIconvContextRec * ic;
1000 if (!io_iconvContext || NULL == *io_iconvContext) return;
1001 ic = *io_iconvContext;
1003 if ( ic->cd != (iconv_t) BAD )
1004 iconv_close(ic->cd);
1005 if (ic->fromCodeset) free(ic->fromCodeset);
1006 if (ic->toCodeset) free(ic->toCodeset);
1008 *io_iconvContext = NULL;
1012 /*****************************************************************************
1013 * Function: int _DtHelpCeIconvContextSuitable (iconvContext, fromCode, toCode)
1016 * iconvContext: context
1017 * fromCode: proposed fromCodeset
1018 * toCode: proposed toCodeset
1020 * Return Value: True: proposed conversion compatible with this context
1021 * False: proposed conversion is not compatible
1024 * Checks whether the proposed conversion from
1025 * fromCodeset to toCodeset can be handled by
1026 * the iconv context that already exists.
1029 * This function is designed to allow a context
1030 * to stay open as long as possible and avoid
1031 * closing and then reopening the contexts for
1032 * the same conversion.
1033 *****************************************************************************/
1034 int _DtHelpCeIconvContextSuitable(
1035 _DtHelpCeIconvContext iconvContext,
1036 const char * fromCode,
1037 const char * toCode)
1040 || !iconvContext->fromCodeset
1041 || !iconvContext->toCodeset
1046 if ( strcmp(iconvContext->fromCodeset,fromCode) == 0
1047 && strcmp(iconvContext->toCodeset,toCode) == 0)
1055 /*****************************************************************************
1056 * Function: _DtHelpCeStrCaseCmp
1062 * Purpose: IBM does not support the 'strcasecmp' routine. This takes its
1064 *****************************************************************************/
1066 _DtHelpCeStrCaseCmp (
1070 return (_DtCvStrCaseCmp(s1, s2));
1074 /******************************************************************************
1075 * Function: int _DtHelpCeFreeStringArray (char **array)
1077 * Parameters: array A pointer to the NULL-terminated
1078 * string array which is to be freed.
1080 * Return Value: 0 if successful, -1 if a failure occurs
1082 * Purpose: Free the memory used for a NULL-terminated string array.
1084 ******************************************************************************/
1086 _DtHelpCeFreeStringArray (char **array)
1093 for (next = array; *next != NULL; next++)
1100 /****************************************************************************
1101 * Function: void **_DtHelpCeAddPtrToArray (void **array, void *ptr)
1103 * Parameters: array A pointer to a NULL-terminated array
1105 * ptr The pointer which is to be added to
1106 * the end of the array.
1108 * Returns: A pointer to the NULL-terminated array created
1109 * by adding 'ptr' to the end of 'array'.
1111 * Purpose: Add a new element to a NULL-terminated array of pointers.
1112 * These are typed as "void *" so that they can be used with
1113 * pointers to any type of data.
1115 ****************************************************************************/
1117 _DtHelpCeAddPtrToArray (
1121 return (_DtCvAddPtrToArray(array, ptr));
1124 /*****************************************************************************
1125 * Function: _DtHelpCeStrCaseCmpLatin1
1131 * Purpose: Use a version of CaseCmp that does not go through tolower().
1132 * This routine should be used only for compares that know the
1133 * strings are in English (iso8859-1) and do not want setlocale
1135 *****************************************************************************/
1137 _DtHelpCeStrCaseCmpLatin1 (
1141 return (_DtCvStrCaseCmpLatin1(s1, s2));
1144 /*****************************************************************************
1145 * Function: _DtHelpCeStrNCaseCmpLatin1
1151 * Purpose: Use a version of CaseCmp that does not go through tolower().
1152 * This routine should be used only for compares that know the
1153 * strings are in English (iso8859-1) and do not want setlocale
1155 *****************************************************************************/
1157 _DtHelpCeStrNCaseCmpLatin1 (
1162 return (_DtCvStrNCaseCmpLatin1(s1, s2, n));