2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 /* $TOG: FormatMan.c /main/14 1999/10/14 15:06:59 mgreess $ */
24 /************************************<+>*************************************
25 ****************************************************************************
29 ** Project: Text Graphic Display Library
31 ** Description: This body of code formats the input into a form understood by
35 ** (c) Copyright 1987, 1988, 1989, 1990, 1991, 1992 Hewlett-Packard Company
37 ** (c) Copyright 1993, 1994 Hewlett-Packard Company
38 ** (c) Copyright 1993, 1994 International Business Machines Corp.
39 ** (c) Copyright 1993, 1994 Sun Microsystems, Inc.
40 ** (c) Copyright 1993, 1994 Novell, Inc.
44 ****************************************************************************
45 ************************************<+>*************************************/
52 #include <sys/utsname.h>
68 #include "CanvasSegP.h"
74 #include "DisplayAreaP.h"
77 #include "FontAttrI.h"
78 #include "FormatUtilI.h"
79 #include "FormatCCDFI.h"
80 #include "FormatManI.h"
81 #include "HelpXlate.h"
82 #include "XInterfaceI.h"
87 /******************************************************************************
89 * Private variables and defines.
91 *****************************************************************************/
/*
 * Standard file descriptor numbers.  Standard output is descriptor 1 and
 * standard error is descriptor 2; the two must not share a value, or
 * anything redirecting "stderr" through STDERR_FD would actually
 * redirect stdout.
 */
#define STDOUT_FD (1) /* File descriptor for std. output. */
#define STDERR_FD (2) /* File descriptor for std. error. */

/*
 * Indexes naming the two ends of a pipe.
 * NOTE(review): presumably these index the int[2] array filled in by
 * pipe(2), where element 0 is the end data is read out of and element 1
 * is the end data is written into -- confirm against the users of
 * these macros.
 */
#define PIPE_OUT (0) /* The output end of a pipe. */
#define PIPE_IN (1) /* The input end of a pipe. */
/*
 * Scan string handed to __DtHelpCeProcessString together with the
 * formatted text.
 * NOTE(review): presumably the set of characters the string processor
 * stops on ('<', newline, backslash, space) -- confirm against
 * __DtHelpCeProcessString.
 */
static char *ScanString = "<\n\\ ";

/*
 * Markup tokens emitted into the output buffer through WriteToken (and
 * taken back out through RemoveToken) while bold/italic runs are opened
 * and closed.  The text of each token is spelled exactly once, here, so
 * the matching size below can be computed from it rather than counted
 * by hand.
 */
#define MAN_END_TOKEN_TEXT    "</>"
#define MAN_BOLD_TOKEN_TEXT   "<weight bold>"
#define MAN_ITALIC_TOKEN_TEXT "<angle italic>"

static char *EndToken = MAN_END_TOKEN_TEXT;
static char *BoldToken = MAN_BOLD_TOKEN_TEXT;
static char *ItalicToken = MAN_ITALIC_TOKEN_TEXT;

/*
 * Length in bytes of each token, not counting the terminating NUL
 * (sizeof a string literal includes the NUL, hence the "- 1").
 * These evaluate to 3, 13 and 14 respectively.
 */
static int EndTokenSize = (int) (sizeof(MAN_END_TOKEN_TEXT) - 1);
static int BoldTokenSize = (int) (sizeof(MAN_BOLD_TOKEN_TEXT) - 1);
static int ItalicTokenSize = (int) (sizeof(MAN_ITALIC_TOKEN_TEXT) - 1);
115 /******** Private Function Declarations ********/
116 static int FormatManPage(
121 _DtHelpFontHints *font_attr,
125 /******** End Private Function Declarations ********/
127 #define FileStream(f) ((FILE *)(f)->hidden)
129 /******************************************************************************
133 *****************************************************************************/
134 /*********************************************************************
135 * Procedure: GetNextState (char c, wchar_t wide_c, int length, char *str,
136 * wchar_t ret_wc, int ret_wc_len, char ret_str_ptr,
139 * Parameters: wide_c Specifies the wide char value of the
140 * previous character.
141 * length Specifies the length in bytes of the
142 * previous character.
143 * str Specifies the pointer to the string of
144 * bytes to be processed next.
145 * ret_wc Returns the wide character that is to be
146 * italicized iff the state returned is Italic.
147 * ret_wc_len Returns the length in bytes of ret_wc.
148 * ret_str_ptr Returns the pointer to the next character
149 * after the bold or italic character.
150 * ret_flag Returns whether we have encountered a
151 * + followed by a _, to convert to +/-
153 * Returns: Bold, Italic, Char
155 * Purpose: To determine if the sequence of characters represent
156 * a bold or italic character.
158 *********************************************************************/
173 /* check to see if we have the pattern of a bold + followed by a bold _
174 purpose is to eventually convert this to +/-
175 This is a hack because the nroff output produced for the sequence
176 of a + with an _ UNDER it, as in indicating statistical significance,
177 looks like a bold +, backspace, and then a bold _.
178 However, the nroff output indicates <c>bs_ as an italic, so
179 this sequence has to be treated as a special case. */
183 if (str[7] == '\010' && str[8] == '_' &&
184 str[9] == '\010' && str[10] == '_' &&
185 str[11] == '\010' && str[12] == '_' &&
186 str[13] == '\010' && str[14] == '_' )
192 /* We need to eat the bold '_' -- continuation of aforementioned hack */
193 if (TRUE == *ret_flag)
196 str[0] == '\010' && str[1] == '_' &&
197 str[2] == '\010' && str[3] == '_' &&
198 str[4] == '\010' && str[5] == '_' &&
199 str[6] == '\010' && str[7] == '_' )
202 *ret_str_ptr = str + 8;
208 * check for multi-byte italic which is '_ _bsbs<c>'
210 if (len == 1 && c == '_' && mblen (str, MB_CUR_MAX) == 1 && *str == '_')
217 * check for backspaces equal to the number of bytes making up wide_c.
221 if (mblen (str, MB_CUR_MAX) != 1 || *str != '\010')
228 * convert the next character
230 *ret_wc_len = mbtowc(ret_wc, str, MB_CUR_MAX);
235 if ((*ret_wc_len) < 1)
239 * check to make sure the next character is equal to the first character
240 * if so, the sequence was <c>bs<c> indicating a bold character.
242 if (*ret_wc_len == len && *ret_wc == wide_c)
245 * Do not set the return string ptr prior to this return.
246 * If the user specifies the same variable for both the str and
247 * ret_str_ptr, then we will overwrite it improperly if the
248 * sequence is not an italic or bold.
250 *ret_str_ptr = str + *ret_wc_len;
255 * Check to see if the sequence is _bs<c> for italics.
256 * If so, take the next character for the ret_c (even if it
257 * is a multi-byte character). ret_wc, and ret_wc_len are
258 * already set from the mbtowc call. Move the ptr to after
259 * the processed character.
261 if (len == 1 && c == '_')
264 *ret_str_ptr = str + *ret_wc_len;
269 * Check to see if the sequence is <c>bs_ for italics.
270 * If so, return the given c for the ret_c (even if it
271 * is a multi-byte character). Set ret_wc and ret_wc_len
272 * to the passed in wide character and wide character len.
273 * Move the ptr to after the underscore.
275 if (*ret_wc_len == 1 && *str == '_')
278 *ret_str_ptr = str + *ret_wc_len;
285 * failed all the tests.
286 * It could be a character followed by backspaces
287 * followed by another character (which would cause over-
288 * striking), but we don't know how to deal with this.
293 /******************************************************************************
295 * Formatting a man page is done based on the current input and the
296 * current state of the processing.
299 * Bold multi-byte characters have one backspace per byte. Therefore,
300 * a two-byte character would be followed by two backspaces
301 * and the same two-bytes to indicate that the character is to
302 * be a bold character.
303 * Italic characters can either be preceded by a backspace/underline
304 * or followed by a backspace/underline. I.E. _bsAB<bs_
305 * would be an italic capital A, a regular capital B and
306 * an italic less than sign.
307 * All backslashes and less than signs are output with a backslash
310 * The following characters have special meaning in the state table:
311 * <c> indicates a character.
312 * <?> indicates the next character in the sequence.
313 * <_> indicates an underscore character.
314 * bs indicates a backspace character.
315 * nl indicates a new-line character.
317 * Cur State Cur Sequence Action New State
318 * ------------------------------------------------------------------------
319 * Char <c>bs<c><?> Bold -> buf Bold
324 * <_>bs<c><?> Italic -> buf Italic
328 * <c>bs<_><?> Italic -> buf Italic
332 * nl<?> dump buf Char
335 * <c><?> <c> -> buf Char
339 * <c> = lastChar skip to <?> Bold
341 * nl<?> <\> -> buf Char
345 * <c><?> <\> -> buf BoldDone
347 * BoldDone <c>bs<c><?> erase <\> Bold
352 * <_>bs<c><?> Italic -> buf Italic
356 * <c>bs<_><?> Italic -> buf Italic
360 * nl<?> dump buf Char
363 * <c><?> <c> -> buf Char
366 * Italic <c>bs<c><?> <\> -> buf Char
368 * <_>bs<c><?> <c> -> buf Italic
371 * <c>bs<_><?> <c> -> buf Italic
374 * bs<c><?> erase lastChar BoldItalic
380 * nl<?> <\> -> buf Char
384 * <c><?> </> -> buf Char
386 * BoldItalic bs<c><?> skip to <?> BoldItalic
388 * <c><?> <\> -> buf BoldItalicDone
390 * BoldItalicDone _bs<c>bs<c><?> erase <\> ItalicBold
395 * _bs<c><?> <c> -> buf Italic
399 * <c><?> <\> ->buf Char
404 ******************************************************************************/
405 /******************************************************************************
406 * Function: RemoveToken
407 ******************************************************************************/
414 *out_size = *out_size - size;
417 ((*out_buf)[*out_size] == '\\' || (*out_buf)[*out_size] == '<'))
418 *out_size = *out_size - 1;
420 (*out_buf)[*out_size] = '\0';
423 /*********************************************************************
424 * Function: WriteToken
425 *********************************************************************/
436 return (_DtHelpCeAddStrToBuf(&ptr, out_buf, out_size, out_max, size, 128));
439 /*********************************************************************
440 * Function: WriteOutChar
441 *********************************************************************/
455 if (last_len == 1 && c == '<')
457 else if (last_len == 1 && c == '\\')
461 (void) wctomb (temp, last_wc);
462 temp[last_len] = '\0';
466 /* here is the conversion for the bold '+' followed by bold '_'
467 part of a hack to treat this special character sequence */
468 if (flag && c == '+')
471 return (_DtHelpCeAddStrToBuf(&ptr, out_buf, out_size, out_max,
475 /*********************************************************************
476 * Function: FormatManPage
478 * FormatManPage is the top entry point for formatting man pages
479 * into a form understood by a display area.
481 *********************************************************************/
488 _DtHelpFontHints *font_attr,
505 Boolean flag = False;
507 enum State state = Char;
509 cread = strlen (rloc);
514 * while I can read information process; loop.
516 while (result != -1 && cread > 0)
519 * check for the size of the character
521 checkLen = mblen(rloc, MB_CUR_MAX);
524 * if we hit a null character before we've run out of characters,
525 * we've got corrupt data.
533 * check for end of line
535 if (checkLen == 1 && *rloc == '\n')
538 if (state == Bold || state == Italic)
539 result = WriteToken(EndToken, EndTokenSize,
540 out_buf, out_size, out_max);
543 result = _DtHelpCeAddCharToBuf (&rloc, out_buf,
544 out_size, out_max, 128);
547 result = __DtHelpCeProcessString(
557 (*out_buf)[0] = '\0';
569 * get the character and wide character
570 * representation of the next character.
573 lastLen = mbtowc (&lastWC, rloc, MB_CUR_MAX);
576 * skip past this character.
578 rloc = rloc + lastLen;
579 cread = cread - lastLen;
582 * Check ahead for bold or italic sequences
584 newState = GetNextState (c, lastWC, lastLen,
586 &retC, &retWC, &retWCLen,
589 if (newState == Bold)
591 if (state == BoldDone)
592 RemoveToken(EndTokenSize, out_buf, out_size);
594 result = WriteToken(BoldToken,
600 * skip the backspaces and the extra
601 * copy of the character.
603 cread = cread - (retStrPtr - rloc);
606 else if (newState == Italic)
608 if (state != BoldItalicDone)
609 result = WriteToken(ItalicToken,
615 * skip the blanks after the current
616 * character plus the character after
617 * that. The returned wide character
618 * is the character that is to be
621 cread = cread - (retStrPtr - rloc);
628 if (state == BoldItalicDone &&
629 GetNextState (c, lastWC, lastLen, rloc,
630 &retC, &retWC, &retWCLen,
631 &retStrPtr, &flag) == Bold)
633 RemoveToken(EndTokenSize, out_buf, out_size);
634 newState = BoldItalic;
637 else if (state == BoldItalicDone)
638 result = WriteToken(EndToken, EndTokenSize,
645 result = WriteOutChar(lastLen, lastWC, c,
646 out_buf, out_size, out_max, flag);
651 if (GetNextState (c, lastWC, lastLen, rloc,
652 &retC, &retWC, &retWCLen,
653 &retStrPtr, &flag) == Bold)
655 /* skip backspaces and copy characters */
656 cread = cread - (retStrPtr - rloc);
661 result = WriteToken(EndToken, EndTokenSize,
662 out_buf, out_size, out_max);
663 if (state == BoldItalic)
664 state = BoldItalicDone;
672 newState = GetNextState (c, lastWC, lastLen,
674 &retC, &retWC, &retWCLen,
677 if (newState == Italic)
680 cread = cread - (retStrPtr - rloc);
686 result = WriteOutChar(lastLen, lastWC, c,
687 out_buf, out_size, out_max, flag);
689 else if (italicCnt == 1 && lastWC == retWC
692 RemoveToken(lastLen, out_buf, out_size);
694 result = WriteToken(BoldToken,
699 cread = cread - (retStrPtr - rloc);
705 result = WriteOutChar(lastLen, lastWC, c,
706 out_buf, out_size, out_max, flag);
712 result = WriteToken(EndToken,
724 if (cread < (3 * ((int) MB_CUR_MAX)) &&
725 !feof (FileStream(in_file)))
731 * if it is an invalid character - skip.
733 * If this is the start of a multi-byte character,
734 * I must save it and try again on the next read.
736 if (cread < ((int) MB_CUR_MAX))
741 * otherwise we've got corrupt data.
747 if (result != -1 && !feof(FileStream(in_file)))
749 if (_DtHelpCeGetNxtBuf (in_file, in_buf, &rloc, in_size) == -1)
753 cread = strlen (rloc);
755 } while (result != -1 && cread > 0);
759 } /* End FormatManPage */
761 /******************************************************************************
765 *****************************************************************************/
766 /*********************************************************************
767 * Function: _DtHelpFormatManPage
769 * _DtHelpFormatManPage formats a man page
770 * into a form understood by a display area.
772 *********************************************************************/
774 _DtHelpFormatManPage(
775 XtPointer client_data,
777 XtPointer *ret_handle)
781 int writeBufSize = 0;
784 char *writeBuf = NULL;
785 char readBuf[BUFSIZ];
786 _DtHelpFontHints fontAttr;
788 _DtCvTopicInfo *topicStruct;
789 BufFilePtr myBufFile;
790 static char manString[] = "man ";
791 static char devNullString[] = " 2>/dev/null";
792 _DtCvTopicPtr topic = NULL;
793 DtHelpDispAreaStruct *pDAS = (DtHelpDispAreaStruct *) client_data;
794 _FrmtUiInfo myUiInfo = { NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 1, False };
797 * fill out the ui information
799 myUiInfo.load_font = _DtHelpDAResolveFont;
800 myUiInfo.client_data = (_DtCvPointer) pDAS;
801 myUiInfo.avg_char = (int)(pDAS->charWidth / 10 +
802 ((pDAS->charWidth % 10) ? 1 : 0));
803 myUiInfo.nl_to_space = pDAS->nl_to_space;
806 * prepend the man command to the man specification
808 ptr = (char *) malloc(sizeof(manString) + strlen(man_spec) +
809 sizeof(devNullString) - 1);
812 strcpy (ptr, manString);
813 strcat (ptr, man_spec);
814 strcat (ptr, devNullString);
816 myFile = popen(ptr, "r");
819 * free the man command
826 if (!myFile) /* couldn't create man(1) process */
830 * make sure we don't try to read compressed.
832 myBufFile = _DtHelpCeCreatePipeBufFile(myFile);
833 if (myBufFile == NULL)
835 (void) pclose(myFile); /* don't check for error, it was popen'd */
840 * get the font quark list - but force to mono-space
842 _DtHelpCeCopyDefFontAttrList (&fontAttr);
843 fontAttr.spacing = _DtHelpFontSpacingMono;
844 _DtHelpCeXlateOpToStdLocale(DtLCX_OPER_SETLOCALE,setlocale(LC_CTYPE,NULL),
845 NULL, &(fontAttr.language), &(fontAttr.char_set));
847 myVars = __DtHelpCeSetUpVars(fontAttr.language, fontAttr.char_set, &myUiInfo);
850 free(fontAttr.language);
851 free(fontAttr.char_set);
852 _DtHelpCeBufFileClose (myBufFile, True);
859 result = _DtHelpCeGetNxtBuf (myBufFile, readBuf, &ptr, BUFSIZ);
861 result = FormatManPage (myVars, myBufFile,
869 if ((result != -1) && writeBufSize)
870 result = __DtHelpCeProcessString(myVars, NULL, _DtCvLITERAL,
871 ScanString, writeBuf, writeBufSize,
872 0, False, &fontAttr);
883 _DtHelpCeBufFileClose (myBufFile, True);
886 * clean up the last segment.
889 __DtHelpCeGetParagraphList (myVars, True, _DtCvLITERAL, &topic);
891 topicStruct = (_DtCvTopicInfo *) (topic);
894 * did we have any paragraphs to format?
896 if ((topic != NULL && NULL == topicStruct->seg_list)
897 || NULL == _DtCvContainerListOfSeg(topicStruct->seg_list))
899 _DtHelpFreeSegments(topicStruct->seg_list, _DtCvFALSE,
900 NULL, (_DtCvPointer) pDAS);
901 free ((char *) topicStruct);
903 errno = ENOENT; /* we'll just assume no man page existed */
907 free(fontAttr.language);
908 free(fontAttr.char_set);
911 *ret_handle = (_DtCvPointer) topic;
915 } /* End _DtHelpFormatManPage */