2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
24 * COMPONENT_NAME: austext
26 * FUNCTIONS: change_max_wordsize
28 * confirm_ok_to_overwrite
38 * (C) COPYRIGHT International Business Machines Corp. 1993,1996
40 * Licensed Materials - Property of IBM
41 * US Government Users Restricted Rights - Use, duplication or
42 * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
44 /************************* DTSRCREATE.C **************************
45 * $XConsortium: dtsrcreate.c /main/9 1996/09/23 21:02:04 cde-ibm $
47 * Program formerly named initausd, in module initausd.c.
48 * Essentially performs the same function as vista's initdb,
49 * but uses only the dtsearch.dbd and renames the files during creation.
50 * Also creates and initializes the first slot, the 'dbrec'.
53 * Revision 2.8 1996/03/25 18:53:33 miker
54 * Changed FILENAME_MAX to _POSIX_PATH_MAX.
56 * Revision 2.7 1996/02/01 18:16:16 miker
57 * Changed some arg defaults depending on DTSEARCH definition.
58 * Deleted BETA definition.
60 * Revision 2.6 1995/12/27 16:10:03 miker
61 * Permit -wx before -wn on command line.
63 * Revision 2.5 1995/12/07 23:27:51 miker
64 * Fixed bug: minwordsz was > max when max inited to -1.
65 * Changed 'Engine Type' to 'Flavor' for AusBuild.
67 * Revision 2.4 1995/10/25 21:09:42 miker
70 * Revision 2.3 1995/10/20 21:28:25 miker
71 * Intelligently look for dtsearch.dbd in 3 places.
73 * Revision 2.2 1995/10/19 21:16:17 miker
74 * Internally rename database files so it doesn't have to be
75 * done at open time. Ask permission to overwrite preexisting
76 * files. Always create databases from current model dtsearch.dbd
77 * (eliminate SECOND CASE). Coincides with libDtvis enhancements.
79 * Revision 2.1 1995/09/22 19:32:18 miker
80 * Freeze DtSearch 0.1, AusText 2.1.8
82 * Revision 1.2 1995/09/19 21:56:53 miker
83 * Enabled Japanese language DtSrJPN.
84 * If DtSearch, use DtSrVERSION instead of AUSAPI_VERSION in banner.
86 * Revision 1.1 1995/08/31 20:50:28 miker
93 #include <sys/types.h>
/* Program tag placed at the front of the messages printed by this file. */
98 #define PROGNAME "DTSRCREATE"
/* Initial value of minwordsz; change_min_wordsize() warns about storage
 * cost when the user selects anything smaller. */
99 #define DEFAULT_MINWORD (MINWIDTH_TOKEN + 1)
/* Maximum word size applied when maxwordsz is still INT_MAX after argument
 * parsing; change_max_wordsize() warns about storage cost above it. */
100 #define STANDARD_MAXWORD (DtSrMAXWIDTH_HWORD - 1)
/* Message set number handed to catgets() for the messages of this program. */
102 #define MS_initausd 12
/* File name of the model dbd that is copied to build the new database. */
103 #define FNAME_MODEL "dtsearch.dbd"
105 /* The following MUST MATCH vista's dbtype.h! */
106 #define SIZEOF_FILE_ENTRY 252 /* sizeof(FILE_ENTRY) */
107 #define DBD_COMPAT_LEN 6
/* Byte offset of the first file table entry inside a dbd file.
 * create_new_dbd() seeks to START_OF_FT + i * SIZEOF_FILE_ENTRY
 * to reach entry i. */
108 #define START_OF_FT (DBD_COMPAT_LEN + (8 * sizeof(INT)))
110 /* Values for 'flavor' global variable */
111 #define AUSTEXT_FLAVOR 'a'
112 #define DTSEARCH_FLAVOR 'd'
114 /*------------------ GLOBALS -------------------*/
/* Abstract size from the command line; starts at -1. */
115 static int abstrsz = -1;
/* Database name, copied out of newpath by user_args_processor(). */
116 static char dbname [12] = "<dbname>";
/* Header record image: zero-filled and populated in main(), then handed
 * to d_fillnew(). */
117 struct or_dbrec dbrec;
118 static int debug_mode = FALSE;
119 static char default_cant_open_msg[] =
121 static int fzkeysz = 0;
122 static int flavor = DTSEARCH_FLAVOR;
123 static int language = DtSrLaENG;
124 static int minwordsz = DEFAULT_MINWORD;
/* INT_MAX marks the value as not yet chosen; user_args_processor()
 * replaces a remaining INT_MAX with STANDARD_MAXWORD. */
125 static int maxwordsz = INT_MAX;
/* The next three limits are computed in main() from the sizes of the
 * misc, long word and short word record fields. */
126 static int max_ormisc_size;
127 static int maxwidth_lword;
128 static int maxwidth_sword;
129 static char modelpath [_POSIX_PATH_MAX];
130 /* path/name of model dbd file */
131 static char newpath [_POSIX_PATH_MAX];
132 /* path/name for each renamed file */
133 static char *newextp; /* loc where extension suffixes placed */
/* Set TRUE by user_args_processor() and by confirm_ok_to_overwrite(). */
134 static int ok_to_overwrite = FALSE;
/* Offset inside newpath at which the database name starts; main() reuses
 * it to place FNAME_MODEL after the same directory prefix. */
135 static long path_offset = 0;
136 static int quiet_mode = FALSE;
/* create_new_dbd() walks this table until it reaches a NULL entry and
 * writes dbname plus each suffix into the matching file table slot. */
138 static char *exttab[] = {
139 /* Must be in same order as model .dbd file tables */
140 ".d00", ".d01", ".d21", ".d22", ".d23",
141 ".k00", ".k01", ".k21", ".k22", ".k23",
144 /* Same as MS_initausd, 213... */
145 static char default_unable_to_open_msg[] =
146 "%1$s Unable to open '%2$s':\n %3$s.\a\n";
149 /************************************************/
151 /* confirm_ok_to_overwrite */
153 /************************************************/
154 /* Called whenever we are about to write a new file.
155 * Checks to see if file preexists. If it does,
156 * and user has never said it's ok to overwrite,
157 * prompts for permission to overlay all preexisting files.
158 * If 'yes', never asks again. If 'no', exits.
159 * Returns if ok to overwrite, else exits.
/* NOTE(review): the existence probe below opens the global newpath rather
 * than the fname argument. The call in create_new_dbd() passes newpath,
 * so both name the same file there; confirm before adding other callers. */
161 static void confirm_ok_to_overwrite (char *fname)
/* A read-only open serves purely as an existence test. */
168 if ((fptr = fopen (newpath, "r")) == NULL)
172 printf ( catgets(dtsearch_catd, MS_initausd, 12,
173 "\nFile '%s' already exists.\n"
174 "Is it ok to overwrite it and other database files? [y,n] ") ,
/* Only one character of the reply is read, and it is lowercased before
 * being examined. */
176 i = tolower (getchar());
/* Record the consent in the file-level flag. */
178 ok_to_overwrite = TRUE;
182 } /* confirm_ok_to_overwrite() */
185 /************************************************/
187 /* change_max_wordsize */
189 /************************************************/
190 /* Subroutine of user_args_processor().
191 * Adjusts maxwordsz per user request and allowed sizes of schema.
/* new_size is the numeric text that follows the -wx option characters.
 * user_args_processor() treats a zero return value as failure. */
193 static int change_max_wordsize (char *new_size)
/* The raw request is kept in users_newsize so the notice further down can
 * tell whether rounding changed it. */
197 maxwordsz = users_newsize = atoi (new_size);
199 /* error if min and max specifications incompatible */
200 if (minwordsz > maxwordsz) {
201 printf (catgets (dtsearch_catd, MS_initausd, 5,
202 PROGNAME" Minimum word size %d greater "
203 "than maximum word size %d.\n"),
204 minwordsz, maxwordsz);
/* Only three sizes are kept as given: maxwidth_sword, maxwidth_lword and
 * DtSrMAXWIDTH_HWORD - 1. A request below maxwidth_sword becomes
 * maxwidth_sword, a request below maxwidth_lword becomes maxwidth_lword,
 * and the remaining assignment selects DtSrMAXWIDTH_HWORD - 1. */
208 /* If necessary, adjust to nearest logical maxwordsz */
209 if (maxwordsz != maxwidth_sword &&
210 maxwordsz != maxwidth_lword &&
211 maxwordsz != DtSrMAXWIDTH_HWORD - 1) {
212 if (maxwordsz < maxwidth_sword)
213 maxwordsz = maxwidth_sword;
214 else if (maxwordsz < maxwidth_lword)
215 maxwordsz = maxwidth_lword;
217 maxwordsz = DtSrMAXWIDTH_HWORD - 1;
220 if (maxwordsz != users_newsize)
221 printf (catgets (dtsearch_catd, MS_initausd, 8,
222 PROGNAME " Adjusted maximum word size to %d.\n"),
/* The warning is skipped for language DtSrLaDEU and in quiet mode. */
225 /* Give user a final warning about large word sizes */
226 if (maxwordsz > STANDARD_MAXWORD && language != DtSrLaDEU && !quiet_mode)
227 printf ("%s", catgets (dtsearch_catd, MS_initausd, 10,
228 PROGNAME" Specifying large maximum word sizes may "
229 "significantly\n increase storage requirements.\n"));
231 } /* change_max_wordsize() */
234 /************************************************/
236 /* change_min_wordsize */
238 /************************************************/
239 /* Subroutine of user_args_processor().
240 * Adjusts minwordsz per user request.
/* new_size is the numeric text that follows the -wn option characters.
 * user_args_processor() treats a zero return value as failure. */
242 static int change_min_wordsize (char *new_size)
/* Remember the value held on entry so a change can be reported below. */
244 int old_minwordsz = minwordsz;
245 if ((minwordsz = atoi (new_size)) < 0)
/* maxwordsz is initialized to INT_MAX, so this comparison can only be
 * true once maxwordsz has been assigned a smaller value. */
248 /* error if min and max specifications incompatible */
249 if (minwordsz > maxwordsz) {
250 printf (catgets (dtsearch_catd, MS_initausd, 5,
251 PROGNAME " Minimum word size %d greater than "
252 "maximum word size %d.\n"),
253 minwordsz, maxwordsz);
/* The comparison is against the value on entry, so the notice appears
 * whenever the setting ends up different from what it was before the
 * call. */
258 if (minwordsz != old_minwordsz)
259 printf (catgets (dtsearch_catd, MS_initausd, 6,
260 PROGNAME " Adjusted minimum word size to %d.\n"),
263 /* give user a warning about short word sizes */
264 if (minwordsz < DEFAULT_MINWORD)
265 printf ("%s", catgets (dtsearch_catd, MS_initausd, 9,
266 PROGNAME " Specifying small minimum word sizes"
267 " may require extensive\n"
268 " editing of stopword file to prevent significantly\n"
269 " increased index storage requirements.\n"));
272 } /* change_min_wordsize() */
275 /************************************************/
279 /************************************************/
/* Prints the command line help text through the message catalog.
 * The format consumes two arguments: aa_argv0 for the program name in the
 * USAGE line and DEFAULT_MINWORD for the default shown beside -wn. */
280 static void print_usage (void)
284 printf (catgets (dtsearch_catd, MS_initausd,
286 "\nUSAGE: %s [-options] dbname\n"
287 " Creates and initializes DtSearch/AusText database files.\n"
288 " -q Do not print information messages.\n"
289 " -o Ok to overwrite preexisting database.\n"
290 " -a<n> Set maximum abstract size to <N> (default per flavor).\n"
291 " -d<dir> Dir containing "FNAME_MODEL" file if not in dbname dir.\n"
292 " -wn<n> Change minimum word size to <N>. Default is %d.\n"
293 " -wx<n> Change maximum word size to <N>. Default per language.\n"
294 " ---------- Database Flavor ----------\n"
295 " -fd DtSearch flavor. No documents, only document references\n"
296 " in abstracts (default).\n"
297 " -fa AusText flavor. Documents stored in central server repository.\n"
298 " ------------ Supported Languages ------------\n"
299 " -l<n> Set language number to <N>. Default is 0. Supported values:\n"
301 " 1 English-Latin1\n"
306 " 6 Japanese-autoknj\n"
307 " 7 Japanese-knjlist\n"
308 " <dbname> Optional path prefix, then 1 - 8 character\n"
309 " database name. Do not specify 'austext' or 'dtsearch'.\n"),
310 aa_argv0, DEFAULT_MINWORD);
313 } /* print_usage() */
316 /************************************************/
318 /* user_args_processor */
320 /************************************************/
321 /* Handles command line arguments for main().
322 * Initializes global variables.
/* Parses the leading dash options into the file-level globals, then takes
 * the database name argument and derives newpath, newextp, dbname and
 * path_offset from it. */
324 static void user_args_processor (int argc, char **argv)
327 int remaining_slot_space;
330 /* Initialize variables prior to parsing command line */
339 /* Each pass grabs new parm of "-xxx" format */
350 case 'r': /* unadvertised debug mode */
351 if (strcmp (ptr, "-russell") == 0) {
353 puts ("001*** debug mode.");
358 printf (catgets (dtsearch_catd, MS_misc, 9,
359 "%sInvalid command line argument '%s'.\a\n"),
360 "\n"PROGNAME" ", ptr);
366 /* zero length abstract may be explicitly specified */
/* atoi yields 0 for text that is not a number, so a zero result is only
 * accepted when the first character after the option letter is the
 * digit 0. */
367 abstrsz = atoi (ptr + 2);
368 if (abstrsz < 0 || (abstrsz == 0 && ptr[2] != '0'))
377 ok_to_overwrite = TRUE;
383 case DTSEARCH_FLAVOR:
391 case 'w': /* change min (-wn..) or max (-wx..) word size */
/* ptr + 3 skips the dash and the two option letters. */
394 if (!change_max_wordsize (ptr + 3))
398 if (!change_min_wordsize (ptr + 3))
406 case 'd': /* special path name for model .dbd */
/* The directory text is cut short enough that the FNAME_MODEL text
 * appended by strcat below still fits in modelpath. ensure_end_slash()
 * is defined elsewhere; presumably it appends a trailing slash when one
 * is missing (confirm against its definition). */
407 strncpy (modelpath, ptr + 2, sizeof(modelpath));
408 modelpath [sizeof(modelpath) - sizeof(FNAME_MODEL) - 4] = 0;
409 ensure_end_slash (modelpath);
410 strcat (modelpath, FNAME_MODEL);
414 /* Note that custom, unsupported languages
415 * greater than DtSrLaLAST are permitted.
417 language = atoi (ptr + 2);
420 if (!quiet_mode && language > DtSrLaLAST)
421 printf ( catgets(dtsearch_catd, MS_initausd, 13,
422 "%s Warning! you have specified "
423 "an unsupported, custom language.\n"
424 " You will have to provide your own "
425 "language loaders at run time\n"
426 " in user function 'load_custom_language' "
427 "to access this database.\a\n"),
432 printf (catgets (dtsearch_catd, MS_misc, 10,
433 "%sIgnored unknown command line argument '%s'.\n"),
437 } /* end parse of cmd line options beginning with '-' */
439 /* Only required arg is new database name,
440 * including optional path prefix.
441 * Load newpath and newextp, leaving room
442 * for long dbnames and .xxx extensions.
446 printf (catgets (dtsearch_catd, MS_misc, 18,
447 "%sDatabase name not specified.\n\a"), "\n"PROGNAME" ");
450 strncpy (newpath, argv[0], sizeof (newpath));
451 newpath [sizeof(newpath) - 12] = 0;
452 newextp = newpath + strlen (newpath);
454 /* Get just the 1 - 8 char database name by moving ptr
455 * backwards until first non-alphanumeric character
456 * (such as a ":" in the dos drive id or a slash between directories),
457 * or to the beginning of string.
458 * Then test database name for validity.
460 for (ptr = newpath + strlen(newpath) - 1; ptr >= newpath; ptr--)
461 if (!isalnum (*ptr)) {
468 if (i < 1 || i > 8) {
471 printf (catgets (dtsearch_catd, MS_misc, 11,
472 "%sInvalid database name '%s'.\a\n"),
473 "\n"PROGNAME"346 ", ptr);
476 path_offset = ptr - newpath;
477 strcpy (dbname, ptr); /* save it */
478 if (strcmp (dbname, "austext") == 0 || strcmp (dbname, "dtsearch") == 0) {
482 /* Ensure semantic processing specified only for english language */
483 if (fzkeysz != 0 && language != DtSrLaENG && language != DtSrLaENG2) {
485 printf ( catgets(dtsearch_catd, MS_initausd, 14,
486 "\n%s semantic processing is only available "
487 "for English language databases.\n\a") ,
492 /* Unless overridden by user args,
493 * initialize abstract based on flavor.
494 * The abstract size defaults to the remaining
495 * space in the final misc slot after the fzkey.
496 * However if the user specified a specific
497 * abstract size, it may be adjusted later
498 * to fill up the last slot.
501 abstrsz = max_ormisc_size - (fzkeysz % max_ormisc_size);
503 /* Default maxword size is 'short', except for German */
/* INT_MAX is the value maxwordsz is initialized with, so this branch
 * applies only when it was never changed during option parsing. */
504 if (maxwordsz == INT_MAX)
505 maxwordsz = STANDARD_MAXWORD;
508 printf ("002*** userargs: modelpath='%s' newpath='%s'\n"
509 " fzkeysz=%d abstrsz=%d\n",
510 modelpath, newpath, fzkeysz, abstrsz);
512 } /* user_args_processor() */
515 /************************************************/
517 /* remove_d9x_file */
519 /************************************************/
/* Deletes the file named by newpath with the given extension.
 * The extension is copied to newextp, which points into newpath, so
 * newpath itself is changed as a side effect of every call. */
520 static void remove_d9x_file (char *extension)
522 strcpy (newextp, extension);
524 printf ("094*** delete '%s'.\n", newpath);
525 if (remove (newpath) != 0) {
526 /* 'file not found' is not an error */
527 if (errno != ENOENT) {
528 printf (catgets (dtsearch_catd, MS_initausd, 244,
529 PROGNAME "244 Unable to remove '%s': %s\n"),
530 newpath, strerror (errno));
535 } /* remove_d9x_file() */
538 /************************************************/
542 /************************************************/
543 /* Copies and moves binary contents in passed, preopened
544 * model .dbd file (f) to new dbd file in target directory.
545 * Rename the internal .d00, etc filenames to match dbname.
/* f is the model dbd stream, already opened by main() in binary read mode.
 * The target file name is built in newpath by writing the dbd extension
 * at newextp. */
547 static void create_new_dbd (FILE *f)
549 FILE *g; /* target dbd file */
551 static char *nocopy_msg =
552 "%s Unable to copy '%s' to '%s':\n %s\a\n";
553 /* (Same as dtsearch.msg: MS_initausd, 214) */
/* sizeof(zeros) is 13: twelve explicit NUL bytes plus the implicit string
 * terminator. The whole array is written after each renamed file name. */
554 static char zeros[] =
555 "\0\0\0\0\0\0\0\0\0\0\0\0";
557 strcpy (newextp, ".dbd");
559 printf (PROGNAME"507 create_new_dbd '%s'\n", newpath);
561 /* If new .dbd file preexists, make sure it is writable */
562 confirm_ok_to_overwrite (newpath);
/* A chmod failure with ENOENT is tolerated: the file simply does not
 * exist yet and is created by the fopen below. */
563 if (chmod (newpath, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)) {
564 if (errno != ENOENT) {
565 printf (catgets (dtsearch_catd, MS_initausd, 214, nocopy_msg),
566 PROGNAME"515", modelpath, newpath, strerror(errno));
571 if ((g = fopen (newpath, "w+b")) == NULL) {
572 printf (catgets (dtsearch_catd, MS_initausd, 214, nocopy_msg),
573 PROGNAME"509", modelpath, newpath, strerror(errno));
/* The model is read one byte at a time until fgetc reports EOF. */
577 while ((i = fgetc (f)) != EOF)
580 printf (catgets (dtsearch_catd, MS_initausd, 214, nocopy_msg),
581 PROGNAME"531", modelpath, newpath, strerror(errno));
/* File table entry i sits at START_OF_FT + i * SIZEOF_FILE_ENTRY.
 * NOTE(review): the results of the fseek, fprintf and fwrite calls in
 * the loop, and of the final chmod, are discarded. */
585 /* Now reposition the write head in the new dbd file
586 * to rename the filenames. Rename each internal file
587 * name to '<newdbname>.xxx'.
589 for (i = 0; exttab[i] != NULL; i++) {
590 fseek (g, START_OF_FT + (i * SIZEOF_FILE_ENTRY), SEEK_SET);
591 fprintf (g, "%s%s", dbname, exttab[i]);
592 fwrite (zeros, sizeof(char), sizeof(zeros), g);
595 /* The new dbd file only has to be readable */
597 chmod (newpath, S_IRUSR | S_IRGRP | S_IROTH);
599 } /* create_new_dbd() */
602 /************************************************/
606 /************************************************/
607 /* 1. CREATE or find database dictionary (.dbd file).
608 * 2. CREATE empty 'dtsearch' database files.
609 * 3. OPEN 'dtsearch' database.
610 * 4. INITIALIZE the database.
611 * 5. WRITE dbrec after initializing it.
612 * 6. RENAME each database file.
613 * 7. UNLINK (delete) d9x files.
/* Program entry point: parses the arguments, locates and copies the model
 * dbd, opens and initializes the new database, writes the dbrec header
 * record and removes the d9x files. */
615 int main (int argc, char *argv[])
/* The three record variables below are used only as operands of sizeof. */
620 struct or_miscrec miscrec;
621 struct or_swordrec swordrec;
622 struct or_lwordrec lwordrec;
624 setlocale (LC_ALL, "");
625 dtsearch_catd = catopen (FNAME_DTSRCAT, 0);
/* These limits come from the record layouts and are set before
 * user_args_processor() runs, because the word size adjustment and the
 * default abstract size both read them. */
628 max_ormisc_size = sizeof (miscrec.or_misc);
629 maxwidth_sword = sizeof (swordrec.or_swordkey) - 1;
630 maxwidth_lword = sizeof (lwordrec.or_lwordkey) - 1;
632 printf (catgets (dtsearch_catd, MS_misc, 4,
638 /* Handle cmd line args. Init global variables. */
639 user_args_processor (argc, argv);
641 /* ------- copy model .dbd to new .dbd ------- */
643 /* CASE 1: If user specified -d special alternative
644 * directory for model .dbd, it should be there.
646 if (modelpath[0] != 0) {
648 printf (PROGNAME"628 Try opening '%s' (-d dir).\n", modelpath);
649 if ((f = fopen (modelpath, "rb")) != NULL) {
651 puts (PROGNAME"638 Found it!");
658 printf (catgets (dtsearch_catd, MS_initausd, 213,
659 default_unable_to_open_msg),
660 "\n"PROGNAME"302", modelpath, strerror(errno));
665 /* CASE 2: If model .dbd is in current directory, use it.
666 * If error is anything other than 'cant find file', quit now.
669 printf (PROGNAME"649 Try opening '%s' (curr dir).\n", FNAME_MODEL);
670 if ((f = fopen (FNAME_MODEL, "rb")) != NULL) {
672 puts (PROGNAME"660 Found it!");
677 else if (errno != ENOENT) {
679 printf (catgets (dtsearch_catd, MS_initausd, 213,
680 default_unable_to_open_msg),
681 "\n"PROGNAME"655", FNAME_MODEL, strerror(errno));
683 } /* end else CASE 2 */
685 /* CASE 3: Last chance. Look for model .dbd in target directory.
686 * At this point have to quit on any error.
688 strcpy (modelpath, newpath);
689 strcpy (modelpath + path_offset, FNAME_MODEL);
691 printf (PROGNAME"672 Try opening '%s' (new dir).\n", modelpath);
692 if ((f = fopen (modelpath, "rb")) != NULL) {
694 puts (PROGNAME"675 Found it!");
701 puts (PROGNAME"682 Never found it!");
703 printf (catgets (dtsearch_catd, MS_initausd, 213,
704 default_unable_to_open_msg),
705 "\n"PROGNAME"686", FNAME_MODEL,
706 "Not found in either current or target directories. Use -d option\a");
712 /* Open a new database */
713 *newextp = 0; /* use no extension when opening database */
715 printf ("040*** d_open newpath = '%s'.\n", newpath);
716 d_open (newpath, "o");
717 if (db_status != S_OKAY) {
718 printf (catgets (dtsearch_catd, MS_initausd, 230,
719 PROGNAME "230 Could not open database '%s'.\n"), newpath);
720 puts (vista_msg (PROGNAME "231"));
723 austext_exit_dbms = (void (*) (int)) d_close; /* emerg exit func */
725 /* initialize the 'dtsearch' database */
727 printf ("042*** d_initialize.\n");
729 if (db_status != S_OKAY) {
730 printf (catgets (dtsearch_catd, MS_initausd, 239,
731 PROGNAME "239 Could not initialize database '%s'.\n"), newpath);
732 puts (vista_msg (PROGNAME "240"));
737 /* Create and initialize dbrec database header record in first slot.
738 * First fill entire record with binary zeros.
739 * Then set specific values as specified by flavor on command line.
740 * For now most values are hard-coded.
743 printf ("050*** create dbrec.\n");
744 memset (&dbrec, 0, sizeof (dbrec));
746 /* Init fields that are completely independent */
/* The casts convert the int globals to the DtSrINT16 record fields. */
747 dbrec.or_language = (DtSrINT16) language;
748 dbrec.or_maxwordsz = (DtSrINT16) maxwordsz;
749 dbrec.or_minwordsz = (DtSrINT16) minwordsz;
750 dbrec.or_fzkeysz = (DtSrINT16) fzkeysz;
751 dbrec.or_abstrsz = (DtSrINT16) abstrsz;
752 dbrec.or_dbflags = ORD_NONOTES | ORD_NOMARKDEL | ORD_XWORDS;
/* strncpy does not terminate the destination when the source fills it,
 * so the last byte is forced to NUL afterwards. */
753 strncpy (dbrec.or_version, SCHEMA_VERSION, sizeof(dbrec.or_version));
754 dbrec.or_version [sizeof(dbrec.or_version) - 1] = 0;
756 /* Load dbrec's recslots fields based on correct number
757 * of misc recs required to hold user's abstract.
758 * Round abstrsz upward if there is any space left on last misc rec.
760 dbrec.or_recslots = 1; /* start with obj rec itself */
/* i starts as the combined fzkey and abstract size and drops by one misc
 * record per pass. The loop stops with i at zero or below; when i ends
 * below zero its magnitude is the room left unused in the last misc
 * record. */
761 for (i = dbrec.or_fzkeysz + dbrec.or_abstrsz; i > 0; i -= max_ormisc_size)
764 /* Add in difference to INCREASE abstrsz */
/* i is not positive after the loop, so subtracting it cannot shrink
 * or_abstrsz. */
765 dbrec.or_abstrsz -= i;
766 printf (catgets (dtsearch_catd, MS_misc, 433,
767 "%1$sAdjusted maximum abstract size upward to %2$hd.\n"),
768 PROGNAME "433 ", dbrec.or_abstrsz);
771 /* Init fields that are dependent on language */
775 dbrec.or_dbflags |= ORD_XSTEMS;
781 /* Init fields that are dependent on flavor */
782 if (flavor == AUSTEXT_FLAVOR) {
783 dbrec.or_dbaccess = ORA_BLOB;
784 dbrec.or_compflags = ORC_COMPBLOB;
785 dbrec.or_hufid = -1L; /* -1 = use huffman compression, but
786 * hufid not yet known. */
787 dbrec.or_dbotype = DtSrObjTEXT;
789 else { /* default flavor == DTSEARCH_FLAVOR */
790 dbrec.or_dbaccess = ORA_NOTAVAIL;
794 /******putchar ('\n');******/
/* print_dbrec() runs before swab_dbrec(), so it is given the record as
 * it stands before the HTON conversion. */
795 print_dbrec (newpath, &dbrec);
798 swab_dbrec (&dbrec, HTON);
800 printf ("060*** fillnew dbrec.\n");
801 d_fillnew (OR_DBREC, &dbrec, 0);
802 if (db_status != S_OKAY) {
803 printf ("%s", catgets (dtsearch_catd, MS_initausd, 509,
804 PROGNAME "509 Could not initialize database header record.\n"));
805 puts (vista_msg (PROGNAME "510"));
809 /* Close the database */
811 austext_exit_dbms = NULL; /* emerg exit no longer required */
813 /* Delete all nonvista (inverted index) database files (.d9x) */
/* Each call rewrites the extension held at newextp inside newpath. */
814 remove_d9x_file (".d97");
815 remove_d9x_file (".d98");
816 remove_d9x_file (".d99");
818 *newextp = 0; /* no extension suffixes for next msgs */
819 printf (catgets (dtsearch_catd, MS_initausd, 24,
820 PROGNAME " Successfully initialized database '%s'.\n"), newpath);
825 /************************* DTSRCREATE.C **************************/