2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
24 * COMPONENT_NAME: austext
26 * FUNCTIONS: TERMINATE_LINE
43 * (C) COPYRIGHT International Business Machines Corp. 1993,1995
45 * Licensed Materials - Property of IBM
46 * US Government Users Restricted Rights - Use, duplication or
47 * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
49 /*************************** DTSRCLEAN.C ****************************
50 * $TOG: dtsrclean.c /main/9 1998/04/17 11:23:57 mgreess $
51 * Does garbage collection (ie compression) of .d99 file.
52 * Optionally verifies all database addresses in d99.
53 * Modification of clndtbs.c and checkd99.c.
54 * Does NOT use austext engine so this must be modified if schema changes.
57 * All command input is on command line. Reads existing d2x and d99 files.
60 * New .d2x and .d99 files are placed into the directory specified by user.
62 * EXIT CODE STANDARDS:
64 * 1 = warnings, but output should be ok.
65 * 2 = failure in cmd line parse or other initialization; job never started.
66 * 3 - 49 = fatal error, but output may be acceptable.
67 * 50 - 99 = fatal error and output files are probably unusable.
68 * (In this program, even input may be corrupted).
69 * 100+ = aborting due to asynchronous interrupt signal.
70 * Output files may or may not be unusable.
73 * Revision 2.4 1996/05/08 16:20:50 miker
74 * Added RENFILEs for new d2x files; austext_dopen no longer does.
76 * Revision 2.3 1996/02/01 18:13:06 miker
77 * Deleted BETA definition.
79 * Revision 2.2 1995/10/26 14:51:08 miker
80 * Renamed from mrclean.c. Added prolog.
83 * Revision 2.1 1995/09/22 21:18:52 miker
84 * Freeze DtSearch 0.1, AusText 2.1.8
86 * Revision 1.11 1995/09/05 18:16:46 miker
87 * Name, msg, and other minor changes for DtSearch.
88 * Print messages if austext_dopen() fails.
90 * Revision 1.10 1995/06/02 15:52:42 miker
91 * Cleaned up -m and bit vector overflow msgs.
93 * Revision 1.9 1995/05/30 19:15:58 miker
94 * Print beta char in startup banner msg.
95 * Remove -m option and max_totrecs; select bit vector
96 * size from maxdba, not reccount.
105 #include <sys/stat.h>
108 #include <sys/types.h>
109 #include <netinet/in.h>
/* Compile-time constants: message catalog set numbers, progress and
 * corruption defaults, d99 block sizing, and end_of_job() flag bits.
 */
111 #define MS_misc 1 /* msg catalog set number */
112 #define MS_dtsrclean 26 /* msg catalog set number */
113 #define DISCARD_FORMAT "%s\t\"%s\"\t%s\t%s\n" /* copied from oe.h */
114 #define RECS_PER_DOT 1000 /* default for -p: word records per progress dot */
115 #define DOTS_PER_MSG 50 /* dots printed before a full progress message */
116 #define DISK_BLKSIZE 512 /* bytes in one d99 block transfer */
117 #define MAX_CORRUPTION 100 /* default for -c: corrupted links tolerated before abort */
118 #define MAX_REC_READ (DISK_BLKSIZE / sizeof(DB_ADDR)) /* DB_ADDR entries per block */
120 * Max number of addresses to be read from database addresses
121 * file, ie the size of one block read from hard disk.
/* PROGNAME is pasted in front of a message number to form the id that
 * starts each error message, for example PROGNAME followed by 439.
 */
123 #define PROGNAME "DTSRCLEAN"
125 #define SHOW_NOTHING 0 /* bit arguments for end_of_job() */
127 #define SHOW_EXITCODE 2 /* end_of_job() prints the exit code */
128 #define SHOW_PROGRESS 4 /* end_of_job() prints the final progress message */
/* Ends a pending line of progress dots: when need_linefeed is set, writes
 * a newline to aa_stderr and clears the flag.
 * NOTE(review): expands to a bare if statement with its own braces, not a
 * do/while(0) wrapper, so it must not be placed directly before an else.
 */
130 #define TERMINATE_LINE() if(need_linefeed){fputc('\n',aa_stderr);need_linefeed=FALSE;}
132 /*-------------------------- GLOBALS ----------------------------*/
133 static char *arg_dbname = NULL; /* database name from the command line */
134 static char *arg_newpath = NULL; /* output directory from the command line */
135 unsigned char *bit_vector = NULL; /* validation mode only: 4 bits per d00 address */
136 static size_t bytes_in = 0L; /* running total of d99 bytes accounted for by copy_new_d99() */
137 static size_t corruption_count = 0L; /* corrupted links seen; compared with max_corruption */
138 static struct or_swordrec
140 static struct or_lwordrec
142 static struct or_hwordrec
144 static char datestr[32] = ""; /* "1946/04/17 13:03" */
145 static int debug_mode = FALSE;
146 static size_t dot_count = 0L; /* progress dots since the last full progress message */
147 char fname_d99_new[1024];
148 char fname_d99_old[1024];
149 FILE *fp_d99_new = NULL;
150 FILE *fp_d99_old = NULL;
151 static FILE *frecids = NULL; /* output file opened by -v<fname>, else NULL */
152 static int is_valid_dba; /* per-address flag; cleared by validation_error() */
153 static size_t max_corruption = MAX_CORRUPTION; /* set by -c; 0 disables the limit */
154 static int normal_exitcode = 0; /* raised to 1 when a warning was issued */
155 static int need_linefeed = FALSE; /* TRUE while a line of dots is open; see TERMINATE_LINE */
156 static int overlay_no = FALSE;
157 static int overlay_yes = FALSE;
161 recslots; /* dbrec.or_recslots promoted to INT32 */
165 recs_per_dot = RECS_PER_DOT;
166 static int rewrite_reccount = FALSE;
167 static int shutdown_now = 0; /* = FALSE */
168 static size_t size_d21_old = 0L; /* sizes of the old files, filled in by open_all_files() */
169 static size_t size_d22_old = 0L;
170 static size_t size_d23_old = 0L;
171 static size_t size_d99_old = 0L;
172 static time_t timestart = 0L; /* start time used for the elapsed time in print_progress() */
175 static int validation_mode = FALSE; /* TRUE when -v was given */
178 /********************************************************/
180 /* signal_shutdown */
182 /********************************************************/
183 /* Signal handler that main() installs for SIGINT and for the other
 * signals it registers (SIGHUP, SIGQUIT, SIGTERM and so on).
 * The handler does no cleanup itself.  It only stores the pending exit
 * code, 100 + signal number, in shutdown_now.  The copy routines later
 * pass shutdown_now to end_of_job() as the exit code.
 * NOTE(review): shutdown_now is a plain static int rather than
 * volatile sig_atomic_t -- confirm this is acceptable on the target
 * platforms.
 */
184 static void signal_shutdown (int sig)
186 shutdown_now = 100 + sig;
188 } /* signal_shutdown() */
191 /************************************************/
195 /************************************************/
196 /* Prints the usage statement to aa_stderr.
 * The text is message 1 of catalog set MS_dtsrclean, with the English
 * text below as the fallback.  Its four conversions are filled, in order,
 * with aa_argv0, RECS_PER_DOT, DOTS_PER_MSG and MAX_CORRUPTION.
 * NOTE(review): RECS_PER_DOT is the plain integer constant 1000 but is
 * consumed by a %lu conversion -- confirm, a cast to unsigned long looks
 * necessary for a well defined call.
 */
197 static void print_usage (void)
199 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 1,
200 "\nUSAGE: %s [options] <dbname> <newpath>\n"
201 " Compresses unused d99 space and validates d00-d99 links.\n"
202 " -p<N> Progress dots printed every <N> records (default %lu).\n"
203 " Complete progress message printed every %d dots.\n"
204 " -oy Authorizes overlaying preexisting d99/d2<N> files in newpath.\n"
205 " -on Forces exit if preexisting d99/d2<N> files in newpath.\n"
206 " -v Validates d99 and d00 links, uncorrupts d99 file, and ensures\n"
207 " accurate record count. Also use -c0 to uncorrupt entire database.\n"
208 " -v<fname> Same as -v but also writes all d00 recs unreferenced by d99\n"
209 " to <fname> in format suitable to extract into .fzk file format.\n"
210 " -c<N> Exits if more than <N> corrupted/incomplete links (default %d).\n"
211 " Corruption limit turned off by -c0.\n"
212 " <dbname> 1 - 8 char database name = the old d99/d2<N> files to be updated.\n"
213 " Files found in local directory or DBFPATH environment variable.\n"
214 " <newpath> Specifies where the new d99/d2<N> files will be placed.\n"
215 " If first char is not slash, path is relative to local directory.\n"
217 " 0: Complete success. 1: Warning. 2: Job never started.\n"
218 " 3-49: Job ended prematurely, old files ok, new files unusable.\n"
219 " 50-99: Fatal Error, even old database may be corrupted.\n"
220 " 100+: Ctrl-C, kill, and all other signal interrupts cause premature\n"
221 " end, new files may be unusable. Signal = exit code - 100.\n")
222 ,aa_argv0, RECS_PER_DOT, DOTS_PER_MSG, MAX_CORRUPTION);
224 } /* print_usage() */
227 /************************************************/
231 /************************************************/
232 /* Prints progress msg after dots or at end of job.
233 * Label is "Final" or "Progress".
/* Writes a two line progress report to aa_stderr.
 *   label   text inserted into the first line; end_of_job() passes Final
 *           and copy_new_d99() passes Progress.
 * Line one (message 2) shows a percentage computed as
 * 100 * bytes_in / size_d99_old, bytes_in / 1000 as a KB figure, and the
 * time elapsed since timestart as minutes and seconds.
 * Line two (message 3) shows reccount as the number of words counted in
 * the d99 file of arg_dbname.
 */
235 static void print_progress (char *label)
240 seconds = time (NULL) - timestart; /* total seconds elapsed */
/* NOTE(review): the quotient tested below is a fraction of the old file
 * size, yet it is compared against 99.5, which looks like a percentage
 * threshold -- confirm the intended comparison.
 */
244 if ((float) bytes_in / (float) size_d99_old >= 99.5)
247 compression = (int) (100.* (float) bytes_in / (float) size_d99_old);
248 if (compression < 0 || compression > 100)
253 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 2,
254 "%s: %s Compression %d%% (about %lu KB) in %ld:%02ld min:sec.\n") ,
255 aa_argv0, label, compression, bytes_in / 1000L,
256 seconds / 60UL, seconds % 60UL);
258 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 3,
259 "%s: Counted %ld WORDS in %s.d99.\n") ,
260 aa_argv0, (long)reccount, arg_dbname);
262 } /* print_progress() */
265 /************************************************/
269 /************************************************/
270 /* Exits program. Prints status messages before going down.
271 * Should be called on even record boundaries whenever possible,
272 * ie after record writes complete and shutdown_now > 0 (TRUE).
/* Prints the closing status messages and terminates the program through
 * DtSearchExit (exitcode).
 *   exitcode    process exit code.  Values of 100 or more are reported as
 *               an interrupt, with the signal number shown as
 *               exitcode - 100.
 *   show_flags  bitwise combination of the SHOW_ constants selecting the
 *               optional output: SHOW_PROGRESS prints the final progress
 *               report, SHOW_USAGE guards the usage output, SHOW_EXITCODE
 *               prints the exit code.
 * Corruption reporting: in validation mode with no corrupted links a
 * message says so.  When corruption_count is nonzero, message 193 is
 * used if a nonzero max_corruption limit has been reached, and message
 * 194 reports the number of links detected.
 * NOTE(review): corruption_count is declared size_t but is printed with
 * %ld in message 193 -- confirm, a cast to long looks necessary.
 */
274 static void end_of_job (int exitcode, int show_flags)
277 if (exitcode >= 100) {
278 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 66,
279 "%s Aborting after interrupt signal %d.\n"),
280 PROGNAME"66", exitcode - 100);
282 if (validation_mode && corruption_count == 0L)
283 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 4,
284 "%s: No corrupted links detected.\n") ,
286 if (corruption_count > 0L) {
287 if (max_corruption > 0L && corruption_count >= max_corruption)
288 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 193,
289 "%s Aborting at %ld corrupted links.\n"),
290 PROGNAME"193", corruption_count);
292 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 194,
293 "%s Detected%s %ld corrupted/incomplete link(s).\n"),
295 (validation_mode) ? " and corrected" : "",
298 if (show_flags & SHOW_PROGRESS) {
299 print_progress ("Final");
301 if (show_flags & SHOW_USAGE)
303 if (show_flags & SHOW_EXITCODE)
304 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 5,
305 "%s: Exit code = %d.\n") , aa_argv0, exitcode);
306 DtSearchExit (exitcode);
310 /************************************************/
312 /* user_args_processor() */
314 /************************************************/
315 /* Reads and verifies users command line arguments and
316 * converts them into internal switches and variables.
317 * Some attempt is made to read as many errors as possible
318 * before ending job for bad arguments.
/* Parses the command line and stores the results in file-scope variables.
 *   argc, argv  the arguments as received by main().
 * Arguments beginning with a dash are treated as options and switched on
 * the lowercased character after the dash.  Effects visible here:
 *   - the exact string -russell is a debugging backdoor;
 *   - the obsolete -m option only produces message 301;
 *   - one option inspects its third character and rejects other values
 *     with message 177 (presumably -o with y or n -- confirm);
 *   - validation sets validation_mode; any text after the option letter
 *     is opened for writing as frecids, and datestr is formatted;
 *   - recs_per_dot and max_corruption are read with atol from the text
 *     after the option letter;
 *   - anything else is reported with message 159.
 * After the options exactly two arguments are expected: the database
 * name, limited to 8 characters, stored in arg_dbname, and the new path,
 * stored in arg_newpath.  Argument errors end the job with exit code 2
 * and the usage text.
 * NOTE(review): atol gives no indication of malformed numbers -- confirm
 * whether strtol based checking is wanted.
 */
320 static void user_args_processor (int argc, char **argv)
329 end_of_job (2, SHOW_USAGE);
331 /* parse all args that begin with a dash (-) */
335 if (argptr[0] != '-')
337 switch (tolower (argptr[1])) {
339 if (strcmp (argptr, "-russell") == 0) /* backdoor debug */
345 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 301,
346 "%s The -m argument is no longer necessary.\n"),
351 i = tolower (argptr[2]);
359 catgets(dtsearch_catd, MS_dtsrclean, 177,
360 "%s Invalid %.2s argument.\n"),
361 PROGNAME"177", argptr);
367 validation_mode = TRUE;
368 if (argptr[2] != '\0') {
369 if ((frecids = fopen (argptr + 2, "w")) == NULL) {
371 catgets(dtsearch_catd, MS_dtsrclean, 802,
372 "%s Unable to open '%s' to output"
373 " unreferenced d00 records:\n %s\n"),
374 PROGNAME"802", argptr, strerror(errno));
378 strftime (datestr, sizeof (datestr),
379 "%Y/%m/%d %H:%M", localtime (&stamp));
384 recs_per_dot = (DtSrINT32) atol (argptr + 2);
385 if (recs_per_dot <= 0)
390 tempsize = atol (argptr + 2);
393 max_corruption = tempsize;
398 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 159,
399 "%s Unknown argument: '%s'.\n"),
400 PROGNAME"159", argptr);
404 } /* end parse of cmd line args */
406 /* Test how we broke loop.
407 * There should still be 2 args past the ones
408 * beginning with a dash: dbname and newpath.
412 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 210,
413 "%s Missing required dbname argument.\n"),
416 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 211,
417 "%s Missing required newpath argument.\n"),
420 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 212,
421 "%s Too many arguments.\n"),
426 end_of_job (2, SHOW_USAGE);
429 arg_dbname = argv[0];
430 if (strlen (arg_dbname) > 8) {
431 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 229,
432 "%s Invalid database name '%s'.\n"),
433 PROGNAME"229", arg_dbname);
434 end_of_job (2, SHOW_USAGE);
438 * Oldpath and newpath are validated when the files
439 * are copied and the database is opened.
441 arg_newpath = argv[1];
443 } /* user_args_processor() */
446 /************************************************/
448 /* validation_error() */
450 /************************************************/
451 /* Subroutine of validation_mode in main().
452 * Prints d2x and d99 data at location of error.
453 * Adjusts d2x counts for number of good addrs and free slots.
/* Reports one corrupted d99 link found by copy_new_d99() in validation
 * mode.
 *   dbaorig  the address value as read from the d99 file, with the record
 *            number in the upper three bytes and the weight in the low
 *            byte.
 * Effects: clears is_valid_dba, converts the record number back to a
 * slot number using recslots and dba_offset, prints the address together
 * with the key, offset, address count and free count saved in d23old, and
 * decrements d23new.or_hwaddrs, never letting it go below zero.
 */
455 static void validation_error (DB_ADDR dbaorig)
458 is_valid_dba = FALSE;
462 /* now efim retranslates back to real dba */
464 slot = ((slot + 1) * recslots - dba_offset)
467 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 6,
468 " DBA = %d:%ld (x%02x:%06lx), orig addr val = x%08lx\n"
469 " Word='%c%s' offset=%ld addrs=%ld free=%d\n") ,
470 OR_D00, slot, OR_D00, slot, dbaorig,
/* A first key character that is not printable is shown as a caret. */
471 (!isgraph (d23old.or_hwordkey[0])) ? '^' : d23old.or_hwordkey[0],
472 d23old.or_hwordkey + 1, d23old.or_hwoffset,
473 d23old.or_hwaddrs, d23old.or_hwfree);
/* One fewer good address will be written for this word. */
474 if (--d23new.or_hwaddrs < 0L)
475 d23new.or_hwaddrs = 0L;
476 /* (should never occur) */
480 } /* validation_error() */
483 /************************************************/
487 /************************************************/
/* Opens one file and, for input files, records its size.
 *   fp     receives the stream returned by fopen.
 *   fname  path of the file to open.
 *   mode   fopen mode string; main() uses rb for old files and wb for
 *          new ones.
 *   size   receives st_size from fstat.  main() passes NULL here for the
 *          new output files.
 *   oops   error flag shared across calls so that main() can try every
 *          file before ending the job (presumably set on each failure
 *          below -- confirm).
 * Failures are reported on aa_stderr: message 439 when the open fails,
 * 440 when fstat fails, 499 when the size is not positive.
 * NOTE(review): callers pass size as NULL for output files; the guard that
 * must precede the fstat and size checks in that case should be confirmed.
 */
488 static void open_all_files
489 (FILE ** fp, char *fname, char *mode, size_t * size, int *oops) {
490 struct stat fstatbuf;
492 if ((*fp = fopen (fname, mode)) == NULL) {
493 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 439,
494 "%s Can't open %s: %s\n"),
495 PROGNAME"439", fname, strerror (errno));
499 if (fstat (fileno (*fp), &fstatbuf) == -1) {
500 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 440,
501 "%s Can't access status of %s: %s\n"),
502 PROGNAME"440", fname, strerror (errno));
507 if ((*size = fstatbuf.st_size) <= 0L) {
508 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 499,
509 "%s %s is empty.\n"),
510 PROGNAME"499", fname);
514 } /* open_all_files() */
517 /************************************************/
519 /* copy_old_d2x_to_new */
521 /************************************************/
/* Copies one old d2x file to its new location as raw bytes.
 *   fname_old, fname_new  file names, used only in messages.
 *   fp_old                open input stream.
 *   fp_new                open output stream.
 * Data moves through readbuf one fread / fwrite pair at a time, with no
 * byte swapping.  A read or write error ends the job with exit code 3.
 * A pending shutdown_now value is handed to end_of_job() as the exit
 * code.
 */
522 static void copy_old_d2x_to_new
523 (char *fname_old, char *fname_new, FILE * fp_old, FILE * fp_new) {
524 char readbuf[1024 + 32];
527 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 7,
528 "%s: Copying from old d2x files to %s...\n") ,
529 aa_argv0, fname_new);
530 for (;;) { /* loop ends when eof set on input stream */
532 i = fread (readbuf, 1, sizeof (readbuf), fp_old);
533 /* byte swap not required on pure copy operation */
535 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 517,
536 "%s Read error on %s: %s.\n"),
537 PROGNAME"517", fname_old, strerror (errno));
538 end_of_job (3, SHOW_EXITCODE);
/* Write back exactly the number of bytes that the read returned. */
540 j = fwrite (readbuf, 1, i, fp_new);
542 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 489,
543 "%s Write error on %s: %s.\n"),
544 PROGNAME"489", fname_new, strerror (errno));
545 end_of_job (3, SHOW_EXITCODE);
548 end_of_job (shutdown_now, SHOW_EXITCODE);
556 } /* copy_old_d2x_to_new() */
559 /********************************/
563 /********************************/
564 /* Performs vista RECREAD on curr word record.
565 * CALLER SHOULD CHECK DB_STATUS.
/* Reads the current word record and presents it in the or_hwordrec
 * layout with the numeric fields in host byte order.
 *   glob_word  destination record.
 *   field      key field id selecting the record type.
 * For OR_SWORDKEY the record is read into d21old, for OR_LWORDKEY into
 * d22old.  In both cases the key is copied into glob_word, explicitly
 * terminated at index DtSrMAXWIDTH_HWORD - 1, and the offset, free and
 * addrs fields are converted with ntohl.  For any other field value the
 * record is read straight into glob_word and the three numeric fields are
 * converted with NTOHL.
 * The caller is expected to check db_status after the call.
 */
567 void read_d2x (struct or_hwordrec * glob_word, long field)
569 if (field == OR_SWORDKEY) {
570 RECREAD (PROGNAME "061", &d21old, 0);
571 if (db_status != S_OKAY)
573 strncpy (glob_word->or_hwordkey, d21old.or_swordkey,
575 glob_word->or_hwordkey[DtSrMAXWIDTH_HWORD - 1] = 0;
576 glob_word->or_hwoffset = ntohl (d21old.or_swoffset);
577 glob_word->or_hwfree = ntohl (d21old.or_swfree);
578 glob_word->or_hwaddrs = ntohl (d21old.or_swaddrs);
580 else if (field == OR_LWORDKEY) {
581 RECREAD (PROGNAME "069", &d22old, 0);
582 if (db_status != S_OKAY)
584 strncpy (glob_word->or_hwordkey, d22old.or_lwordkey,
586 glob_word->or_hwordkey[DtSrMAXWIDTH_HWORD - 1] = 0;
587 glob_word->or_hwoffset = ntohl (d22old.or_lwoffset);
588 glob_word->or_hwfree = ntohl (d22old.or_lwfree);
589 glob_word->or_hwaddrs = ntohl (d22old.or_lwaddrs);
592 RECREAD (PROGNAME "078", glob_word, 0);
593 glob_word->or_hwordkey[DtSrMAXWIDTH_HWORD - 1] = 0;
594 NTOHL (glob_word->or_hwoffset);
595 NTOHL (glob_word->or_hwfree);
596 NTOHL (glob_word->or_hwaddrs);
602 /********************************/
606 /********************************/
607 /* performs vista RECWRITE on curr word record.
608 * CALLER MUST CHECK DB_STATUS.
/* Writes the current word record from the or_hwordrec layout, converting
 * the numeric fields to network byte order.
 *   glob_word  source record with host order fields.
 *   field      key field id selecting the record type.
 * For OR_SWORDKEY the data is staged in d21new, for OR_LWORDKEY in
 * d22new, and that staging record is written.  For any other field value
 * glob_word itself is converted with HTONL and written.
 * The caller must check db_status after the call.
 * NOTE(review): in the last case HTONL is applied to the fields of
 * glob_word itself, so the record of the caller is presumably left in
 * network order afterwards -- confirm.
 * NOTE(review): the keys are copied with strcpy and no length limit,
 * unlike the bounded copies in read_d2x() -- confirm the destination
 * key fields are large enough.
 */
610 static void write_d2x (struct or_hwordrec * glob_word, long field)
612 if (field == OR_SWORDKEY) {
613 strcpy (d21new.or_swordkey, glob_word->or_hwordkey);
614 d21new.or_swoffset = htonl (glob_word->or_hwoffset);
615 d21new.or_swfree = htonl (glob_word->or_hwfree);
616 d21new.or_swaddrs = htonl (glob_word->or_hwaddrs);
617 RECWRITE (PROGNAME "102", &d21new, 0);
619 else if (field == OR_LWORDKEY) {
620 strcpy (d22new.or_lwordkey, glob_word->or_hwordkey);
621 d22new.or_lwoffset = htonl (glob_word->or_hwoffset);
622 d22new.or_lwfree = htonl (glob_word->or_hwfree);
623 d22new.or_lwaddrs = htonl (glob_word->or_hwaddrs);
624 RECWRITE (PROGNAME"112", &d22new, 0);
627 HTONL (glob_word->or_hwoffset);
628 HTONL (glob_word->or_hwfree);
629 HTONL (glob_word->or_hwaddrs);
630 RECWRITE (PROGNAME "115", glob_word, 0);
637 /************************************************/
641 /************************************************/
642 /* The garbage collection/compression process itself.
643 * For very large databases, there will be appx 3 million word records,
644 * so the loop should be coded for ***EFFICIENCY***.
/* Rebuilds the d99 address data for every word reachable through one key
 * field, writing to fp_d99_new and updating the matching d2x records.
 *   keyfield  key field id passed to KEYFRST / KEYNEXT, read_d2x() and
 *             write_d2x().
 *
 * For each key, while db_status stays S_OKAY:
 *   1. read_d2x() loads the word record into d23new.  In validation mode
 *      a copy is kept in d23old for use in error messages.
 *   2. or_hwfree is forced to zero before num_holes is computed, so
 *      num_holes equals or_hwaddrs and the old free slot area is not
 *      carried over.  bytes_in grows by sizeof (DB_ADDR) per hole.
 *   3. The old d99 is positioned at the old offset, then or_hwoffset is
 *      replaced with the current write position in the new d99.
 *   4. Addresses move in blocks of at most MAX_REC_READ entries.
 *      Without validation each block is written exactly as read, still
 *      in network byte order.  With validation each entry is converted
 *      with NTOHL and checked, and entries are gathered in word_addrs_out
 *      before being written; per the inline comments, corrupt links are
 *      skipped.
 *   5. write_d2x() stores the updated record.
 *
 * Validation checks on each entry, whose upper three bytes hold the
 * record number and whose low byte holds the weight:
 *   - a value of -1 means the address count overran into the free area;
 *   - a record number at or above total_num_addrs cannot exist in d00;
 *   - the bit vector must show the record as present in d00.
 * Every failure calls validation_error().  Once a nonzero max_corruption
 * limit is reached the job ends with exit code 91, 92 or 93 respectively.
 *
 * Bit vector layout as used here: one byte holds two record numbers, the
 * even one in the high nibble and the odd one in the low nibble.  Within
 * a nibble the lowest bit means the record exists in d00, the second bit
 * is set when a d99 link references it, and the third bit marks a record
 * already reported as missing.
 *
 * Read and write errors on the d99 files end the job with exit code 4.
 * A progress dot is printed whenever reccount is a multiple of
 * recs_per_dot, and a full progress message once dot_count exceeds
 * DOTS_PER_MSG.  A pending shutdown_now value is handed to end_of_job()
 * between words.
 *
 * NOTE(review): the validated output buffer is converted with NTOHL just
 * before the write; presumably NTOHL and HTONL perform the same byte
 * swap -- confirm.
 * NOTE(review): the return values of fseek and ftell are not checked in
 * the visible code -- confirm whether that is intended.
 */
646 static void copy_new_d99 (long keyfield)
650 DtSrINT32 slots_left;
651 unsigned char *bvptr;
653 DB_ADDR dba, dbaorig;
657 DtSrINT32 good_addrs_left;
658 DtSrINT32 good_addrs_this_block;
659 DtSrINT32 num_reads, num_writes;
660 DB_ADDR word_addrs[MAX_REC_READ + 64]; /* d99 read buf */
661 DB_ADDR word_addrs_out[MAX_REC_READ + 64]; /* d99 write buf */
663 KEYFRST (PROGNAME "179", keyfield, 0);
664 while (db_status == S_OKAY) {
665 read_d2x (&d23new, keyfield);
666 if (validation_mode) /* save for validation err msgs */
667 memcpy (&d23old, &d23new, sizeof (d23old));
670 * Read old d99 file at specified offset to get total num
671 * "holes". In the first portion of record holes are filled
672 * with representations of valid database addresses +
673 * statistical weights. In the second portion the holes are
674 * "free slots" for future expansion which are
675 * conventionally initialized with a -1.
677 /* force number of free slots to 0(ZERO) */
678 d23new.or_hwfree = 0;
679 fseek (fp_d99_old, d23new.or_hwoffset, SEEK_SET);
680 num_holes = d23new.or_hwaddrs + d23new.or_hwfree;
681 good_addrs_left = d23new.or_hwaddrs;
682 bytes_in += sizeof (DB_ADDR) * num_holes;
684 /* Update the offset in the d2x record buffer */
685 d23new.or_hwoffset = ftell (fp_d99_new);
688 * Copy the array of holes in each disk block, reading the
689 * old and writing to the new. Loop ends when the number
690 * of holes left will fit into one last block.
693 while (!done) { /* loop on each block in this word */
694 if (num_holes > MAX_REC_READ) {
695 num_reads = MAX_REC_READ;
696 num_holes -= MAX_REC_READ;
700 num_reads = num_holes;
703 fread (word_addrs, sizeof(DB_ADDR), (size_t)num_reads, fp_d99_old);
706 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 657,
707 "%s Read error on %s: %s.\n"),
708 PROGNAME"657", fname_d99_old, strerror (errno));
709 end_of_job (4, SHOW_PROGRESS + SHOW_EXITCODE);
711 /* Note BYTE_SWAP only needed for validation_mode.
712 * If not validating, we're just going to copy
713 * the network format dba's as is directly to
718 * Addrs on d99 are now 'record numbers' not dbas. A
719 * rec# is what the dba/slot# would be if records took
720 * up just one slot and there were no dbrec at start of
721 * file. D99 rec#s start at #1, not #0.
725 * If user requested validation_mode, validate each
726 * 'good' rec# (not free slots) in word_addrs buffer.
727 * If any d99 links are corrupt, skip them when copying
728 * to the new d99 file. Rewrite -1's to all free slots.
729 * ----> NOTE UNUSUAL FORMAT OF DBA HOLES IN D99! <----
730 * Record number is shifted to the high order 3 bytes.
731 * The statistical weight is in the low order byte. The
732 * vista file number is known from the #define constant
733 * OR_D00, and the vista dba/slot# is mapped from rec#
734 * by mult/div number of slots per rec, plus/minus
737 if (validation_mode) {
739 for (swapx = 0; swapx < num_reads; swapx++)
740 NTOHL (word_addrs[swapx]);
742 /* set x to number of good addrs in this block */
743 if (good_addrs_left > num_reads) {
745 good_addrs_left -= num_reads;
753 * Validate the rec#'s in this block. Note that
754 * the loop is skipped if the entire block is free
757 good_addrs_this_block = 0;
758 for (a = 0; a < x; a++) { /* a = index to curr dba */
760 * Get rec#. Save original rec# for err msgs,
761 * then shift slot number to lower 3 bytes,
764 dbaorig = word_addrs[a]; /* rec#,rec#,rec#:wt */
765 dba = dbaorig >> 8; /* 0,rec#,rec#,rec# */
766 is_valid_dba = TRUE; /* default */
769 * If original rec# == -1 we've overrun the
770 * good rec#'s into the expansion area, which
771 * is filled with -1's. This is real bad news
772 * because if the counts in d02 are bad, the
773 * online programs will quickly crash, and we
774 * can't continue this program. Advance to next
775 * rec# because we can't mark the bit vector.
777 if (dbaorig == -1L) {
780 catgets(dtsearch_catd, MS_dtsrclean, 111,
781 "*** %s DBA in d99 = -1. "
782 "Probable overrun into expansion\n"
783 " area due to incorrect count values "
786 validation_error (dbaorig);
788 if (max_corruption > 0L &&
789 corruption_count >= max_corruption)
790 end_of_job (91, SHOW_PROGRESS + SHOW_EXITCODE);
791 continue; /* skip the bit vector
796 * If slot number > max totrecs, we have a
797 * corrupted d99-d00 link because we've already
798 * validated the d00 file and we know that it
799 * has no slots > max. Also we have to advance
800 * to next slot because we can't mark the bit
803 /******if (dba >= max_totrecs)*******/
804 if (dba >= total_num_addrs) {
807 catgets(dtsearch_catd, MS_dtsrclean, 222,
808 "*** %s DBA in d99 not in d00,"
809 " slot > max num docs.\n"),
811 validation_error (dbaorig);
813 if (max_corruption > 0L &&
814 corruption_count >= max_corruption)
815 end_of_job (92, SHOW_PROGRESS + SHOW_EXITCODE);
816 continue; /* skip the bit vector check */
820 * Verify that dba exists in d00 file (test bit
821 * #1). If not, mark bit #3 (3rd lowest) in
822 * nibble and print error msg unless bit #3
825 bvptr = bit_vector + (dba >> 1);
826 is_odd_nibble = (dba & 1L);
827 if (!(*bvptr & ((is_odd_nibble) ? 0x01 : 0x10))) {
829 if (!(*bvptr & ((is_odd_nibble) ? 0x04 : 0x40))) {
831 *bvptr |= (is_odd_nibble) ? 0x04 : 0x40;
834 catgets(dtsearch_catd, MS_dtsrclean, 333,
835 "*** %s DBA in d99 does not exist in d00.\n"),
837 validation_error (dbaorig);
839 if (max_corruption > 0L &&
840 corruption_count >= max_corruption)
841 end_of_job (93, SHOW_PROGRESS + SHOW_EXITCODE);
842 } /* endif where corrupt link
847 * Mark bit #2 in bit vector indicating a d99
850 *bvptr |= (is_odd_nibble) ? 0x02 : 0x20; /* bit #2 */
853 * move good dba to curr output block, incr
857 word_addrs_out[good_addrs_this_block++] = dbaorig;
859 } /* end validation loop for each good dba in
863 * Write out only validated addrs in current block.
864 * If this was the last block, fill out all the
865 * free slots, if any, with -1 values, and exit the
866 * dba loop for this word.
868 if (good_addrs_this_block > 0) {
870 for (swapx = 0; swapx < good_addrs_this_block; swapx++)
871 NTOHL (word_addrs_out[swapx]);
873 num_writes = fwrite (word_addrs_out, sizeof (DB_ADDR),
874 (size_t)good_addrs_this_block, fp_d99_new);
875 if (num_writes != good_addrs_this_block)
878 if (good_addrs_left <= 0) {
880 * Write blocks of -1s until new d2x free slot
881 * count is exhausted. The last block may be <
884 slots_left = d23new.or_hwfree;
885 while (slots_left > 0) {
887 * set x to number of -1's to write for
890 if (slots_left > MAX_REC_READ) {
892 slots_left -= MAX_REC_READ;
898 for (a = 0; a < x; a++)
899 word_addrs_out[a] = (DtSrINT32) -1;
900 /* BYTE_SWAP not required for foxes */
901 num_writes = fwrite (word_addrs_out,
902 sizeof(DB_ADDR), (size_t)x, fp_d99_new);
905 } /* end while loop to write out all -1's */
908 } /* endif for validation_mode for this block */
911 * If NOT in validation mode, just write out the new
912 * d99 block as an exact copy of the input block.
913 * BYTE_SWAP not required because word_addrs is
914 * still in its original network order from the fread.
917 num_writes = fwrite (word_addrs, sizeof(DB_ADDR),
918 (size_t)num_reads, fp_d99_new);
919 if (num_writes != num_reads) {
922 catgets(dtsearch_catd, MS_dtsrclean, 665,
923 "%s Write error on %s: %s.\n"),
924 PROGNAME"665", fname_d99_new, strerror(errno));
925 end_of_job (4, SHOW_PROGRESS + SHOW_EXITCODE);
927 } /* endelse for NOT validation_mode for this block */
929 } /* end loop for all blocks for this entire word
932 /* write the updated d2x record */
933 write_d2x (&d23new, keyfield);
937 * Every now and then print a dot. Print complete progress
938 * msg after DOTS_PER_MSG dots.
940 if (!(reccount % recs_per_dot)) {
941 if (++dot_count > DOTS_PER_MSG) {
943 print_progress ("Progress");
946 fputc ('.', aa_stderr);
947 need_linefeed = TRUE;
948 if (!(dot_count % 10L))
949 fputc (' ', aa_stderr);
952 } /* end of print-a-dot */
955 end_of_job (shutdown_now, SHOW_PROGRESS + SHOW_EXITCODE);
956 KEYNEXT (PROGNAME "196", keyfield, 0);
957 } /* end of main loop on each word in database */
961 } /* copy_new_d99() */
964 /************************************************/
968 /************************************************/
969 int main (int argc, char *argv[])
973 unsigned char *bvptr;
974 DB_ADDR dba, dba1, dbaorig;
976 char fname_d21_new[1024];
977 char fname_d21_old[1024];
978 char fname_d22_new[1024];
979 char fname_d22_old[1024];
980 char fname_d23_new[1024];
981 char fname_d23_old[1024];
982 FILE *fp_d21_new = NULL;
983 FILE *fp_d21_old = NULL;
984 FILE *fp_d22_new = NULL;
985 FILE *fp_d22_old = NULL;
986 FILE *fp_d23_new = NULL;
987 FILE *fp_d23_old = NULL;
988 char full_dbname_old[1024];
989 char full_dbname_new[1024];
990 DtSrINT32 max_bitvec = 0L;
993 char readbuf[1024 + 32];
994 unsigned long reads_per_dot;
995 char recidbuf[DtSrMAX_DB_KEYSIZE + 4];
998 struct or_dbrec dbrec;
1001 setlocale (LC_ALL, "");
1002 dtsearch_catd = catopen (FNAME_DTSRCAT, 0);
1005 strftime (dbfpath, sizeof (dbfpath), /* just use any ol' buffer */
1006 catgets (dtsearch_catd, MS_misc, 22, "%A, %b %d %Y, %I:%M %p"),
1007 localtime (&starttime));
1008 printf ( catgets(dtsearch_catd, MS_dtsrclean, 11,
1009 "%s Version %s. Run %s.\n") ,
1010 aa_argv0, AUSAPI_VERSION, dbfpath);
1012 signal (SIGHUP, signal_shutdown);
1013 signal (SIGINT, signal_shutdown);
1014 signal (SIGQUIT, signal_shutdown);
1015 signal (SIGTRAP, signal_shutdown);
1016 signal (SIGKILL, signal_shutdown); /* this cannot be trapped */
1017 signal (SIGALRM, signal_shutdown);
1018 signal (SIGTERM, signal_shutdown);
1019 signal (SIGPWR, signal_shutdown);
1021 signal (SIGXCPU, signal_shutdown);
1022 signal (SIGDANGER, signal_shutdown);
1025 user_args_processor (argc, argv);
1027 /* In order to find old files, we have to check if
1028 * DBFPATH environment variable has been set.
1029 * Load the fully constructed DBFPATH-dbname into its own buffer.
1031 full_dbname_old[0] = '\0';
1033 if ((ptr = getenv ("DBFPATH")) != NULL) {
1036 catgets(dtsearch_catd, MS_dtsrclean, 12,
1037 "%s: Ignoring empty DBFPATH environment variable.\n") ,
1040 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 13,
1041 "%s: Using DBFPATH = '%s'.\n") ,
1043 strcpy (full_dbname_old, ptr);
1045 /* Ensure that DBFPATH ends in a slash. */
1046 ptr = strchr (full_dbname_old, '\0');
1047 if (*(ptr - 1) != LOCAL_SLASH) {
1048 *ptr++ = LOCAL_SLASH;
1051 strcpy (dbfpath, full_dbname_old);
1055 /* Currently full_dbname_old contains just the path.
1056 * Similarly, build just path name for the 2 new files
1057 * using full_dbname_new as a buffer.
1058 * Verify they don't both refer to the same directory.
1060 strcpy (full_dbname_new, arg_newpath);
1061 ptr = strchr (full_dbname_new, '\0');
1062 if (*(ptr - 1) != LOCAL_SLASH) {
1063 *ptr++ = LOCAL_SLASH;
1066 if (strcmp (full_dbname_old, full_dbname_new) == 0) {
1067 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 393,
1068 "%s Old and new directories are identical: '%s'.\n"),
1069 PROGNAME"393", full_dbname_old);
1070 end_of_job (2, SHOW_USAGE);
1073 /* Complete full_dbname_old by appending dbname to the path prefix.
1074 * Then build full path/file names for all 4 files.
1076 strcat (full_dbname_old, arg_dbname);
1077 strcat (full_dbname_new, arg_dbname);
1078 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 14,
1079 "%s: Old files: '%s.d2x, .d99'.\n") ,
1080 aa_argv0, full_dbname_old);
1081 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 15,
1082 "%s: New files: '%s.d2x, .d99'.\n") ,
1083 aa_argv0, full_dbname_new);
1085 strcpy (fname_d99_old, full_dbname_old);
1086 strcat (fname_d99_old, ".d99");
1087 strcpy (fname_d21_old, full_dbname_old);
1088 strcat (fname_d21_old, ".d21");
1089 strcpy (fname_d22_old, full_dbname_old);
1090 strcat (fname_d22_old, ".d22");
1091 strcpy (fname_d23_old, full_dbname_old);
1092 strcat (fname_d23_old, ".d23");
1093 strcpy (fname_d99_new, full_dbname_new);
1094 strcat (fname_d99_new, ".d99");
1095 strcpy (fname_d21_new, full_dbname_new);
1096 strcat (fname_d21_new, ".d21");
1097 strcpy (fname_d22_new, full_dbname_new);
1098 strcat (fname_d22_new, ".d22");
1099 strcpy (fname_d23_new, full_dbname_new);
1100 strcat (fname_d23_new, ".d23");
1102 /* If the user hasn't already authorized overwriting preexisting files,
1103 * check new directory and if new files already exist,
1104 * ask permission to overwrite.
1107 oops = FALSE; /* TRUE forces a user prompt */
1108 if ((fp_d99_new = fopen (fname_d99_new, "r")) != NULL) {
1109 fclose (fp_d99_new);
1112 if ((fp_d21_new = fopen (fname_d21_new, "r")) != NULL) {
1113 fclose (fp_d21_new);
1116 if ((fp_d22_new = fopen (fname_d22_new, "r")) != NULL) {
1117 fclose (fp_d22_new);
1120 if ((fp_d23_new = fopen (fname_d23_new, "r")) != NULL) {
1121 fclose (fp_d23_new);
1125 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 24,
1126 "%s: One or more new files already exist.\n") ,
1129 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 463,
1130 "%s Command line argument disallows file overlay.\n"),
1132 end_of_job (2, SHOW_EXITCODE);
1134 fputs (catgets(dtsearch_catd, MS_dtsrclean, 45,
1135 " Is it ok to overlay files in new directory? [y/n] "),
1139 fgets (readbuf, sizeof(readbuf), stdin);
1140 if (strlen(readbuf) && readbuf[strlen(readbuf)-1] == '\n')
1141 readbuf[strlen(readbuf)-1] = '\0';
1143 if (tolower (*readbuf) != 'y')
1144 end_of_job (2, SHOW_NOTHING);
1146 } /* end of check for overlaying new files */
1148 /* Open all files. The d2x's are opened so that the old ones
1149 * can be copied into the new directory before starting
1150 * the garbage collection process proper.
1151 * The d99's are opened now just to verify permissions.
1153 oops = FALSE; /* TRUE ends job, but only after trying all 4 files */
1154 open_all_files (&fp_d21_old, fname_d21_old, "rb", &size_d21_old, &oops);
1155 open_all_files (&fp_d22_old, fname_d22_old, "rb", &size_d22_old, &oops);
1156 open_all_files (&fp_d23_old, fname_d23_old, "rb", &size_d23_old, &oops);
1157 open_all_files (&fp_d99_old, fname_d99_old, "rb", &size_d99_old, &oops);
1158 open_all_files (&fp_d21_new, fname_d21_new, "wb", NULL, &oops);
1159 open_all_files (&fp_d22_new, fname_d22_new, "wb", NULL, &oops);
1160 open_all_files (&fp_d23_new, fname_d23_new, "wb", NULL, &oops);
1161 open_all_files (&fp_d99_new, fname_d99_new, "wb", NULL, &oops);
1164 end_of_job (shutdown_now, SHOW_EXITCODE);
1166 end_of_job (2, SHOW_EXITCODE);
1168 /* Copy old d2x files to new directory.
1169 * Database will open using new files so only they will be changed.
1171 copy_old_d2x_to_new (fname_d21_old, fname_d21_new, fp_d21_old, fp_d21_new);
1172 copy_old_d2x_to_new (fname_d22_old, fname_d22_new, fp_d22_old, fp_d22_new);
1173 copy_old_d2x_to_new (fname_d23_old, fname_d23_new, fp_d23_old, fp_d23_new);
1175 /* Open database, but use new d2x files for updates. */
1176 RENFILE (PROGNAME"1102", arg_dbname, OR_D21, fname_d21_new);
1177 RENFILE (PROGNAME"1104", arg_dbname, OR_D22, fname_d22_new);
1178 RENFILE (PROGNAME"1106", arg_dbname, OR_D23, fname_d23_new);
1179 if (!austext_dopen (arg_dbname, (dbfpath[0] == 0) ? NULL : dbfpath,
1181 puts (DtSearchGetMessages ());
1182 end_of_job (3, SHOW_EXITCODE);
1185 /* This is where efim changed real dba to
1186 * record number (still called dba)
1188 RECFRST (PROGNAME "1067", OR_OBJREC, 0);
1189 CRGET (PROGNAME "1068", &dba, 0); /* dba of first real obj
1191 recslots = dbrec.or_recslots; /* vista slots per obj
1193 dba_offset = recslots - (dba & 0xffffff); /* accounts for dbrec */
1195 /* total_num_addrs = what reccount would be if
1196 * all holes were filled with good records.
1198 total_num_addrs = (dbrec.or_maxdba - (dba & 0xffffff) + 1) / recslots + 1;
1199 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 25,
1200 "%s: curr reccnt=%ld, mxdba=%ld, sl/rec=%ld, tot#adr=%ld.\n") ,
1201 aa_argv0, (long)dbrec.or_reccount, (long)dbrec.or_maxdba,
1202 (long)dbrec.or_recslots, (long)total_num_addrs);
1204 /* Initialize validation_mode (checkd99) */
1205 if (validation_mode) {
1207 * Allocate and initialize a bit vector: 4 bits for every
1208 * possible d00 database address.
1210 max_bitvec = (total_num_addrs >> 1) + 2;
1211 if ((bit_vector = malloc ((size_t)max_bitvec + 64)) == NULL) {
1212 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 465,
1213 "%s WARNING: Can't allocate memory for bit vector.\n"
1214 " 'Validate' mode switched off.\n"),
1216 validation_mode = FALSE;
1217 normal_exitcode = 1; /* warning */
1218 goto EXIT_INIT_VALIDATION;
1220 memset (bit_vector, 0, (size_t)max_bitvec);
1223 * Read every d00 rec sequentially. 1 in bit #1 (lowest
1224 * order) in bit vector means record (dba) exists in d00
1225 * file. While we're at it, count the total number of
1228 x = dbrec.or_reccount / 50 + 1; /* x = recs per dot */
1229 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 26,
1230 "%s: Reading d00 file. Each dot appx %ld database documents...\n"),
1234 RECFRST (PROGNAME "534", OR_OBJREC, 0);
1235 while (db_status == S_OKAY) {
1236 CRREAD (PROGNAME "617", OR_OBJKEY, recidbuf, 0);
1238 /* print periodic progress dots */
1239 if (!(++reccount % x)) {
1240 fputc ('.', aa_stderr);
1241 need_linefeed = TRUE;
1242 if (!(++dot_count % 10L))
1243 fputc (' ', aa_stderr);
1248 * Get dba and record number and confirm it will not
1249 * overflow bit vector.
1251 CRGET (PROGNAME "537", &dba, 0);
1252 dba &= 0x00ffffff; /* mask out file number in high order byte */
1253 dba1 = (dba + dba_offset) / recslots; /* ="rec number", base 1 */
1254 if (dba1 >= total_num_addrs) {
1256 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 561,
1257 "%s DBA '%d:%ld' (rec #%ld) in d00 exceeds "
1258 "total num addrs %ld;\n"
1259 " Bit vector overflow because maxdba %ld"
1260 " in dbrec is incorrect.\n"),
1261 PROGNAME"561", OR_D00, (long)dba, (long)dba1,
1262 (long)total_num_addrs, (long)dbrec.or_maxdba);
1263 end_of_job (7, SHOW_EXITCODE);
1266 end_of_job (shutdown_now, SHOW_EXITCODE);
1269 * Set bit #1 of even or odd nibble to indicate that
1270 * this record *number* actually exists in d00 file.
1272 bit_vector[dba1 >> 1] |= (dba1 & 1L) ? 0x01 : 0x10;
1274 RECNEXT (PROGNAME "541", 0);
1275 } /* end of sequential read thru d00 file */
1277 TERMINATE_LINE (); /* end the dots... */
1279 /* confirm that RECCOUNT record holds the correct number */
1280 if (dbrec.or_reccount == reccount) {
1282 catgets(dtsearch_catd, MS_dtsrclean, 27,
1283 "%s: Confirmed %ld DOCUMENTS in %s.d00.\n") ,
1284 aa_argv0, (long)dbrec.or_reccount, arg_dbname);
1287 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 28,
1288 "%s: %ld DOCUMENTS actually in %s.d00 not ="
1289 " %ld count stored there.\n"
1290 " Count will be corrected in new d00 file.\n") ,
1291 aa_argv0, (long)reccount, arg_dbname, (long)dbrec.or_reccount);
1292 dbrec.or_reccount = reccount;
1293 rewrite_reccount = TRUE;
1296 EXIT_INIT_VALIDATION:;
1297 } /* end of validation_mode initialization */
1299 /* initialize main loop */
1303 dot_count = DOTS_PER_MSG; /* force initial msg after first
1306 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 29,
1307 "%s: Compressing into %s. Each dot appx %lu words...\n") ,
1308 aa_argv0, arg_newpath, (unsigned long)recs_per_dot);
1310 /* write New Header Information to a new d99 file */
1311 init_header (fp_d99_new, &fl_hdr);
1313 /* Sequentially read each word key file in big loop.
1314 * For each word, read the d99.
1315 * In validation mode check the dbas.
1316 * If not validating, just blindly rewrite the old d99 to the new one.
1317 * If validating only write good dba's and mark the bit vector.
1319 copy_new_d99 (OR_SWORDKEY);
1320 copy_new_d99 (OR_LWORDKEY);
1321 copy_new_d99 (OR_HWORDKEY);
1325 end_of_job (50, SHOW_PROGRESS + SHOW_EXITCODE);
1327 print_progress ("Final");
1329 /* If validation_mode requested, traverse bit vector and print out
1330 * table of each d00 record which cannot be accessed from any d99 word.
1331 * If a validation file name was provided, write out a line for each
1332 * bad record in albeniz-compatible format.
1334 if (validation_mode) {
1335 for (x = 0, bvptr = bit_vector; x < max_bitvec; x++, bvptr++) {
1336 for (j = 0; j < 8; j += 4) { /* j = 0 or 4, amount of
1338 /* a = bits #1 and #2 of current nibble */
1339 a = 0x30 & (*bvptr << j);
1341 /* if dba is in d00 but not in d99... */
1342 if (a & 0x10 && !(a & 0x20)) {
1343 /* ...construct valid vista dba */
1346 dbaorig++; /* slot number */
1347 /*** dba = dbaorig | (OR_D00 << 24); ***//* r
1350 /* now efim retranslates back to real dba */
1351 dba = ((dbaorig + 1) * recslots - dba_offset)
1354 /* ...print out err msg */
1355 CRSET (PROGNAME "734", &dba, 0);
1356 CRREAD (PROGNAME "735", OR_OBJKEY, readbuf, 0);
1358 catgets(dtsearch_catd, MS_dtsrclean, 444,
1359 "*** %s d00 record '%s' is not referenced in d99.\n"
1360 " DBA = %d:%ld (x%02x:%06lx).\n") ,
1361 PROGNAME"444", readbuf, OR_D00,
1362 (long)dba, OR_D00, (long)dba);
1364 /*...if albeniz compatible output requested, do it */
1366 fprintf (frecids, DISCARD_FORMAT, arg_dbname,
1367 readbuf, "MrClean", datestr);
1371 if (max_corruption > 0L &&
1372 corruption_count >= max_corruption)
1373 end_of_job (94, SHOW_EXITCODE);
1374 } /* endif where d00 is not referenced by d99 */
1375 } /* end forloop: every 2 bits in a bitvector byte */
1376 } /* end forloop: every byte in bitvector */
1379 /* Normal_exitcode currently will contain either a 0 or a 1.
1380 * If we were uncorrupting the d99 and found any corrupt links,
1381 * make sure it's 1 (warning). If there were corrupt links and
1382 * we weren't trying to uncorrupt it, change it to a hard error.
1384 /***by the way, corruption_count can be > 0 only if in validation_mode.**/
1385 if (corruption_count > 0L) {
1386 if (validation_mode)
1387 normal_exitcode = 1;
1389 normal_exitcode = 90;
1391 end_of_job (normal_exitcode, SHOW_EXITCODE);
1394 /*************************** DTSRCLEAN.C ****************************/