2 * COMPONENT_NAME: austext
4 * FUNCTIONS: TERMINATE_LINE
21 * (C) COPYRIGHT International Business Machines Corp. 1993,1995
23 * Licensed Materials - Property of IBM
24 * US Government Users Restricted Rights - Use, duplication or
25 * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
27 /*************************** DTSRCLEAN.C ****************************
28 * $TOG: dtsrclean.c /main/9 1998/04/17 11:23:57 mgreess $
29 * Does garbage collection (ie compression) of .d99 file.
30 * Optionally verifies all database addresses in d99.
31 * Modification of clndtbs.c and checkd99.c.
32 * Does NOT use austext engine so this must be modified if schema changes.
35 * All command input is on command line. Reads existing d2x and d99 files.
38 * New .d2x and .d99 files are placed into the directory specified by user.
40 * EXIT CODE STANDARDS:
42 * 1 = warnings, but output should be ok.
43 * 2 = failure in cmd line parse or other initialization; job never started.
44 * 3 - 49 = fatal error, but output may be acceptable.
45 * 50 - 99 = fatal error and output files are probably unusable.
46 * (In this program, even input may be corrupted).
47 * 100+ = aborting due to asynchronous interrupt signal.
48 * Output files may or may not be unusable.
51 * Revision 2.4 1996/05/08 16:20:50 miker
52 * Added RENFILEs for new d2x files; austext_dopen no longer does.
54 * Revision 2.3 1996/02/01 18:13:06 miker
55 * Deleted BETA definition.
57 * Revision 2.2 1995/10/26 14:51:08 miker
58 * Renamed from mrclean.c. Added prolog.
61 * Revision 2.1 1995/09/22 21:18:52 miker
62 * Freeze DtSearch 0.1, AusText 2.1.8
64 * Revision 1.11 1995/09/05 18:16:46 miker
65 * Name, msg, and other minor changes for DtSearch..
66 * Print messages if austext_dopen() fails.
68 * Revision 1.10 1995/06/02 15:52:42 miker
69 * Cleaned up -m and bit vector overflow msgs.
71 * Revision 1.9 1995/05/30 19:15:58 miker
72 * Print beta char in startup banner msg.
73 * Remove -m option and max_totrecs; select bit vector
74 * size from maxdba, not reccount.
86 #include <sys/types.h>
87 #include <netinet/in.h>
// Message-catalog set numbers, tuning constants, end_of_job() flag bits and
// the file-scope state shared by the routines of this program.
89 #define MS_misc 1 /* msg catalog set number */
90 #define MS_dtsrclean 26 /* msg catalog set number */
91 #define DISCARD_FORMAT "%s\t\"%s\"\t%s\t%s\n" /* copied from oe.h */
// RECS_PER_DOT, DOTS_PER_MSG and MAX_CORRUPTION are the defaults quoted in
// the usage text printed by print_usage().
92 #define RECS_PER_DOT 1000
93 #define DOTS_PER_MSG 50
94 #define DISK_BLKSIZE 512
95 #define MAX_CORRUPTION 100
// Number of DB_ADDR entries that fit in one DISK_BLKSIZE-byte block. Because
// it is built from sizeof, the expression has type size_t.
96 #define MAX_REC_READ (DISK_BLKSIZE / sizeof(DB_ADDR))
98 * Max number of addresses to be read from database addresses
99 * file, ie the size of one block read from hard disk.
101 #define PROGNAME "DTSRCLEAN"
// Flag bits tested with '&' against end_of_job()'s show_flags argument;
// callers combine them, e.g. SHOW_PROGRESS + SHOW_EXITCODE.
103 #define SHOW_NOTHING 0 /* bit arguments for end_of_job() */
105 #define SHOW_EXITCODE 2
106 #define SHOW_PROGRESS 4
// Ends a pending row of progress dots: writes one newline to aa_stderr only
// when need_linefeed is set, then clears the flag.
108 #define TERMINATE_LINE() if(need_linefeed){fputc('\n',aa_stderr);need_linefeed=FALSE;}
110 /*-------------------------- GLOBALS ----------------------------*/
// arg_dbname and arg_newpath are filled in by user_args_processor().
111 static char *arg_dbname = NULL;
112 static char *arg_newpath = NULL;
// Allocated by main() only in validation mode; holds 4 bits per possible d00
// address (two addresses per byte, see copy_new_d99()).
113 unsigned char *bit_vector = NULL;
// Bytes of old d99 address data accounted for so far; accumulated in
// copy_new_d99() and reported by print_progress().
114 static size_t bytes_in = 0L;
115 static size_t corruption_count = 0L;
116 static struct or_swordrec
118 static struct or_lwordrec
120 static struct or_hwordrec
122 static char datestr[32] = ""; /* "1946/04/17 13:03" */
123 static int debug_mode = FALSE;
124 static size_t dot_count = 0L;
125 char fname_d99_new[1024];
126 char fname_d99_old[1024];
127 FILE *fp_d99_new = NULL;
128 FILE *fp_d99_old = NULL;
// Output stream for unreferenced d00 records; opened by
// user_args_processor() when a file name follows the validation option.
129 static FILE *frecids = NULL;
// Set TRUE for each address examined in copy_new_d99() and cleared by
// validation_error().
130 static int is_valid_dba;
131 static size_t max_corruption = MAX_CORRUPTION;
132 static int normal_exitcode = 0;
133 static int need_linefeed = FALSE;
134 static int overlay_no = FALSE;
135 static int overlay_yes = FALSE;
139 recslots; /* dbrec.or_recslots promoted to INT32 */
143 recs_per_dot = RECS_PER_DOT;
144 static int rewrite_reccount = FALSE;
// Written by signal_shutdown() as 100 + signal number and later passed to
// end_of_job() as the exit code.
// NOTE(review): a plain int written from a signal handler; ISO C only
// guarantees this for volatile sig_atomic_t objects -- confirm.
145 static int shutdown_now = 0; /* = FALSE */
// Sizes of the old files as reported by fstat() in open_all_files().
146 static size_t size_d21_old = 0L;
147 static size_t size_d22_old = 0L;
148 static size_t size_d23_old = 0L;
149 static size_t size_d99_old = 0L;
150 static time_t timestart = 0L;
153 static int validation_mode = FALSE;
156 /********************************************************/
158 /* signal_shutdown */
160 /********************************************************/
161 /* interrupt handler for SIGINT */
// Signal handler installed by main() for the trapped signals. It only
// records the pending exit code (100 + signal number) in shutdown_now; that
// value is later handed to end_of_job() by the copy loops and by main().
// NOTE(review): shutdown_now is declared as a plain static int. ISO C only
// guarantees handler writes to volatile sig_atomic_t objects -- confirm and
// consider changing the declaration.
162 static void signal_shutdown (int sig)
164 shutdown_now = 100 + sig;
166 } /* signal_shutdown() */
169 /************************************************/
173 /************************************************/
174 /* Prints usage statement to stderr. */
// Writes the usage text (message 1 of catalog set MS_dtsrclean, with the
// English text below as the fallback) to aa_stderr. The four conversions are
// filled, in order, with aa_argv0, RECS_PER_DOT, DOTS_PER_MSG and
// MAX_CORRUPTION.
// NOTE(review): the -p line uses %lu for RECS_PER_DOT, which is defined in
// this file as the plain int constant 1000. That is a mismatched conversion
// -- confirm, then either cast the argument or change the specifier (the
// catalog copy of the message would have to change with it).
175 static void print_usage (void)
177 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 1,
178 "\nUSAGE: %s [options] <dbname> <newpath>\n"
179 " Compresses unused d99 space and validates d00-d99 links.\n"
180 " -p<N> Progress dots printed every <N> records (default %lu).\n"
181 " Complete progress message printed every %d dots.\n"
182 " -oy Authorizes overlaying preexisting d99/d2<N> files in newpath.\n"
183 " -on Forces exit if preexisting d99/d2<N> files in newpath.\n"
184 " -v Validates d99 and d00 links, uncorrupts d99 file, and ensures\n"
185 " accurate record count. Also use -c0 to uncorrupt entire database.\n"
186 " -v<fname> Same as -v but also writes all d00 recs unreferenced by d99\n"
187 " to <fname> in format suitable to extract into .fzk file format.\n"
188 " -c<N> Exits if more than <N> corrupted/incomplete links (default %d).\n"
189 " Corruption limit turned off by -c0.\n"
190 " <dbname> 1 - 8 char database name = the old d99/d2<N> files to be updated.\n"
191 " Files found in local directory or DBFPATH environment variable.\n"
192 " <newpath> Specifies where the new d99/d2<N> files will be placed.\n"
193 " If first char is not slash, path is relative to local directory.\n"
195 " 0: Complete success. 1: Warning. 2: Job never started.\n"
196 " 3-49: Job ended prematurely, old files ok, new files unusable.\n"
197 " 50-99: Fatal Error, even old database may be corrupted.\n"
198 " 100+: Ctrl-C, kill, and all other signal interrupts cause premature\n"
199 " end, new files may be unusable. Signal = exit code - 100.\n")
200 ,aa_argv0, RECS_PER_DOT, DOTS_PER_MSG, MAX_CORRUPTION);
202 } /* print_usage() */
205 /************************************************/
209 /************************************************/
210 /* Prints progress msg after dots or at end of job.
211 * Label is "Final" or "Progress".
// Prints two status lines to aa_stderr: a percentage derived from bytes_in
// relative to size_d99_old together with the elapsed time since timestart,
// and the running word count (reccount) for arg_dbname.
// label is printed verbatim; the callers in this file pass "Final" and
// "Progress".
213 static void print_progress (char *label)
218 seconds = time (NULL) - timestart; /* total seconds elapsed */
// NOTE(review): the quotient bytes_in / size_d99_old is a fraction, yet it
// is compared with 99.5, while the next line multiplies the same quotient by
// 100. The threshold looks as if it was meant for the percentage (or to be
// 0.995) -- confirm.
222 if ((float) bytes_in / (float) size_d99_old >= 99.5)
225 compression = (int) (100.* (float) bytes_in / (float) size_d99_old);
226 if (compression < 0 || compression > 100)
// The "KB" figure is bytes_in divided by 1000.
231 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 2,
232 "%s: %s Compression %d%% (about %lu KB) in %ld:%02ld min:sec.\n") ,
233 aa_argv0, label, compression, bytes_in / 1000L,
234 seconds / 60UL, seconds % 60UL);
236 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 3,
237 "%s: Counted %ld WORDS in %s.d99.\n") ,
238 aa_argv0, (long)reccount, arg_dbname);
240 } /* print_progress() */
243 /************************************************/
247 /************************************************/
248 /* Exits program. Prints status messages before going down.
249 * Should be called on even record boundaries whenever possible,
250 * ie after record writes complete and shutdown_now > 0 (TRUE).
// Common exit path: prints the closing status messages and then calls
// DtSearchExit(exitcode).
//   exitcode   - process exit code; values of 100 or more are reported as an
//                interrupt, with the signal number shown as exitcode - 100.
//   show_flags - SHOW_* bits selecting the optional output: SHOW_PROGRESS
//                prints the "Final" progress message, SHOW_EXITCODE prints
//                the exit code, SHOW_USAGE is also tested.
252 static void end_of_job (int exitcode, int show_flags)
255 if (exitcode >= 100) {
256 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 66,
257 "%s Aborting after interrupt signal %d.\n"),
258 PROGNAME"66", exitcode - 100);
// A clean validation run is reported explicitly.
260 if (validation_mode && corruption_count == 0L)
261 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 4,
262 "%s: No corrupted links detected.\n") ,
// max_corruption == 0 means "no limit", so the abort message is only
// produced when a positive limit has been reached.
264 if (corruption_count > 0L) {
265 if (max_corruption > 0L && corruption_count >= max_corruption)
266 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 193,
267 "%s Aborting at %ld corrupted links.\n"),
268 PROGNAME"193", corruption_count);
270 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 194,
271 "%s Detected%s %ld corrupted/incomplete link(s).\n"),
273 (validation_mode) ? " and corrected" : "",
276 if (show_flags & SHOW_PROGRESS) {
277 print_progress ("Final");
279 if (show_flags & SHOW_USAGE)
281 if (show_flags & SHOW_EXITCODE)
282 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 5,
283 "%s: Exit code = %d.\n") , aa_argv0, exitcode);
284 DtSearchExit (exitcode);
288 /************************************************/
290 /* user_args_processor() */
292 /************************************************/
293 /* Reads and verifies user's command line arguments and
294 * converts them into internal switches and variables.
295 * Some attempt is made to read as many errors as possible
296 * before ending job for bad arguments.
// Parses the command line into the file-scope option variables and, on any
// error, leaves through end_of_job (2, SHOW_USAGE).
// Visible effects: validation_mode and frecids (validation option with an
// optional output file name), recs_per_dot, max_corruption, arg_dbname and
// arg_newpath.
// NOTE(review): the case labels of the option switch are not visible in
// this excerpt; the option letters named in the comments below are inferred
// from the usage text -- confirm against the full file.
298 static void user_args_processor (int argc, char **argv)
307 end_of_job (2, SHOW_USAGE);
309 /* parse all args that begin with a dash (-) */
313 if (argptr[0] != '-')
315 switch (tolower (argptr[1])) {
317 if (strcmp (argptr, "-russell") == 0) /* backdoor debug */
323 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 301,
324 "%s The -m argument is no longer necessary.\n"),
329 i = tolower (argptr[2]);
337 catgets(dtsearch_catd, MS_dtsrclean, 177,
338 "%s Invalid %.2s argument.\n"),
339 PROGNAME"177", argptr);
// Validation option: any text following the two option characters is taken
// as the name of a file that is created (mode "w") for the unreferenced d00
// records.
345 validation_mode = TRUE;
346 if (argptr[2] != '\0') {
347 if ((frecids = fopen (argptr + 2, "w")) == NULL) {
349 catgets(dtsearch_catd, MS_dtsrclean, 802,
350 "%s Unable to open '%s' to output"
351 " unreferenced d00 records:\n %s\n"),
352 PROGNAME"802", argptr, strerror(errno));
356 strftime (datestr, sizeof (datestr),
357 "%Y/%m/%d %H:%M", localtime (&stamp));
// Presumably the -p<N> option. atol() gives no error indication, so
// non-numeric text yields 0 and is caught only by the <= 0 test.
362 recs_per_dot = (DtSrINT32) atol (argptr + 2);
363 if (recs_per_dot <= 0)
// Presumably the -c<N> option; the parsed value becomes max_corruption.
368 tempsize = atol (argptr + 2);
371 max_corruption = tempsize;
376 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 159,
377 "%s Unknown argument: '%s'.\n"),
378 PROGNAME"159", argptr);
382 } /* end parse of cmd line args */
384 /* Test how we broke loop.
385 * There should still be 2 args past the ones
386 * beginning with a dash: dbname and newpath.
390 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 210,
391 "%s Missing required dbname argument.\n"),
394 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 211,
395 "%s Missing required newpath argument.\n"),
398 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 212,
399 "%s Too many arguments.\n"),
404 end_of_job (2, SHOW_USAGE);
// The two positional arguments are read as argv[0] and argv[1], so argv is
// assumed to have been advanced past the options by this point.
// Database names longer than 8 characters are rejected.
407 arg_dbname = argv[0];
408 if (strlen (arg_dbname) > 8) {
409 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 229,
410 "%s Invalid database name '%s'.\n"),
411 PROGNAME"229", arg_dbname);
412 end_of_job (2, SHOW_USAGE);
416 * Oldpath and newpath are validated when the files
417 * are copied and the database is opened.
419 arg_newpath = argv[1];
421 } /* user_args_processor() */
424 /************************************************/
426 /* validation_error() */
428 /************************************************/
429 /* Subroutine of validation_mode in main().
430 * Prints d2x and d99 data at location of error.
431 * Adjusts d2x counts for number of good addrs and free slots.
// Called from copy_new_d99() for every rejected d99 address.
// Clears is_valid_dba, prints the location of the bad link using the saved
// d23old record and the original address value, and removes one address
// from the count in d23new (never letting it go below zero).
//   dbaorig - the address word exactly as it was read from the d99 file.
433 static void validation_error (DB_ADDR dbaorig)
436 is_valid_dba = FALSE;
440 /* now efim retranslates back to real dba */
442 slot = ((slot + 1) * recslots - dba_offset)
// The first character of the word key is shown as '^' when it is not a
// printable graphic character.
445 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 6,
446 " DBA = %d:%ld (x%02x:%06lx), orig addr val = x%08lx\n"
447 " Word='%c%s' offset=%ld addrs=%ld free=%d\n") ,
448 OR_D00, slot, OR_D00, slot, dbaorig,
449 (!isgraph (d23old.or_hwordkey[0])) ? '^' : d23old.or_hwordkey[0],
450 d23old.or_hwordkey + 1, d23old.or_hwoffset,
451 d23old.or_hwaddrs, d23old.or_hwfree);
452 if (--d23new.or_hwaddrs < 0L)
453 d23new.or_hwaddrs = 0L;
454 /* (should never occur) */
458 } /* validation_error() */
461 /************************************************/
465 /************************************************/
// Opens one file and, for input files, records its size.
//   fp    - receives the stream returned by fopen (fname, mode).
//   fname - path of the file, also used in the error messages.
//   mode  - fopen mode string; main() passes "rb" or "wb".
//   size  - receives st_size from fstat(); a size <= 0 is reported as an
//           empty file.
//   oops  - error flag shared across calls; the lines that set it are not
//           visible in this excerpt.
// NOTE(review): main() passes NULL for size when opening the new files,
// while the visible code dereferences size. A NULL guard presumably sits on
// a line not shown here -- verify.
466 static void open_all_files
467 (FILE ** fp, char *fname, char *mode, size_t * size, int *oops) {
468 struct stat fstatbuf;
470 if ((*fp = fopen (fname, mode)) == NULL) {
471 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 439,
472 "%s Can't open %s: %s\n"),
473 PROGNAME"439", fname, strerror (errno));
477 if (fstat (fileno (*fp), &fstatbuf) == -1) {
478 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 440,
479 "%s Can't access status of %s: %s\n"),
480 PROGNAME"440", fname, strerror (errno));
485 if ((*size = fstatbuf.st_size) <= 0L) {
486 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 499,
487 "%s %s is empty.\n"),
488 PROGNAME"499", fname);
492 } /* open_all_files() */
495 /************************************************/
497 /* copy_old_d2x_to_new */
499 /************************************************/
// Copies one already-opened old d2x file to its new counterpart in chunks
// of sizeof (readbuf) bytes.
//   fname_old, fname_new - file names, used only in messages.
//   fp_old, fp_new       - streams opened beforehand by the caller.
// A read or write error ends the job through end_of_job (3, SHOW_EXITCODE).
// The job is also ended with shutdown_now as the exit code; the test that
// guards that call is not visible in this excerpt.
500 static void copy_old_d2x_to_new
501 (char *fname_old, char *fname_new, FILE * fp_old, FILE * fp_new) {
502 char readbuf[1024 + 32];
505 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 7,
506 "%s: Copying from old d2x files to %s...\n") ,
507 aa_argv0, fname_new);
508 for (;;) { /* loop ends when eof set on input stream */
// i is the number of bytes actually read; exactly that many are written.
510 i = fread (readbuf, 1, sizeof (readbuf), fp_old);
511 /* byte swap not required on pure copy operation */
513 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 517,
514 "%s Read error on %s: %s.\n"),
515 PROGNAME"517", fname_old, strerror (errno));
516 end_of_job (3, SHOW_EXITCODE);
518 j = fwrite (readbuf, 1, i, fp_new);
520 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 489,
521 "%s Write error on %s: %s.\n"),
522 PROGNAME"489", fname_new, strerror (errno));
523 end_of_job (3, SHOW_EXITCODE);
526 end_of_job (shutdown_now, SHOW_EXITCODE);
534 } /* copy_old_d2x_to_new() */
537 /********************************/
541 /********************************/
542 /* Performs vista RECREAD on curr word record.
543 * CALLER SHOULD CHECK DB_STATUS.
// Reads the current word record into *glob_word in host byte order.
//   glob_word - destination record, always in the or_hwordrec layout.
//   field     - key field id selecting the record type: OR_SWORDKEY and
//               OR_LWORDKEY are read into d21old / d22old and then copied
//               across; any other value is read directly into *glob_word.
// In every branch the key is forced to end in a NUL at index
// DtSrMAXWIDTH_HWORD - 1, and offset, free and addrs are converted from
// network order.
// NOTE(review): unlike write_d2x() this function is not declared static --
// confirm whether external linkage is intended.
545 void read_d2x (struct or_hwordrec * glob_word, long field)
547 if (field == OR_SWORDKEY) {
548 RECREAD (PROGNAME "061", &d21old, 0);
549 if (db_status != S_OKAY)
551 strncpy (glob_word->or_hwordkey, d21old.or_swordkey,
553 glob_word->or_hwordkey[DtSrMAXWIDTH_HWORD - 1] = 0;
554 glob_word->or_hwoffset = ntohl (d21old.or_swoffset);
555 glob_word->or_hwfree = ntohl (d21old.or_swfree);
556 glob_word->or_hwaddrs = ntohl (d21old.or_swaddrs);
558 else if (field == OR_LWORDKEY) {
559 RECREAD (PROGNAME "069", &d22old, 0);
560 if (db_status != S_OKAY)
562 strncpy (glob_word->or_hwordkey, d22old.or_lwordkey,
564 glob_word->or_hwordkey[DtSrMAXWIDTH_HWORD - 1] = 0;
565 glob_word->or_hwoffset = ntohl (d22old.or_lwoffset);
566 glob_word->or_hwfree = ntohl (d22old.or_lwfree);
567 glob_word->or_hwaddrs = ntohl (d22old.or_lwaddrs);
// Remaining record type: read straight into the caller's buffer; NTOHL
// presumably converts each field in place.
570 RECREAD (PROGNAME "078", glob_word, 0);
571 glob_word->or_hwordkey[DtSrMAXWIDTH_HWORD - 1] = 0;
572 NTOHL (glob_word->or_hwoffset);
573 NTOHL (glob_word->or_hwfree);
574 NTOHL (glob_word->or_hwaddrs);
580 /********************************/
584 /********************************/
585 /* performs vista RECWRITE on curr word record.
586 * CALLER MUST CHECK DB_STATUS.
// Writes *glob_word back as the current word record in network byte order.
//   glob_word - source record in the or_hwordrec layout, host byte order.
//   field     - key field id selecting the record type: OR_SWORDKEY and
//               OR_LWORDKEY are staged in d21new / d22new; any other value
//               is written directly from *glob_word.
// NOTE(review): the two strcpy() calls are unbounded. They rely on the key
// having come from a record of the same type (see read_d2x) -- confirm the
// destination arrays can never be shorter than the stored key.
// NOTE(review): in the last branch HTONL is applied to *glob_word itself, so
// the caller's copy is presumably left in network order after the call --
// confirm that no caller reads those fields afterwards.
588 static void write_d2x (struct or_hwordrec * glob_word, long field)
590 if (field == OR_SWORDKEY) {
591 strcpy (d21new.or_swordkey, glob_word->or_hwordkey);
592 d21new.or_swoffset = htonl (glob_word->or_hwoffset);
593 d21new.or_swfree = htonl (glob_word->or_hwfree);
594 d21new.or_swaddrs = htonl (glob_word->or_hwaddrs);
595 RECWRITE (PROGNAME "102", &d21new, 0);
597 else if (field == OR_LWORDKEY) {
598 strcpy (d22new.or_lwordkey, glob_word->or_hwordkey);
599 d22new.or_lwoffset = htonl (glob_word->or_hwoffset);
600 d22new.or_lwfree = htonl (glob_word->or_hwfree);
601 d22new.or_lwaddrs = htonl (glob_word->or_hwaddrs);
602 RECWRITE (PROGNAME"112", &d22new, 0);
605 HTONL (glob_word->or_hwoffset);
606 HTONL (glob_word->or_hwfree);
607 HTONL (glob_word->or_hwaddrs);
608 RECWRITE (PROGNAME "115", glob_word, 0);
615 /************************************************/
619 /************************************************/
620 /* The garbage collection/compression process itself.
621 * For very large databases, there will be appx 3 million word records,
622 * so the loop should be coded for ***EFFICIENCY***.
// Compresses the d99 address data for every word of one word-key type.
//   keyfield - key field id passed to KEYFRST / KEYNEXT and to read_d2x() /
//              write_d2x() to select the record type.
// For each word: read its d2x record, seek to its address block in the old
// d99, copy the block (one MAX_REC_READ-sized chunk at a time) to the new
// d99, and rewrite the d2x record with the new offset. In validation_mode
// every address is checked against bit_vector and rejected ones are not
// copied. Unrecoverable conditions leave through end_of_job().
624 static void copy_new_d99 (long keyfield)
628 DtSrINT32 slots_left;
629 unsigned char *bvptr;
631 DB_ADDR dba, dbaorig;
635 DtSrINT32 good_addrs_left;
636 DtSrINT32 good_addrs_this_block;
637 DtSrINT32 num_reads, num_writes;
638 DB_ADDR word_addrs[MAX_REC_READ + 64]; /* d99 read buf */
639 DB_ADDR word_addrs_out[MAX_REC_READ + 64]; /* d99 write buf */
641 KEYFRST (PROGNAME "179", keyfield, 0);
642 while (db_status == S_OKAY) {
643 read_d2x (&d23new, keyfield);
644 if (validation_mode) /* save for validation err msgs */
645 memcpy (&d23old, &d23new, sizeof (d23old));
648 * Read old d99 file at specified offset to get total num
649 * "holes". In the first portion of record holes are filled
650 * with representations of valid database addresses +
651 * statistical weights. In the second portion the holes are
652 * "free slots" for future expansion which are
653 * conventionally initialized with a -1.
// The free-slot count is zeroed before num_holes is computed, so num_holes
// starts out equal to or_hwaddrs: the old expansion area is not read.
655 /* force number of free slots to 0(ZERO) */
656 d23new.or_hwfree = 0;
// NOTE(review): the result of fseek() is not checked here.
657 fseek (fp_d99_old, d23new.or_hwoffset, SEEK_SET);
658 num_holes = d23new.or_hwaddrs + d23new.or_hwfree;
659 good_addrs_left = d23new.or_hwaddrs;
660 bytes_in += sizeof (DB_ADDR) * num_holes;
662 /* Update the offset in the d2x record buffer */
663 d23new.or_hwoffset = ftell (fp_d99_new);
666 * Copy the array of holes in each disk block, reading the
667 * old and writing to the new. Loop ends when the number
668 * of holes left will fit into one last block.
671 while (!done) { /* loop on each block in this word */
// NOTE(review): MAX_REC_READ has type size_t, so this comparison is made in
// unsigned arithmetic. If num_holes has a signed type (its declaration is
// not visible here), a negative count from a damaged d2x record would be
// treated as a huge value -- confirm.
672 if (num_holes > MAX_REC_READ) {
673 num_reads = MAX_REC_READ;
674 num_holes -= MAX_REC_READ;
678 num_reads = num_holes;
// NOTE(review): fread()'s item count is discarded on this line. How the
// read error reported below is detected is not visible in this excerpt --
// confirm that short reads are caught.
681 fread (word_addrs, sizeof(DB_ADDR), (size_t)num_reads, fp_d99_old);
684 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 657,
685 "%s Read error on %s: %s.\n"),
686 PROGNAME"657", fname_d99_old, strerror (errno));
687 end_of_job (4, SHOW_PROGRESS + SHOW_EXITCODE);
689 /* Note BYTE_SWAP only needed for validation_mode.
690 * If not validating, we're just going to copy
691 * the network format dba's as is directly to
696 * Addrs on d99 are now 'record numbers' not dbas. A
697 * rec# is what the dba/slot# would be if records took
698 * up just one slot and there were no dbrec at start of
699 * file. D99 rec#s start at #1, not #0.
703 * If user requested validation_mode, validate each
704 * 'good' rec# (not free slots) in word_addrs buffer.
705 * If any d99 links are corrupt, skip them when copying
706 * to the new d99 file. Rewrite -1's to all free slots.
707 * ----> NOTE UNUSUAL FORMAT OF DBA HOLES IN D99! <----
708 * Record number is shifted to the high order 3 bytes.
709 * The statistical weight is in the low order byte. The
710 * vista file number is known from the #define constant
711 * OR_D00, and the vista dba/slot# is mapped from rec#
712 * by mult/div number of slots per rec, plus/minus
715 if (validation_mode) {
// Convert the whole chunk to host order so the record numbers can be
// examined.
717 for (swapx = 0; swapx < num_reads; swapx++)
718 NTOHL (word_addrs[swapx]);
720 /* set x to number of good addrs in this block */
721 if (good_addrs_left > num_reads) {
723 good_addrs_left -= num_reads;
731 * Validate the rec#'s in this block. Note that
732 * the loop is skipped if the entire block is free
735 good_addrs_this_block = 0;
736 for (a = 0; a < x; a++) { /* a = index to curr dba */
738 * Get rec#. Save original rec# for err msgs,
739 * then shift slot number to lower 3 bytes,
742 dbaorig = word_addrs[a]; /* rec#,rec#,rec#:wt */
743 dba = dbaorig >> 8; /* 0,rec#,rec#,rec# */
744 is_valid_dba = TRUE; /* default */
747 * If original rec# == -1 we've overrun the
748 * good rec#'s into the expansion area, which
749 * is filled with -1's. This is real bad news
750 * because if the counts in d02 are bad, the
751 * online programs will quickly crash, and we
752 * can't continue this program. Advance to next
753 * rec# because we can't mark the bit vector.
755 if (dbaorig == -1L) {
758 catgets(dtsearch_catd, MS_dtsrclean, 111,
759 "*** %s DBA in d99 = -1. "
760 "Probable overrun into expansion\n"
761 " area due to incorrect count values "
764 validation_error (dbaorig);
766 if (max_corruption > 0L &&
767 corruption_count >= max_corruption)
768 end_of_job (91, SHOW_PROGRESS + SHOW_EXITCODE);
769 continue; /* skip the bit vector
774 * If slot number > max totrecs, we have a
775 * corrupted d99-d00 link because we've already
776 * validated the d00 file and we know that it
777 * has no slots > max. Also we have to advance
778 * to next slot because we can't mark the bit
781 /******if (dba >= max_totrecs)*******/
782 if (dba >= total_num_addrs) {
785 catgets(dtsearch_catd, MS_dtsrclean, 222,
786 "*** %s DBA in d99 not in d00,"
787 " slot > max num docs.\n"),
789 validation_error (dbaorig);
791 if (max_corruption > 0L &&
792 corruption_count >= max_corruption)
793 end_of_job (93 - 1, SHOW_PROGRESS + SHOW_EXITCODE);
794 continue; /* skip the bit vector check */
798 * Verify that dba exists in d00 file (test bit
799 * #1). If not, mark bit #3 (3rd lowest) in
800 * nibble and print error msg unless bit #3
// Two record numbers share each bit_vector byte: an even dba uses the high
// nibble (masks 0x10, 0x20, 0x40) and an odd dba the low nibble (masks
// 0x01, 0x02, 0x04).
803 bvptr = bit_vector + (dba >> 1);
804 is_odd_nibble = (dba & 1L);
805 if (!(*bvptr & ((is_odd_nibble) ? 0x01 : 0x10))) {
807 if (!(*bvptr & ((is_odd_nibble) ? 0x04 : 0x40))) {
809 *bvptr |= (is_odd_nibble) ? 0x04 : 0x40;
812 catgets(dtsearch_catd, MS_dtsrclean, 333,
813 "*** %s DBA in d99 does not exist in d00.\n"),
815 validation_error (dbaorig);
817 if (max_corruption > 0L &&
818 corruption_count >= max_corruption)
819 end_of_job (93, SHOW_PROGRESS + SHOW_EXITCODE);
820 } /* endif where corrupt link
825 * Mark bit #2 in bit vector indicating a d99
828 *bvptr |= (is_odd_nibble) ? 0x02 : 0x20; /* bit #2 */
831 * move good dba to curr output block, incr
835 word_addrs_out[good_addrs_this_block++] = dbaorig;
837 } /* end validation loop for each good dba in
841 * Write out only validated addrs in current block.
842 * If this was the last block, fill out all the
843 * free slots, if any, with -1 values, and exit the
844 * dba loop for this word.
846 if (good_addrs_this_block > 0) {
// NOTE(review): NTOHL is used here for a host-to-network conversion before
// the write. It is presumably the same byte swap as the HTONL used in
// write_d2x() -- confirm, and prefer HTONL to state the intent.
848 for (swapx = 0; swapx < good_addrs_this_block; swapx++)
849 NTOHL (word_addrs_out[swapx]);
851 num_writes = fwrite (word_addrs_out, sizeof (DB_ADDR),
852 (size_t)good_addrs_this_block, fp_d99_new);
853 if (num_writes != good_addrs_this_block)
856 if (good_addrs_left <= 0) {
858 * Write blocks of -1s until new d2x free slot
859 * count is exhausted. The last block may be <
862 slots_left = d23new.or_hwfree;
863 while (slots_left > 0) {
865 * set x to number of -1's to write for
868 if (slots_left > MAX_REC_READ) {
870 slots_left -= MAX_REC_READ;
876 for (a = 0; a < x; a++)
877 word_addrs_out[a] = (DtSrINT32) -1;
878 /* BYTE_SWAP not required for foxes */
879 num_writes = fwrite (word_addrs_out,
880 sizeof(DB_ADDR), (size_t)x, fp_d99_new);
883 } /* end while loop to write out all -1's */
886 } /* endif for validation_mode for this block */
889 * If NOT in validation mode, just write out the new
890 * d99 block as an exact copy of the input block.
891 * BYTE_SWAP not required because word_addrs is
892 * still in its original network order from the fread.
895 num_writes = fwrite (word_addrs, sizeof(DB_ADDR),
896 (size_t)num_reads, fp_d99_new);
897 if (num_writes != num_reads) {
900 catgets(dtsearch_catd, MS_dtsrclean, 665,
901 "%s Write error on %s: %s.\n"),
902 PROGNAME"665", fname_d99_new, strerror(errno));
903 end_of_job (4, SHOW_PROGRESS + SHOW_EXITCODE);
905 } /* endelse for NOT validation_mode for this block */
907 } /* end loop for all blocks for this entire word
910 /* write the updated d2x record */
911 write_d2x (&d23new, keyfield);
915 * Every now and then print a dot. Print complete progress
916 * msg after DOTS_PER_MSG dots.
918 if (!(reccount % recs_per_dot)) {
919 if (++dot_count > DOTS_PER_MSG) {
921 print_progress ("Progress");
924 fputc ('.', aa_stderr);
925 need_linefeed = TRUE;
926 if (!(dot_count % 10L))
927 fputc (' ', aa_stderr);
930 } /* end of print-a-dot */
// Exit with the pending signal code recorded by signal_shutdown(); the test
// that guards this call is not visible in this excerpt.
933 end_of_job (shutdown_now, SHOW_PROGRESS + SHOW_EXITCODE);
934 KEYNEXT (PROGNAME "196", keyfield, 0);
935 } /* end of main loop on each word in database */
939 } /* copy_new_d99() */
942 /************************************************/
946 /************************************************/
947 int main (int argc, char *argv[])
951 unsigned char *bvptr;
952 DB_ADDR dba, dba1, dbaorig;
954 char fname_d21_new[1024];
955 char fname_d21_old[1024];
956 char fname_d22_new[1024];
957 char fname_d22_old[1024];
958 char fname_d23_new[1024];
959 char fname_d23_old[1024];
960 FILE *fp_d21_new = NULL;
961 FILE *fp_d21_old = NULL;
962 FILE *fp_d22_new = NULL;
963 FILE *fp_d22_old = NULL;
964 FILE *fp_d23_new = NULL;
965 FILE *fp_d23_old = NULL;
966 char full_dbname_old[1024];
967 char full_dbname_new[1024];
968 DtSrINT32 max_bitvec = 0L;
971 char readbuf[1024 + 32];
972 unsigned long reads_per_dot;
973 char recidbuf[DtSrMAX_DB_KEYSIZE + 4];
976 struct or_dbrec dbrec;
979 setlocale (LC_ALL, "");
980 dtsearch_catd = catopen (FNAME_DTSRCAT, 0);
983 strftime (dbfpath, sizeof (dbfpath), /* just use any ol' buffer */
984 catgets (dtsearch_catd, MS_misc, 22, "%A, %b %d %Y, %I:%M %p"),
985 localtime (&starttime));
986 printf ( catgets(dtsearch_catd, MS_dtsrclean, 11,
987 "%s Version %s. Run %s.\n") ,
988 aa_argv0, AUSAPI_VERSION, dbfpath);
990 signal (SIGHUP, signal_shutdown);
991 signal (SIGINT, signal_shutdown);
992 signal (SIGQUIT, signal_shutdown);
993 signal (SIGTRAP, signal_shutdown);
994 signal (SIGKILL, signal_shutdown); /* this cannot be trapped */
995 signal (SIGALRM, signal_shutdown);
996 signal (SIGTERM, signal_shutdown);
997 signal (SIGPWR, signal_shutdown);
999 signal (SIGXCPU, signal_shutdown);
1000 signal (SIGDANGER, signal_shutdown);
1003 user_args_processor (argc, argv);
1005 /* In order to find old files, we have to check if
1006 * DBFPATH environment variable has been set.
1007 * Load the fully constructed DBFPATH-dbname into its own buffer.
1009 full_dbname_old[0] = '\0';
1011 if ((ptr = getenv ("DBFPATH")) != NULL) {
1014 catgets(dtsearch_catd, MS_dtsrclean, 12,
1015 "%s: Ignoring empty DBFPATH environment variable.\n") ,
1018 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 13,
1019 "%s: Using DBFPATH = '%s'.\n") ,
1021 strcpy (full_dbname_old, ptr);
1023 /* Ensure that DBFPATH ends in a slash. */
1024 ptr = strchr (full_dbname_old, '\0');
1025 if (*(ptr - 1) != LOCAL_SLASH) {
1026 *ptr++ = LOCAL_SLASH;
1029 strcpy (dbfpath, full_dbname_old);
1033 /* Currently full_dbname_old contains just the path.
1034 * Similarly, build just path name for the 2 new files
1035 * using full_dbname_new as a buffer.
1036 * Verify they don't both refer to the same directory.
1038 strcpy (full_dbname_new, arg_newpath);
1039 ptr = strchr (full_dbname_new, '\0');
1040 if (*(ptr - 1) != LOCAL_SLASH) {
1041 *ptr++ = LOCAL_SLASH;
1044 if (strcmp (full_dbname_old, full_dbname_new) == 0) {
1045 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 393,
1046 "%s Old and new directories are identical: '%s'.\n"),
1047 PROGNAME"393", full_dbname_old);
1048 end_of_job (2, SHOW_USAGE);
1051 /* Complete full_dbname_old by appending dbname to the path prefix.
1052 * Then build full path/file names for all 4 files.
1054 strcat (full_dbname_old, arg_dbname);
1055 strcat (full_dbname_new, arg_dbname);
1056 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 14,
1057 "%s: Old files: '%s.d2x, .d99'.\n") ,
1058 aa_argv0, full_dbname_old);
1059 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 15,
1060 "%s: New files: '%s.d2x, .d99'.\n") ,
1061 aa_argv0, full_dbname_new);
1063 strcpy (fname_d99_old, full_dbname_old);
1064 strcat (fname_d99_old, ".d99");
1065 strcpy (fname_d21_old, full_dbname_old);
1066 strcat (fname_d21_old, ".d21");
1067 strcpy (fname_d22_old, full_dbname_old);
1068 strcat (fname_d22_old, ".d22");
1069 strcpy (fname_d23_old, full_dbname_old);
1070 strcat (fname_d23_old, ".d23");
1071 strcpy (fname_d99_new, full_dbname_new);
1072 strcat (fname_d99_new, ".d99");
1073 strcpy (fname_d21_new, full_dbname_new);
1074 strcat (fname_d21_new, ".d21");
1075 strcpy (fname_d22_new, full_dbname_new);
1076 strcat (fname_d22_new, ".d22");
1077 strcpy (fname_d23_new, full_dbname_new);
1078 strcat (fname_d23_new, ".d23");
1080 /* If the user hasn't already authorized overwriting preexisting files,
1081 * check new directory and if new files already exist,
1082 * ask permission to overwrite.
1085 oops = FALSE; /* TRUE forces a user prompt */
1086 if ((fp_d99_new = fopen (fname_d99_new, "r")) != NULL) {
1087 fclose (fp_d99_new);
1090 if ((fp_d21_new = fopen (fname_d21_new, "r")) != NULL) {
1091 fclose (fp_d21_new);
1094 if ((fp_d22_new = fopen (fname_d22_new, "r")) != NULL) {
1095 fclose (fp_d22_new);
1098 if ((fp_d23_new = fopen (fname_d23_new, "r")) != NULL) {
1099 fclose (fp_d23_new);
1103 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 24,
1104 "%s: One or more new files already exist.\n") ,
1107 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 463,
1108 "%s Command line argument disallows file overlay.\n"),
1110 end_of_job (2, SHOW_EXITCODE);
1112 fputs (catgets(dtsearch_catd, MS_dtsrclean, 45,
1113 " Is it ok to overlay files in new directory? [y/n] "),
1117 fgets (readbuf, sizeof(readbuf), stdin);
1118 if (strlen(readbuf) && readbuf[strlen(readbuf)-1] == '\n')
1119 readbuf[strlen(readbuf)-1] = '\0';
1121 if (tolower (*readbuf) != 'y')
1122 end_of_job (2, SHOW_NOTHING);
1124 } /* end of check for overlaying new files */
1126 /* Open all files. The d2x's are opened so that the old ones
1127 * can be copied into the new directory before starting
1128 * the garbage collection process proper.
1129 * The d99's are opened now just to verify permissions.
1131 oops = FALSE; /* TRUE ends job, but only after trying all 4 files */
1132 open_all_files (&fp_d21_old, fname_d21_old, "rb", &size_d21_old, &oops);
1133 open_all_files (&fp_d22_old, fname_d22_old, "rb", &size_d22_old, &oops);
1134 open_all_files (&fp_d23_old, fname_d23_old, "rb", &size_d23_old, &oops);
1135 open_all_files (&fp_d99_old, fname_d99_old, "rb", &size_d99_old, &oops);
1136 open_all_files (&fp_d21_new, fname_d21_new, "wb", NULL, &oops);
1137 open_all_files (&fp_d22_new, fname_d22_new, "wb", NULL, &oops);
1138 open_all_files (&fp_d23_new, fname_d23_new, "wb", NULL, &oops);
1139 open_all_files (&fp_d99_new, fname_d99_new, "wb", NULL, &oops);
1142 end_of_job (shutdown_now, SHOW_EXITCODE);
1144 end_of_job (2, SHOW_EXITCODE);
1146 /* Copy old d2x files to new directory.
1147 * Database will open using new files so only they will be changed.
1149 copy_old_d2x_to_new (fname_d21_old, fname_d21_new, fp_d21_old, fp_d21_new);
1150 copy_old_d2x_to_new (fname_d22_old, fname_d22_new, fp_d22_old, fp_d22_new);
1151 copy_old_d2x_to_new (fname_d23_old, fname_d23_new, fp_d23_old, fp_d23_new);
1153 /* Open database, but use new d2x files for updates. */
1154 RENFILE (PROGNAME"1102", arg_dbname, OR_D21, fname_d21_new);
1155 RENFILE (PROGNAME"1104", arg_dbname, OR_D22, fname_d22_new);
1156 RENFILE (PROGNAME"1106", arg_dbname, OR_D23, fname_d23_new);
1157 if (!austext_dopen (arg_dbname, (dbfpath[0] == 0) ? NULL : dbfpath,
1159 puts (DtSearchGetMessages ());
1160 end_of_job (3, SHOW_EXITCODE);
1163 /* This is where efim changed real dba to
1164 * record number (still called dba)
1166 RECFRST (PROGNAME "1067", OR_OBJREC, 0);
1167 CRGET (PROGNAME "1068", &dba, 0); /* dba of first real obj
1169 recslots = dbrec.or_recslots; /* vista slots per obj
1171 dba_offset = recslots - (dba & 0xffffff); /* accounts for dbrec */
1173 /* total_num_addrs = what reccount would be if
1174 * all holes were filled with good records.
1176 total_num_addrs = (dbrec.or_maxdba - (dba & 0xffffff) + 1) / recslots + 1;
1177 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 25,
1178 "%s: curr reccnt=%ld, mxdba=%ld, sl/rec=%ld, tot#adr=%ld.\n") ,
1179 aa_argv0, (long)dbrec.or_reccount, (long)dbrec.or_maxdba,
1180 (long)dbrec.or_recslots, (long)total_num_addrs);
1182 /* Initialize validation_mode (checkd99) */
1183 if (validation_mode) {
1185 * Allocate and initialize a bit vector: 4 bits for every
1186 * possible d00 database address.
1188 max_bitvec = (total_num_addrs >> 1) + 2;
1189 if ((bit_vector = malloc ((size_t)max_bitvec + 64)) == NULL) {
1190 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 465,
1191 "%s WARNING: Can't allocate memory for bit vector.\n"
1192 " 'Validate' mode switched off.\n"),
1194 validation_mode = FALSE;
1195 normal_exitcode = 1; /* warning */
1196 goto EXIT_INIT_VALIDATION;
1198 memset (bit_vector, 0, (size_t)max_bitvec);
1201 * Read every d00 rec sequentially. 1 in bit #1 (lowest
1202 * order) in bit vector means record (dba) exists in d00
1203 * file. While we're at it, count the total number of
1206 x = dbrec.or_reccount / 50 + 1; /* x = recs per dot */
1207 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 26,
1208 "%s: Reading d00 file. Each dot appx %ld database documents...\n"),
1212 RECFRST (PROGNAME "534", OR_OBJREC, 0);
1213 while (db_status == S_OKAY) {
1214 CRREAD (PROGNAME "617", OR_OBJKEY, recidbuf, 0);
1216 /* print periodic progress dots */
1217 if (!(++reccount % x)) {
1218 fputc ('.', aa_stderr);
1219 need_linefeed = TRUE;
1220 if (!(++dot_count % 10L))
1221 fputc (' ', aa_stderr);
1226 * Get dba and record number and confirm it will not
1227 * overflow bit vector.
1229 CRGET (PROGNAME "537", &dba, 0);
1230 dba &= 0x00ffffff; /* mask out file number in high order byte */
1231 dba1 = (dba + dba_offset) / recslots; /* ="rec number", base 1 */
1232 if (dba1 >= total_num_addrs) {
1234 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 561,
1235 "%s DBA '%d:%ld' (rec #%ld) in d00 exceeds "
1236 "total num addrs %ld;\n"
1237 " Bit vector overflow because maxdba %ld"
1238 " in dbrec is incorrect.\n"),
1239 PROGNAME"561", OR_D00, (long)dba, (long)dba1,
1240 (long)total_num_addrs, (long)dbrec.or_maxdba);
1241 end_of_job (7, SHOW_EXITCODE);
1244 end_of_job (shutdown_now, SHOW_EXITCODE);
1247 * Set bit #1 of even or odd nibble to indicate that
1248 * this record *number* actually exists in d00 file.
1250 bit_vector[dba1 >> 1] |= (dba1 & 1L) ? 0x01 : 0x10;
1252 RECNEXT (PROGNAME "541", 0);
1253 } /* end of sequential read thru d00 file */
1255 TERMINATE_LINE (); /* end the dots... */
1257 /* confirm that RECCOUNT record holds the correct number */
1258 if (dbrec.or_reccount == reccount) {
1260 catgets(dtsearch_catd, MS_dtsrclean, 27,
1261 "%s: Confirmed %ld DOCUMENTS in %s.d00.\n") ,
1262 aa_argv0, (long)dbrec.or_reccount, arg_dbname);
1265 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 28,
1266 "%s: %ld DOCUMENTS actually in %s.d00 not ="
1267 " %ld count stored there.\n"
1268 " Count will be corrected in new d00 file.\n") ,
1269 aa_argv0, (long)reccount, arg_dbname, (long)dbrec.or_reccount);
1270 dbrec.or_reccount = reccount;
1271 rewrite_reccount = TRUE;
1274 EXIT_INIT_VALIDATION:;
1275 } /* end of validation_mode initialization */
1277 /* initialize main loop */
1281 dot_count = DOTS_PER_MSG; /* force initial msg after first
1284 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 29,
1285 "%s: Compressing into %s. Each dot appx %lu words...\n") ,
1286 aa_argv0, arg_newpath, (unsigned long)recs_per_dot);
1288 /* write New Header Information to a new d99 file */
1289 init_header (fp_d99_new, &fl_hdr);
1291 /* Sequentially read each word key file in big loop.
1292 * For each word, read the d99.
1293 * In validation mode check the dbas.
1294 * If not validating, just blindly rewrite the old d99 to the new one.
1295 * If validating only write good dba's and mark the bit vector.
1297 copy_new_d99 (OR_SWORDKEY);
1298 copy_new_d99 (OR_LWORDKEY);
1299 copy_new_d99 (OR_HWORDKEY);
1303 end_of_job (50, SHOW_PROGRESS + SHOW_EXITCODE);
1305 print_progress ("Final");
1307 /* If validation_mode requested, traverse bit vector and print out
1308 * table of each d00 record which cannot be accessed from any d99 word.
1309 * If a validation file name was provided, write out a line for each
1310 * bad record in albeniz-compatible format.
1312 if (validation_mode) {
1313 for (x = 0, bvptr = bit_vector; x < max_bitvec; x++, bvptr++) {
1314 for (j = 0; j < 8; j += 4) { /* j = 0 or 4, amount of
1316 /* a = bits #1 and #2 of current nibble */
1317 a = 0x30 & (*bvptr << j);
1319 /* if dba is in d00 but not in d99... */
1320 if (a & 0x10 && !(a & 0x20)) {
1321 /* ...construct valid vista dba */
1324 dbaorig++; /* slot number */
1325 /*** dba = dbaorig | (OR_D00 << 24); ***//* r
1328 /* now efim retranslates back to real dba */
1329 dba = ((dbaorig + 1) * recslots - dba_offset)
1332 /* ...print out err msg */
1333 CRSET (PROGNAME "734", &dba, 0);
1334 CRREAD (PROGNAME "735", OR_OBJKEY, readbuf, 0);
1336 catgets(dtsearch_catd, MS_dtsrclean, 444,
1337 "*** %s d00 record '%s' is not referenced in d99.\n"
1338 " DBA = %d:%ld (x%02x:%06lx).\n") ,
1339 PROGNAME"444", readbuf, OR_D00,
1340 (long)dba, OR_D00, (long)dba);
1342 /*...if albeniz compatible output requested, do it */
1344 fprintf (frecids, DISCARD_FORMAT, arg_dbname,
1345 readbuf, "MrClean", datestr);
1349 if (max_corruption > 0L &&
1350 corruption_count >= max_corruption)
1351 end_of_job (94, SHOW_EXITCODE);
1352 } /* endif where d00 is not referenced by d99 */
1353 } /* end forloop: every 2 bits in a bitvector byte */
1354 } /* end forloop: every byte in bitvector */
1357 /* Normal_exitcode currently will contain either a 0 or a 1.
1358 * If we were uncorrupting the d99 and found any corrupt links,
1359 * make sure it's 1 (warning). If there were corrupt links and
1360 * we weren't trying to uncorrupt it, change it to a hard error.
1362 /***by the way, corruption_count can be > 0 only if in validation_mode.**/
1363 if (corruption_count > 0L) {
1364 if (validation_mode)
1365 normal_exitcode = 1;
1367 normal_exitcode = 90;
1369 end_of_job (normal_exitcode, SHOW_EXITCODE);
1372 /*************************** DTSRCLEAN.C ****************************/