2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
24 * COMPONENT_NAME: austext
26 * FUNCTIONS: TERMINATE_LINE
43 * (C) COPYRIGHT International Business Machines Corp. 1993,1995
45 * Licensed Materials - Property of IBM
46 * US Government Users Restricted Rights - Use, duplication or
47 * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
49 /*************************** DTSRCLEAN.C ****************************
50 * $TOG: dtsrclean.c /main/9 1998/04/17 11:23:57 mgreess $
51 * Does garbage collection (ie compression) of .d99 file.
52 * Optionally verifies all database addresses in d99.
53 * Modification of clndtbs.c and checkd99.c.
54 * Does NOT use austext engine so this must be modified if schema changes.
57 * All command input is on command line. Reads existing d2x and d99 files.
60 * New .d2x and .d99 files are placed into the directory specified by user.
62 * EXIT CODE STANDARDS:
64 * 1 = warnings, but output should be ok.
65 * 2 = failure in cmd line parse or other initialization; job never started.
66 * 3 - 49 = fatal error, but output may be acceptable.
67 * 50 - 99 = fatal error and output files are probably unusable.
68 * (In this program, even input may be corrupted).
69 * 100+ = aborting due to asynchronous interrupt signal.
70 * Output files may or may not be unusable.
73 * Revision 2.4 1996/05/08 16:20:50 miker
74 * Added RENFILEs for new d2x files; austext_dopen no longer does.
76 * Revision 2.3 1996/02/01 18:13:06 miker
77 * Deleted BETA definition.
79 * Revision 2.2 1995/10/26 14:51:08 miker
80 * Renamed from mrclean.c. Added prolog.
83 * Revision 2.1 1995/09/22 21:18:52 miker
84 * Freeze DtSearch 0.1, AusText 2.1.8
86 * Revision 1.11 1995/09/05 18:16:46 miker
87 * Name, msg, and other minor changes for DtSearch.
88 * Print messages if austext_dopen() fails.
90 * Revision 1.10 1995/06/02 15:52:42 miker
91 * Cleaned up -m and bit vector overflow msgs.
93 * Revision 1.9 1995/05/30 19:15:58 miker
94 * Print beta char in startup banner msg.
95 * Remove -m option and max_totrecs; select bit vector
96 * size from maxdba, not reccount.
105 #include <sys/stat.h>
108 #include <sys/types.h>
109 #include <netinet/in.h>
/* Message-catalog set numbers passed to catgets() throughout this file. */
111 #define MS_misc 1 /* msg catalog set number */
112 #define MS_dtsrclean 26 /* msg catalog set number */
113 #define DISCARD_FORMAT "%s\t\"%s\"\t%s\t%s\n" /* copied from oe.h */
/* Defaults reported by print_usage() for the -p option and the
 * "complete progress message every N dots" rule. */
114 #define RECS_PER_DOT 1000
115 #define DOTS_PER_MSG 50
116 #define DISK_BLKSIZE 512
/* Default for max_corruption (the -c limit shown in print_usage()). */
117 #define MAX_CORRUPTION 100
/* NOTE(review): because of the sizeof, this expression has type size_t.
 * Comparisons such as "slots_left > MAX_REC_READ" are therefore done
 * unsigned if DtSrINT32 is a signed type -- confirm the counts compared
 * against it can never be negative. */
118 #define MAX_REC_READ (DISK_BLKSIZE / sizeof(DB_ADDR))
120 * Max number of addresses to be read from database addresses
121 * file, ie the size of one block read from hard disk.
/* Prefix pasted onto message numbers, e.g. PROGNAME"439". */
123 #define PROGNAME "DTSRCLEAN"
125 #define SHOW_NOTHING 0 /* bit arguments for end_of_job() */
127 #define SHOW_EXITCODE 2
128 #define SHOW_PROGRESS 4
/* If need_linefeed is set (a progress dot was printed without a newline),
 * write '\n' to aa_stderr and clear the flag.
 * NOTE(review): this expands to a bare if-statement, so an invocation
 * placed directly before an "else" would capture that else; wrapping the
 * body in do { ... } while (0) would avoid the hazard. */
130 #define TERMINATE_LINE() if(need_linefeed){fputc('\n',aa_stderr);need_linefeed=FALSE;}
132 /*-------------------------- GLOBALS ----------------------------*/
/* Command line operands: set by user_args_processor() from argv. */
133 static char *arg_dbname = NULL;
134 static char *arg_newpath = NULL;
/* Validation-mode bit vector; copy_new_d99() indexes it as one nibble per
 * record number (dba >> 1 selects the byte, dba & 1 selects the nibble). */
135 unsigned char *bit_vector = NULL;
/* Running total of d99 bytes accounted for; reported by print_progress(). */
136 static size_t bytes_in = 0L;
/* Number of bad d99 links seen so far; compared against max_corruption. */
137 static size_t corruption_count = 0L;
138 static struct or_swordrec
140 static struct or_lwordrec
142 static struct or_hwordrec
144 static char datestr[32] = ""; /* "1946/04/17 13:03" */
145 static int debug_mode = FALSE;
146 static size_t dot_count = 0L;
147 char fname_d99_new[1024];
148 char fname_d99_old[1024];
149 FILE *fp_d99_new = NULL;
150 FILE *fp_d99_old = NULL;
/* Output stream opened by the -v<fname> option in user_args_processor(). */
151 static FILE *frecids = NULL;
/* Set TRUE per address in copy_new_d99(), FALSE by validation_error(). */
152 static int is_valid_dba;
/* Corruption limit; a value of 0 disables the limit (see the
 * "max_corruption > 0L &&" tests in end_of_job() and copy_new_d99()). */
153 static size_t max_corruption = MAX_CORRUPTION;
154 static int normal_exitcode = 0;
/* TRUE while a progress dot line is pending a newline (TERMINATE_LINE). */
155 static int need_linefeed = FALSE;
156 static int overlay_no = FALSE;
157 static int overlay_yes = FALSE;
161 recslots; /* dbrec.or_recslots promoted to INT32 */
165 recs_per_dot = RECS_PER_DOT;
166 static int rewrite_reccount = FALSE;
/* Written asynchronously by signal_shutdown() as 100 + signal number and
 * later passed to end_of_job() as the exit code.
 * NOTE(review): an object shared with a signal handler should be declared
 * "volatile sig_atomic_t"; as a plain int the compiler may cache it. */
167 static int shutdown_now = 0; /* = FALSE */
/* Sizes of the old files, filled in by open_all_files() from fstat(). */
168 static size_t size_d21_old = 0L;
169 static size_t size_d22_old = 0L;
170 static size_t size_d23_old = 0L;
171 static size_t size_d99_old = 0L;
/* Start time subtracted from time(NULL) in print_progress(). */
172 static time_t timestart = 0L;
/* TRUE when -v was given; may be switched off in main() if the bit vector
 * cannot be allocated. */
175 static int validation_mode = FALSE;
178 /********************************************************/
180 /* signal_shutdown */
182 /********************************************************/
183 /* Interrupt handler for SIGINT and the other signals registered in main(). */
/*
 * Signal handler registered in main() via signal() for the shutdown-type
 * signals.  It only records the request: shutdown_now is set to
 * 100 + <signal number>, which is the exit code later handed to
 * end_of_job() (end_of_job() reports "exitcode - 100" as the signal).
 *
 * sig: number of the signal being delivered.
 *
 * NOTE(review): shutdown_now is declared as a plain "static int"; for an
 * object written from a handler "volatile sig_atomic_t" is the portable type.
 */
184 static void signal_shutdown (int sig)
186 shutdown_now = 100 + sig;
188 } /* signal_shutdown() */
191 /************************************************/
195 /************************************************/
196 /* Prints usage statement to stderr. */
/*
 * Writes the usage text (message catalog set MS_dtsrclean, message 1, with
 * the English text below as the fallback) to aa_stderr.  The conversions in
 * the text are filled, in order, with aa_argv0, RECS_PER_DOT, DOTS_PER_MSG
 * and MAX_CORRUPTION.
 *
 * NOTE(review): the -p line uses %lu, but RECS_PER_DOT is the plain int
 * constant 1000; passing an int where the format expects unsigned long is
 * a format/argument mismatch.  Casting the argument to unsigned long (or
 * changing the conversion to %d) would make it well defined.
 */
197 static void print_usage (void)
199 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 1,
200 "\nUSAGE: %s [options] <dbname> <newpath>\n"
201 " Compresses unused d99 space and validates d00-d99 links.\n"
202 " -p<N> Progress dots printed every <N> records (default %lu).\n"
203 " Complete progress message printed every %d dots.\n"
204 " -oy Authorizes overlaying preexisting d99/d2<N> files in newpath.\n"
205 " -on Forces exit if preexisting d99/d2<N> files in newpath.\n"
206 " -v Validates d99 and d00 links, uncorrupts d99 file, and ensures\n"
207 " accurate record count. Also use -c0 to uncorrupt entire database.\n"
208 " -v<fname> Same as -v but also writes all d00 recs unreferenced by d99\n"
209 " to <fname> in format suitable to extract into .fzk file format.\n"
210 " -c<N> Exits if more than <N> corrupted/incomplete links (default %d).\n"
211 " Corruption limit turned off by -c0.\n"
212 " <dbname> 1 - 8 char database name = the old d99/d2<N> files to be updated.\n"
213 " Files found in local directory or DBFPATH environment variable.\n"
214 " <newpath> Specifies where the new d99/d2<N> files will be placed.\n"
215 " If first char is not slash, path is relative to local directory.\n"
217 " 0: Complete success. 1: Warning. 2: Job never started.\n"
218 " 3-49: Job ended prematurely, old files ok, new files unusable.\n"
219 " 50-99: Fatal Error, even old database may be corrupted.\n"
220 " 100+: Ctrl-C, kill, and all other signal interrupts cause premature\n"
221 " end, new files may be unusable. Signal = exit code - 100.\n")
222 ,aa_argv0, RECS_PER_DOT, DOTS_PER_MSG, MAX_CORRUPTION);
224 } /* print_usage() */
227 /************************************************/
231 /************************************************/
232 /* Prints progress msg after dots or at end of job.
233 * Label is "Final" or "Progress".
/*
 * Writes two status lines to aa_stderr:
 *   - message 2: <label>, a percentage computed as
 *     100 * bytes_in / size_d99_old, bytes_in / 1000 as "KB", and the
 *     time elapsed since timestart as min:sec;
 *   - message 3: the current reccount and the database name.
 *
 * label: text inserted into message 2 (end_of_job() passes "Final",
 *        copy_new_d99() passes "Progress").
 *
 * NOTE(review): the first test compares the raw quotient
 * bytes_in / size_d99_old with 99.5, whereas the percentage below is that
 * quotient times 100; the threshold was presumably meant to be 0.995.
 * As written the test can only be true when bytes_in is at least 99.5
 * times size_d99_old.
 * NOTE(review): size_d99_old is used as a divisor; open_all_files() reports
 * an empty file but confirm the job cannot reach here with size 0.
 */
235 static void print_progress (char *label)
240 seconds = time (NULL) - timestart; /* total seconds elapsed */
244 if ((float) bytes_in / (float) size_d99_old >= 99.5)
247 compression = (int) (100.* (float) bytes_in / (float) size_d99_old);
248 if (compression < 0 || compression > 100)
253 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 2,
254 "%s: %s Compression %d%% (about %lu KB) in %ld:%02ld min:sec.\n") ,
255 aa_argv0, label, compression, bytes_in / 1000L,
256 seconds / 60UL, seconds % 60UL);
258 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 3,
259 "%s: Counted %ld WORDS in %s.d99.\n") ,
260 aa_argv0, (long)reccount, arg_dbname);
262 } /* print_progress() */
265 /************************************************/
269 /************************************************/
270 /* Exits program. Prints status messages before going down.
271 * Should be called on even record boundaries whenever possible,
272 * ie after record writes complete and shutdown_now > 0 (TRUE).
/*
 * Common exit path: prints final status messages to aa_stderr and then
 * calls DtSearchExit (exitcode).
 *
 * exitcode:   value passed to DtSearchExit().  Values >= 100 are treated
 *             as "100 + signal number" and reported as an interrupt.
 * show_flags: bitwise combination of SHOW_PROGRESS, SHOW_USAGE and
 *             SHOW_EXITCODE (or SHOW_NOTHING) selecting optional output.
 *
 * Messages, in order:
 *   - interrupt notice when exitcode >= 100;
 *   - "No corrupted links detected" when validation_mode is on and
 *     corruption_count is 0;
 *   - when corruption_count > 0, an abort notice if the max_corruption
 *     limit is enabled (> 0) and has been reached, and the
 *     detected/corrected count;
 *   - print_progress ("Final") if SHOW_PROGRESS is set;
 *   - the exit code if SHOW_EXITCODE is set.
 *
 * NOTE(review): corruption_count is a size_t but is printed with %ld in
 * message 193; a cast to long (or %zu) would match the format.
 */
274 static void end_of_job (int exitcode, int show_flags)
277 if (exitcode >= 100) {
278 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 66,
279 "%s Aborting after interrupt signal %d.\n"),
280 PROGNAME"66", exitcode - 100);
282 if (validation_mode && corruption_count == 0L)
283 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 4,
284 "%s: No corrupted links detected.\n") ,
286 if (corruption_count > 0L) {
287 if (max_corruption > 0L && corruption_count >= max_corruption)
288 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 193,
289 "%s Aborting at %ld corrupted links.\n"),
290 PROGNAME"193", corruption_count);
292 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 194,
293 "%s Detected%s %ld corrupted/incomplete link(s).\n"),
295 (validation_mode) ? " and corrected" : "",
298 if (show_flags & SHOW_PROGRESS) {
299 print_progress ("Final");
301 if (show_flags & SHOW_USAGE)
303 if (show_flags & SHOW_EXITCODE)
304 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 5,
305 "%s: Exit code = %d.\n") , aa_argv0, exitcode);
306 DtSearchExit (exitcode);
310 /************************************************/
312 /* user_args_processor() */
314 /************************************************/
315 /* Reads and verifies users command line arguments and
316 * converts them into internal switches and variables.
317 * Some attempt is made to read as many errors as possible
318 * before ending job for bad arguments.
/*
 * Parses the command line into the file-scope option variables.
 *
 * argc, argv: the program arguments as received by main().
 *
 * Visible behavior:
 *   - Arguments beginning with '-' are dispatched on tolower() of their
 *     second character.  "-russell" is a debug backdoor; a message says
 *     the -m argument is no longer necessary; an option examining its
 *     third character reports "Invalid %.2s argument" on bad input
 *     (presumably -oy / -on, per the usage text).
 *   - The validation option sets validation_mode = TRUE; if text follows
 *     the two option characters it is opened for writing as frecids, and
 *     datestr is formatted as "%Y/%m/%d %H:%M".
 *   - recs_per_dot and max_corruption are read with atol() from the text
 *     after the option letter (presumably -p and -c, per the usage text).
 *   - Unknown options produce message 159.
 *   - After the options, exactly two operands are required; the missing /
 *     too-many cases print messages 210-212 and end the job with exit
 *     code 2 and the usage text.
 *   - arg_dbname is taken from argv[0] and arg_newpath from argv[1]; this
 *     presumes argv has been advanced past the options by then (the
 *     advancing statements are not visible in this excerpt).
 *   - A dbname longer than 8 characters is rejected with message 229.
 *
 * NOTE(review): atol() cannot report malformed numbers; strtol() with an
 * end-pointer check would allow bad -p / -c values to be diagnosed.
 * NOTE(review): tolower() is given a plain char; a cast to unsigned char
 * avoids undefined behavior for negative char values.
 */
320 static void user_args_processor (int argc, char **argv)
329 end_of_job (2, SHOW_USAGE);
331 /* parse all args that begin with a dash (-) */
335 if (argptr[0] != '-')
337 switch (tolower (argptr[1])) {
339 if (strcmp (argptr, "-russell") == 0) /* backdoor debug */
345 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 301,
346 "%s The -m argument is no longer necessary.\n"),
351 i = tolower (argptr[2]);
359 catgets(dtsearch_catd, MS_dtsrclean, 177,
360 "%s Invalid %.2s argument.\n"),
361 PROGNAME"177", argptr);
367 validation_mode = TRUE;
368 if (argptr[2] != '\0') {
369 if ((frecids = fopen (argptr + 2, "w")) == NULL) {
371 catgets(dtsearch_catd, MS_dtsrclean, 802,
372 "%s Unable to open '%s' to output"
373 " unreferenced d00 records:\n %s\n"),
374 PROGNAME"802", argptr, strerror(errno));
378 strftime (datestr, sizeof (datestr),
379 "%Y/%m/%d %H:%M", localtime (&stamp));
384 recs_per_dot = (DtSrINT32) atol (argptr + 2);
385 if (recs_per_dot <= 0)
390 tempsize = atol (argptr + 2);
393 max_corruption = tempsize;
398 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 159,
399 "%s Unknown argument: '%s'.\n"),
400 PROGNAME"159", argptr);
404 } /* end parse of cmd line args */
406 /* Test how we broke loop.
407 * There should still be 2 args past the ones
408 * beginning with a dash: dbname and newpath.
412 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 210,
413 "%s Missing required dbname argument.\n"),
416 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 211,
417 "%s Missing required newpath argument.\n"),
420 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 212,
421 "%s Too many arguments.\n"),
426 end_of_job (2, SHOW_USAGE);
429 arg_dbname = argv[0];
430 if (strlen (arg_dbname) > 8) {
431 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 229,
432 "%s Invalid database name '%s'.\n"),
433 PROGNAME"229", arg_dbname);
434 end_of_job (2, SHOW_USAGE);
438 * Oldpath and newpath are validated when the files
439 * are copied and the database is opened.
441 arg_newpath = argv[1];
443 } /* user_args_processor() */
446 /************************************************/
448 /* validation_error() */
450 /************************************************/
451 /* Subroutine of validation_mode processing; called from copy_new_d99().
452 * Prints d2x and d99 data at location of error.
453 * Adjusts d2x counts for number of good addrs and free slots.
/*
 * Reports one corrupt d99 link and adjusts the word record being rebuilt.
 * Called from the validation loop in copy_new_d99() after the caller has
 * printed the specific "***" error line.
 *
 * dbaorig: the address value exactly as read from the d99 file (record
 *          number in the upper 3 bytes, weight in the low byte, per the
 *          comments in copy_new_d99()); printed as "orig addr val".
 *
 * Effects visible here:
 *   - is_valid_dba is set to FALSE;
 *   - a slot number is recomputed from recslots and dba_offset and printed
 *     together with the key, offset, address count and free count saved in
 *     d23old;
 *   - d23new.or_hwaddrs is decremented and clamped at 0.
 *
 * NOTE(review): isgraph() receives or_hwordkey[0] directly; if that field
 * is a plain (possibly signed) char, cast to unsigned char first.
 */
455 static void validation_error (DB_ADDR dbaorig)
458 is_valid_dba = FALSE;
462 /* now efim retranslates back to real dba */
464 slot = ((slot + 1) * recslots - dba_offset)
467 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 6,
468 " DBA = %d:%ld (x%02x:%06lx), orig addr val = x%08lx\n"
469 " Word='%c%s' offset=%ld addrs=%ld free=%d\n") ,
470 OR_D00, slot, OR_D00, slot, dbaorig,
471 (!isgraph (d23old.or_hwordkey[0])) ? '^' : d23old.or_hwordkey[0],
472 d23old.or_hwordkey + 1, d23old.or_hwoffset,
473 d23old.or_hwaddrs, d23old.or_hwfree);
474 if (--d23new.or_hwaddrs < 0L)
475 d23new.or_hwaddrs = 0L;
476 /* (should never occur) */
480 } /* validation_error() */
483 /************************************************/
487 /************************************************/
/*
 * Opens one file and, for input files, records its size.
 *
 * fp:    receives the stream returned by fopen (fname, mode).
 * fname: path of the file to open; also used in error messages.
 * mode:  fopen() mode string (main() uses "rb" for old files and "wb"
 *        for new ones).
 * size:  receives st_size from fstat(); a size <= 0 is reported as
 *        "<fname> is empty".
 * oops:  error indicator shared across successive calls so that main()
 *        can try every file before giving up (presumably set TRUE on each
 *        failure; the assignments are not visible in this excerpt).
 *
 * Failures print message 439 (open), 440 (fstat) or 499 (empty file) to
 * aa_stderr.
 *
 * NOTE(review): main() passes NULL for size when opening the new files,
 * while the "is empty" test stores through *size.  Confirm that a NULL
 * check precedes that line in the complete source.
 */
488 static void open_all_files
489 (FILE ** fp, char *fname, char *mode, size_t * size, int *oops) {
490 struct stat fstatbuf;
492 if ((*fp = fopen (fname, mode)) == NULL) {
493 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 439,
494 "%s Can't open %s: %s\n"),
495 PROGNAME"439", fname, strerror (errno));
499 if (fstat (fileno (*fp), &fstatbuf) == -1) {
500 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 440,
501 "%s Can't access status of %s: %s\n"),
502 PROGNAME"440", fname, strerror (errno));
507 if ((*size = fstatbuf.st_size) <= 0L) {
508 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 499,
509 "%s %s is empty.\n"),
510 PROGNAME"499", fname);
514 } /* open_all_files() */
517 /************************************************/
519 /* copy_old_d2x_to_new */
521 /************************************************/
/*
 * Copies the whole of one already-open old d2x file to its already-open
 * new counterpart, sizeof (readbuf) bytes at a time, with no byte
 * swapping.
 *
 * fname_old, fname_new: file names, used only in messages.
 * fp_old:               input stream, read with fread().
 * fp_new:               output stream, written with fwrite().
 *
 * A read error (message 517) or write error (message 489) ends the job
 * with exit code 3.  A pending shutdown request ends the job with
 * shutdown_now as the exit code.  What happens to the two streams after
 * the loop is not visible in this excerpt.
 */
522 static void copy_old_d2x_to_new
523 (char *fname_old, char *fname_new, FILE * fp_old, FILE * fp_new) {
524 char readbuf[1024 + 32];
527 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 7,
528 "%s: Copying from old d2x files to %s...\n") ,
529 aa_argv0, fname_new);
530 for (;;) { /* loop ends when eof set on input stream */
532 i = fread (readbuf, 1, sizeof (readbuf), fp_old);
533 /* byte swap not required on pure copy operation */
535 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 517,
536 "%s Read error on %s: %s.\n"),
537 PROGNAME"517", fname_old, strerror (errno));
538 end_of_job (3, SHOW_EXITCODE);
540 j = fwrite (readbuf, 1, i, fp_new);
542 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 489,
543 "%s Write error on %s: %s.\n"),
544 PROGNAME"489", fname_new, strerror (errno));
545 end_of_job (3, SHOW_EXITCODE);
548 end_of_job (shutdown_now, SHOW_EXITCODE);
556 } /* copy_old_d2x_to_new() */
559 /********************************/
563 /********************************/
564 /* Performs vista RECREAD on curr word record.
565 * CALLER SHOULD CHECK DB_STATUS.
/*
 * Reads the current word record and normalizes it into *glob_word with
 * host-order counters, whichever of the three word record types it is.
 *
 * glob_word: destination record.
 * field:     key field id selecting the record type: OR_SWORDKEY reads
 *            into d21old, OR_LWORDKEY reads into d22old, anything else
 *            reads directly into *glob_word.
 *
 * The caller must test db_status afterwards.
 *
 * NOTE(review): unlike write_d2x(), this function is not declared static.
 */
567 void read_d2x (struct or_hwordrec * glob_word, long field)
/* Short word: read into d21old, then widen into *glob_word. */
569 if (field == OR_SWORDKEY) {
570 RECREAD (PROGNAME "061", &d21old, 0);
571 if (db_status != S_OKAY)
573 strncpy (glob_word->or_hwordkey, d21old.or_swordkey,
/* strncpy does not guarantee termination, so force it. */
575 glob_word->or_hwordkey[DtSrMAXWIDTH_HWORD - 1] = 0;
576 glob_word->or_hwoffset = ntohl (d21old.or_swoffset);
577 glob_word->or_hwfree = ntohl (d21old.or_swfree);
578 glob_word->or_hwaddrs = ntohl (d21old.or_swaddrs);
/* Long word: same treatment via d22old. */
580 else if (field == OR_LWORDKEY) {
581 RECREAD (PROGNAME "069", &d22old, 0);
582 if (db_status != S_OKAY)
584 strncpy (glob_word->or_hwordkey, d22old.or_lwordkey,
586 glob_word->or_hwordkey[DtSrMAXWIDTH_HWORD - 1] = 0;
587 glob_word->or_hwoffset = ntohl (d22old.or_lwoffset);
588 glob_word->or_hwfree = ntohl (d22old.or_lwfree);
589 glob_word->or_hwaddrs = ntohl (d22old.or_lwaddrs);
/* Remaining record type: read in place and convert the counters in place.
 * Note that db_status is not tested between the read and the fix-ups. */
592 RECREAD (PROGNAME "078", glob_word, 0);
593 glob_word->or_hwordkey[DtSrMAXWIDTH_HWORD - 1] = 0;
594 NTOHL (glob_word->or_hwoffset);
595 NTOHL (glob_word->or_hwfree);
596 NTOHL (glob_word->or_hwaddrs);
602 /********************************/
606 /********************************/
607 /* performs vista RECWRITE on curr word record.
608 * CALLER MUST CHECK DB_STATUS.
/*
 * Writes *glob_word back as the current word record, converting the
 * counters to network byte order for the record type chosen by field.
 *
 * glob_word: source record with host-order counters.
 * field:     OR_SWORDKEY writes through d21new, OR_LWORDKEY writes through
 *            d22new, anything else writes *glob_word itself.
 *
 * The caller must test db_status afterwards.
 *
 * NOTE(review): 16 and 40 are presumably the sizes of or_swordkey and
 * or_lwordkey; "sizeof" on the destination would keep them in step with
 * the struct definitions -- confirm against the schema header.
 * NOTE(review): in the last branch the counters of *glob_word are
 * converted in place before the write; whether they are converted back
 * afterwards is not visible in this excerpt.
 */
610 static void write_d2x (struct or_hwordrec * glob_word, long field)
612 if (field == OR_SWORDKEY) {
/* snprintf truncates the key if needed and always terminates it. */
613 snprintf(d21new.or_swordkey, 16, "%s", glob_word->or_hwordkey);
614 d21new.or_swoffset = htonl (glob_word->or_hwoffset);
615 d21new.or_swfree = htonl (glob_word->or_hwfree);
616 d21new.or_swaddrs = htonl (glob_word->or_hwaddrs);
617 RECWRITE (PROGNAME "102", &d21new, 0);
619 else if (field == OR_LWORDKEY) {
620 snprintf(d22new.or_lwordkey, 40, "%s", glob_word->or_hwordkey);
621 d22new.or_lwoffset = htonl (glob_word->or_hwoffset);
622 d22new.or_lwfree = htonl (glob_word->or_hwfree);
623 d22new.or_lwaddrs = htonl (glob_word->or_hwaddrs);
624 RECWRITE (PROGNAME"112", &d22new, 0);
627 HTONL (glob_word->or_hwoffset);
628 HTONL (glob_word->or_hwfree);
629 HTONL (glob_word->or_hwaddrs);
630 RECWRITE (PROGNAME "115", glob_word, 0);
637 /************************************************/
641 /************************************************/
642 /* The garbage collection/compression process itself.
643 * For very large databases, there will be appx 3 million word records,
644 * so the loop should be coded for ***EFFICIENCY***.
/*
 * Rebuilds the d99 address lists for every word record of one key type,
 * writing them contiguously to fp_d99_new and updating each word record
 * with its new offset and counts.
 *
 * keyfield: key field id passed to KEYFRST / KEYNEXT and to read_d2x() /
 *           write_d2x() to select which word file is walked.
 *
 * Per word record (loop runs while db_status == S_OKAY):
 *   1. read_d2x() loads the record into d23new; in validation mode a copy
 *      is kept in d23old for error messages.
 *   2. or_hwfree is forced to 0, so num_holes equals or_hwaddrs and no
 *      free slots are carried over.
 *   3. The old d99 file is positioned at the record's old offset, and the
 *      record's offset is replaced by the current position in the new d99.
 *   4. The addresses are processed in blocks of at most MAX_REC_READ:
 *        - validation mode: each address is byte swapped, then checked for
 *          -1 (overrun into the free area, message 111, exit 91 at the
 *          corruption limit), for a record number >= total_num_addrs
 *          (message 222, exit 92) and against the d00 bit in bit_vector
 *          (message 333, exit 93).  Bad addresses go through
 *          validation_error(); surviving addresses are collected in
 *          word_addrs_out, swapped back and written.
 *        - otherwise: the block is written exactly as read; a short write
 *          prints message 665 and ends the job with exit code 4.
 *   5. write_d2x() stores the updated word record.
 *   6. A progress dot is printed when reccount is a multiple of
 *      recs_per_dot, with a full progress line after DOTS_PER_MSG dots.
 *   7. A pending shutdown ends the job with shutdown_now as the exit code.
 *
 * NOTE(review): the read check "errno || -1 == ret" is not a reliable
 * fread() error test.  fread() returns an item count (never -1), and errno
 * may hold a stale value from an earlier call; comparing ret with
 * num_reads and consulting ferror() is the standard test.
 * NOTE(review): the return value of fseek() on the old d99 is not checked.
 * NOTE(review): the surviving addresses are converted back with NTOHL
 * before being written; HTONL would state the intent (presumably both
 * macros perform the same swap).
 */
646 static void copy_new_d99 (long keyfield)
650 DtSrINT32 slots_left;
651 unsigned char *bvptr;
653 DB_ADDR dba, dbaorig;
657 DtSrINT32 good_addrs_left;
658 DtSrINT32 good_addrs_this_block;
659 DtSrINT32 num_reads, num_writes;
660 DB_ADDR word_addrs[MAX_REC_READ + 64]; /* d99 read buf */
661 DB_ADDR word_addrs_out[MAX_REC_READ + 64]; /* d99 write buf */
663 KEYFRST (PROGNAME "179", keyfield, 0);
664 while (db_status == S_OKAY) {
665 read_d2x (&d23new, keyfield);
666 if (validation_mode) /* save for validation err msgs */
667 memcpy (&d23old, &d23new, sizeof (d23old));
670 * Read old d99 file at specified offset to get total num
671 * "holes". In the first portion of record holes are filled
672 * with representations of valid database addresses +
673 * statistical weights. In the second portion the holes are
674 * "free slots" for future expansion which are
675 * conventionally initialized with a -1.
677 /* force number of free slots to 0(ZERO) */
678 d23new.or_hwfree = 0;
679 fseek (fp_d99_old, d23new.or_hwoffset, SEEK_SET);
680 num_holes = d23new.or_hwaddrs + d23new.or_hwfree;
681 good_addrs_left = d23new.or_hwaddrs;
682 bytes_in += sizeof (DB_ADDR) * num_holes;
684 /* Update the offset in the d2x record buffer */
685 d23new.or_hwoffset = ftell (fp_d99_new);
688 * Copy the array of holes in each disk block, reading the
689 * old and writing to the new. Loop ends when the number
690 * of holes left will fit into one last block.
693 while (!done) { /* loop on each block in this word */
696 if (num_holes > MAX_REC_READ) {
697 num_reads = MAX_REC_READ;
698 num_holes -= MAX_REC_READ;
702 num_reads = num_holes;
705 ret = fread (word_addrs, sizeof(DB_ADDR), (size_t)num_reads, fp_d99_old);
706 if (errno || -1 == ret) {
708 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 657,
709 "%s Read error on %s: %s.\n"),
710 PROGNAME"657", fname_d99_old, strerror (errno));
711 end_of_job (4, SHOW_PROGRESS + SHOW_EXITCODE);
713 /* Note BYTE_SWAP only needed for validation_mode.
714 * If not validating, we're just going to copy
715 * the network format dba's as is directly to
720 * Addrs on d99 are now 'record numbers' not dbas. A
721 * rec# is what the dba/slot# would be if records took
722 * up just one slot and there were no dbrec at start of
723 * file. D99 rec#s start at #1, not #0.
727 * If user requested validation_mode, validate each
728 * 'good' rec# (not free slots) in word_addrs buffer.
729 * If any d99 links are corrupt, skip them when copying
730 * to the new d99 file. Rewrite -1's to all free slots.
731 * ----> NOTE UNUSUAL FORMAT OF DBA HOLES IN D99! <----
732 * Record number is shifted to the high order 3 bytes.
733 * The statistical weight is in the low order byte. The
734 * vista file number is known from the #define constant
735 * OR_D00, and the vista dba/slot# is mapped from rec#
736 * by mult/div number of slots per rec, plus/minus
739 if (validation_mode) {
741 for (swapx = 0; swapx < num_reads; swapx++)
742 NTOHL (word_addrs[swapx]);
744 /* set x to number of good addrs in this block */
745 if (good_addrs_left > num_reads) {
747 good_addrs_left -= num_reads;
755 * Validate the rec#'s in this block. Note that
756 * the loop is skipped if the entire block is free
759 good_addrs_this_block = 0;
760 for (a = 0; a < x; a++) { /* a = index to curr dba */
762 * Get rec#. Save original rec# for err msgs,
763 * then shift slot number to lower 3 bytes,
766 dbaorig = word_addrs[a]; /* rec#,rec#,rec#:wt */
767 dba = dbaorig >> 8; /* 0,rec#,rec#,rec# */
768 is_valid_dba = TRUE; /* default */
771 * If original rec# == -1 we've overrun the
772 * good rec#'s into the expansion area, which
773 * is filled with -1's. This is real bad news
774 * because if the counts in d02 are bad, the
775 * online programs will quickly crash, and we
776 * can't continue this program. Advance to next
777 * rec# because we can't mark the bit vector.
779 if (dbaorig == -1L) {
782 catgets(dtsearch_catd, MS_dtsrclean, 111,
783 "*** %s DBA in d99 = -1. "
784 "Probable overrun into expansion\n"
785 " area due to incorrect count values "
788 validation_error (dbaorig);
790 if (max_corruption > 0L &&
791 corruption_count >= max_corruption)
792 end_of_job (91, SHOW_PROGRESS + SHOW_EXITCODE);
793 continue; /* skip the bit vector
798 * If slot number > max totrecs, we have a
799 * corrupted d99-d00 link because we've already
800 * validated the d00 file and we know that it
801 * has no slots > max. Also we have to advance
802 * to next slot because we can't mark the bit
805 /******if (dba >= max_totrecs)*******/
806 if (dba >= total_num_addrs) {
809 catgets(dtsearch_catd, MS_dtsrclean, 222,
810 "*** %s DBA in d99 not in d00,"
811 " slot > max num docs.\n"),
813 validation_error (dbaorig);
815 if (max_corruption > 0L &&
816 corruption_count >= max_corruption)
817 end_of_job (92, SHOW_PROGRESS + SHOW_EXITCODE);
818 continue; /* skip the bit vector check */
822 * Verify that dba exists in d00 file (test bit
823 * #1). If not, mark bit #3 (3rd lowest) in
824 * nibble and print error msg unless bit #3
827 bvptr = bit_vector + (dba >> 1);
828 is_odd_nibble = (dba & 1L);
829 if (!(*bvptr & ((is_odd_nibble) ? 0x01 : 0x10))) {
831 if (!(*bvptr & ((is_odd_nibble) ? 0x04 : 0x40))) {
833 *bvptr |= (is_odd_nibble) ? 0x04 : 0x40;
836 catgets(dtsearch_catd, MS_dtsrclean, 333,
837 "*** %s DBA in d99 does not exist in d00.\n"),
839 validation_error (dbaorig);
841 if (max_corruption > 0L &&
842 corruption_count >= max_corruption)
843 end_of_job (93, SHOW_PROGRESS + SHOW_EXITCODE);
844 } /* endif where corrupt link
849 * Mark bit #2 in bit vector indicating a d99
852 *bvptr |= (is_odd_nibble) ? 0x02 : 0x20; /* bit #2 */
855 * move good dba to curr output block, incr
859 word_addrs_out[good_addrs_this_block++] = dbaorig;
861 } /* end validation loop for each good dba in
865 * Write out only validated addrs in current block.
866 * If this was the last block, fill out all the
867 * free slots, if any, with -1 values, and exit the
868 * dba loop for this word.
870 if (good_addrs_this_block > 0) {
872 for (swapx = 0; swapx < good_addrs_this_block; swapx++)
873 NTOHL (word_addrs_out[swapx]);
875 num_writes = fwrite (word_addrs_out, sizeof (DB_ADDR),
876 (size_t)good_addrs_this_block, fp_d99_new);
877 if (num_writes != good_addrs_this_block)
880 if (good_addrs_left <= 0) {
882 * Write blocks of -1s until new d2x free slot
883 * count is exhausted. The last block may be <
886 slots_left = d23new.or_hwfree;
887 while (slots_left > 0) {
889 * set x to number of -1's to write for
892 if (slots_left > MAX_REC_READ) {
894 slots_left -= MAX_REC_READ;
900 for (a = 0; a < x; a++)
901 word_addrs_out[a] = (DtSrINT32) -1;
902 /* BYTE_SWAP not required for foxes */
903 num_writes = fwrite (word_addrs_out,
904 sizeof(DB_ADDR), (size_t)x, fp_d99_new);
907 } /* end while loop to write out all -1's */
910 } /* endif for validation_mode for this block */
913 * If NOT in validation mode, just write out the new
914 * d99 block as an exact copy of the input block.
915 * BYTE_SWAP not required because word_addrs is
916 * still in its original network order from the fread.
919 num_writes = fwrite (word_addrs, sizeof(DB_ADDR),
920 (size_t)num_reads, fp_d99_new);
921 if (num_writes != num_reads) {
924 catgets(dtsearch_catd, MS_dtsrclean, 665,
925 "%s Write error on %s: %s.\n"),
926 PROGNAME"665", fname_d99_new, strerror(errno));
927 end_of_job (4, SHOW_PROGRESS + SHOW_EXITCODE);
929 } /* endelse for NOT validation_mode for this block */
931 } /* end loop for all blocks for this entire word
934 /* write the updated d2x record */
935 write_d2x (&d23new, keyfield);
939 * Every now and then print a dot. Print complete progress
940 * msg after DOTS_PER_MSG dots.
942 if (!(reccount % recs_per_dot)) {
943 if (++dot_count > DOTS_PER_MSG) {
945 print_progress ("Progress");
948 fputc ('.', aa_stderr);
949 need_linefeed = TRUE;
950 if (!(dot_count % 10L))
951 fputc (' ', aa_stderr);
954 } /* end of print-a-dot */
957 end_of_job (shutdown_now, SHOW_PROGRESS + SHOW_EXITCODE);
958 KEYNEXT (PROGNAME "196", keyfield, 0);
959 } /* end of main loop on each word in database */
963 } /* copy_new_d99() */
966 /************************************************/
970 /************************************************/
971 int main (int argc, char *argv[])
975 unsigned char *bvptr;
976 DB_ADDR dba, dba1, dbaorig;
978 char fname_d21_new[1024];
979 char fname_d21_old[1024];
980 char fname_d22_new[1024];
981 char fname_d22_old[1024];
982 char fname_d23_new[1024];
983 char fname_d23_old[1024];
984 FILE *fp_d21_new = NULL;
985 FILE *fp_d21_old = NULL;
986 FILE *fp_d22_new = NULL;
987 FILE *fp_d22_old = NULL;
988 FILE *fp_d23_new = NULL;
989 FILE *fp_d23_old = NULL;
990 char full_dbname_old[1024];
991 char full_dbname_new[1024];
992 DtSrINT32 max_bitvec = 0L;
995 char readbuf[1024 + 32];
996 unsigned long reads_per_dot;
997 char recidbuf[DtSrMAX_DB_KEYSIZE + 4];
1000 struct or_dbrec dbrec;
1003 setlocale (LC_ALL, "");
1004 dtsearch_catd = catopen (FNAME_DTSRCAT, 0);
1007 strftime (dbfpath, sizeof (dbfpath), /* just use any ol' buffer */
1008 catgets (dtsearch_catd, MS_misc, 22, "%A, %b %d %Y, %I:%M %p"),
1009 localtime (&starttime));
1010 printf ( catgets(dtsearch_catd, MS_dtsrclean, 11,
1011 "%s Version %s. Run %s.\n") ,
1012 aa_argv0, AUSAPI_VERSION, dbfpath);
1014 signal (SIGHUP, signal_shutdown);
1015 signal (SIGINT, signal_shutdown);
1016 signal (SIGQUIT, signal_shutdown);
1017 signal (SIGTRAP, signal_shutdown);
1018 signal (SIGKILL, signal_shutdown); /* this cannot be trapped */
1019 signal (SIGALRM, signal_shutdown);
1020 signal (SIGTERM, signal_shutdown);
1022 signal (SIGPWR, signal_shutdown);
1025 signal (SIGXCPU, signal_shutdown);
1026 signal (SIGDANGER, signal_shutdown);
1029 user_args_processor (argc, argv);
1031 /* In order to find old files, we have to check if
1032 * DBFPATH environment variable has been set.
1033 * Load the fully constructed DBFPATH-dbname into its own buffer.
1035 full_dbname_old[0] = '\0';
1037 if ((ptr = getenv ("DBFPATH")) != NULL) {
1040 catgets(dtsearch_catd, MS_dtsrclean, 12,
1041 "%s: Ignoring empty DBFPATH environment variable.\n") ,
1044 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 13,
1045 "%s: Using DBFPATH = '%s'.\n") ,
1047 snprintf(full_dbname_old, sizeof(full_dbname_old), "%s", ptr);
1049 /* Ensure that DBFPATH ends in a slash. */
1050 ptr = strchr (full_dbname_old, '\0');
1051 if (*(ptr - 1) != LOCAL_SLASH) {
1052 *ptr++ = LOCAL_SLASH;
1055 strcpy (dbfpath, full_dbname_old);
1059 /* Currently full_dbname_old contains just the path.
1060 * Similarly, build just path name for the 2 new files
1061 * using full_dbname_new as a buffer.
1062 * Verify they don't both refer to the same directory.
1064 strcpy (full_dbname_new, arg_newpath);
1065 ptr = strchr (full_dbname_new, '\0');
1066 if (*(ptr - 1) != LOCAL_SLASH) {
1067 *ptr++ = LOCAL_SLASH;
1070 if (strcmp (full_dbname_old, full_dbname_new) == 0) {
1071 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 393,
1072 "%s Old and new directories are identical: '%s'.\n"),
1073 PROGNAME"393", full_dbname_old);
1074 end_of_job (2, SHOW_USAGE);
1077 /* Complete full_dbname_old by appending dbname to the path prefix.
1078 * Then build full path/file names for all 4 files.
1080 strcat (full_dbname_old, arg_dbname);
1081 strcat (full_dbname_new, arg_dbname);
1082 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 14,
1083 "%s: Old files: '%s.d2x, .d99'.\n") ,
1084 aa_argv0, full_dbname_old);
1085 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 15,
1086 "%s: New files: '%s.d2x, .d99'.\n") ,
1087 aa_argv0, full_dbname_new);
1089 strcpy (fname_d99_old, full_dbname_old);
1090 strcat (fname_d99_old, ".d99");
1091 strcpy (fname_d21_old, full_dbname_old);
1092 strcat (fname_d21_old, ".d21");
1093 strcpy (fname_d22_old, full_dbname_old);
1094 strcat (fname_d22_old, ".d22");
1095 strcpy (fname_d23_old, full_dbname_old);
1096 strcat (fname_d23_old, ".d23");
1097 strcpy (fname_d99_new, full_dbname_new);
1098 strcat (fname_d99_new, ".d99");
1099 strcpy (fname_d21_new, full_dbname_new);
1100 strcat (fname_d21_new, ".d21");
1101 strcpy (fname_d22_new, full_dbname_new);
1102 strcat (fname_d22_new, ".d22");
1103 strcpy (fname_d23_new, full_dbname_new);
1104 strcat (fname_d23_new, ".d23");
1106 /* If the user hasn't already authorized overwriting preexisting files,
1107 * check new directory and if new files already exist,
1108 * ask permission to overwrite.
1111 oops = FALSE; /* TRUE forces a user prompt */
1112 if ((fp_d99_new = fopen (fname_d99_new, "r")) != NULL) {
1113 fclose (fp_d99_new);
1116 if ((fp_d21_new = fopen (fname_d21_new, "r")) != NULL) {
1117 fclose (fp_d21_new);
1120 if ((fp_d22_new = fopen (fname_d22_new, "r")) != NULL) {
1121 fclose (fp_d22_new);
1124 if ((fp_d23_new = fopen (fname_d23_new, "r")) != NULL) {
1125 fclose (fp_d23_new);
1129 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 24,
1130 "%s: One or more new files already exist.\n") ,
1133 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 463,
1134 "%s Command line argument disallows file overlay.\n"),
1136 end_of_job (2, SHOW_EXITCODE);
1138 fputs (catgets(dtsearch_catd, MS_dtsrclean, 45,
1139 " Is it ok to overlay files in new directory? [y/n] "),
1143 if(NULL == fgets (readbuf, sizeof(readbuf), stdin)) {
1144 fprintf (aa_stderr, "Failed to read from stdin\n");
1145 end_of_job (2, SHOW_EXITCODE);
1147 if (strlen(readbuf) && readbuf[strlen(readbuf)-1] == '\n')
1148 readbuf[strlen(readbuf)-1] = '\0';
1150 if (tolower (*readbuf) != 'y')
1151 end_of_job (2, SHOW_NOTHING);
1153 } /* end of check for overlaying new files */
1155 /* Open all files. The d2x's are opened so that the old ones
1156 * can be copied into the new directory before starting
1157 * the garbage collection process proper.
1158 * The d99's are opened now just to verify permissions.
1160 oops = FALSE; /* TRUE ends job, but only after trying all 4 files */
1161 open_all_files (&fp_d21_old, fname_d21_old, "rb", &size_d21_old, &oops);
1162 open_all_files (&fp_d22_old, fname_d22_old, "rb", &size_d22_old, &oops);
1163 open_all_files (&fp_d23_old, fname_d23_old, "rb", &size_d23_old, &oops);
1164 open_all_files (&fp_d99_old, fname_d99_old, "rb", &size_d99_old, &oops);
1165 open_all_files (&fp_d21_new, fname_d21_new, "wb", NULL, &oops);
1166 open_all_files (&fp_d22_new, fname_d22_new, "wb", NULL, &oops);
1167 open_all_files (&fp_d23_new, fname_d23_new, "wb", NULL, &oops);
1168 open_all_files (&fp_d99_new, fname_d99_new, "wb", NULL, &oops);
1171 end_of_job (shutdown_now, SHOW_EXITCODE);
1173 end_of_job (2, SHOW_EXITCODE);
1175 /* Copy old d2x files to new directory.
1176 * Database will open using new files so only they will be changed.
1178 copy_old_d2x_to_new (fname_d21_old, fname_d21_new, fp_d21_old, fp_d21_new);
1179 copy_old_d2x_to_new (fname_d22_old, fname_d22_new, fp_d22_old, fp_d22_new);
1180 copy_old_d2x_to_new (fname_d23_old, fname_d23_new, fp_d23_old, fp_d23_new);
1182 /* Open database, but use new d2x files for updates. */
1183 RENFILE (PROGNAME"1102", arg_dbname, OR_D21, fname_d21_new);
1184 RENFILE (PROGNAME"1104", arg_dbname, OR_D22, fname_d22_new);
1185 RENFILE (PROGNAME"1106", arg_dbname, OR_D23, fname_d23_new);
1186 if (!austext_dopen (arg_dbname, (dbfpath[0] == 0) ? NULL : dbfpath,
1188 puts (DtSearchGetMessages ());
1189 end_of_job (3, SHOW_EXITCODE);
1192 /* This is where efim changed real dba to
1193 * record number (still called dba)
1195 RECFRST (PROGNAME "1067", OR_OBJREC, 0);
1196 CRGET (PROGNAME "1068", &dba, 0); /* dba of first real obj
1198 recslots = dbrec.or_recslots; /* vista slots per obj
1200 dba_offset = recslots - (dba & 0xffffff); /* accounts for dbrec */
1202 /* total_num_addrs = what reccount would be if
1203 * all holes were filled with good records.
1205 total_num_addrs = (dbrec.or_maxdba - (dba & 0xffffff) + 1) / recslots + 1;
1206 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 25,
1207 "%s: curr reccnt=%ld, mxdba=%ld, sl/rec=%ld, tot#adr=%ld.\n") ,
1208 aa_argv0, (long)dbrec.or_reccount, (long)dbrec.or_maxdba,
1209 (long)dbrec.or_recslots, (long)total_num_addrs);
1211 /* Initialize validation_mode (checkd99) */
1212 if (validation_mode) {
1214 * Allocate and initialize a bit vector: 4 bits for every
1215 * possible d00 database address.
1217 max_bitvec = (total_num_addrs >> 1) + 2;
1218 if ((bit_vector = malloc ((size_t)max_bitvec + 64)) == NULL) {
1219 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 465,
1220 "%s WARNING: Can't allocate memory for bit vector.\n"
1221 " 'Validate' mode switched off.\n"),
1223 validation_mode = FALSE;
1224 normal_exitcode = 1; /* warning */
1225 goto EXIT_INIT_VALIDATION;
1227 memset (bit_vector, 0, (size_t)max_bitvec);
1230 * Read every d00 rec sequentially. 1 in bit #1 (lowest
1231 * order) in bit vector means record (dba) exists in d00
1232 * file. While we're at it, count the total number of
1235 x = dbrec.or_reccount / 50 + 1; /* x = recs per dot */
1236 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 26,
1237 "%s: Reading d00 file. Each dot appx %ld database documents...\n"),
1241 RECFRST (PROGNAME "534", OR_OBJREC, 0);
1242 while (db_status == S_OKAY) {
1243 CRREAD (PROGNAME "617", OR_OBJKEY, recidbuf, 0);
1245 /* print periodic progress dots */
1246 if (!(++reccount % x)) {
1247 fputc ('.', aa_stderr);
1248 need_linefeed = TRUE;
1249 if (!(++dot_count % 10L))
1250 fputc (' ', aa_stderr);
1255 * Get dba and record number and confirm it will not
1256 * overflow bit vector.
1258 CRGET (PROGNAME "537", &dba, 0);
1259 dba &= 0x00ffffff; /* mask out file number in high order byte */
1260 dba1 = (dba + dba_offset) / recslots; /* ="rec number", base 1 */
1261 if (dba1 >= total_num_addrs) {
1263 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 561,
1264 "%s DBA '%d:%ld' (rec #%ld) in d00 exceeds "
1265 "total num addrs %ld;\n"
1266 " Bit vector overflow because maxdba %ld"
1267 " in dbrec is incorrect.\n"),
1268 PROGNAME"561", OR_D00, (long)dba, (long)dba1,
1269 (long)total_num_addrs, (long)dbrec.or_maxdba);
1270 end_of_job (7, SHOW_EXITCODE);
1273 end_of_job (shutdown_now, SHOW_EXITCODE);
1276 * Set bit #1 of even or odd nibble to indicate that
1277 * this record *number* actually exists in d00 file.
1279 bit_vector[dba1 >> 1] |= (dba1 & 1L) ? 0x01 : 0x10;
1281 RECNEXT (PROGNAME "541", 0);
1282 } /* end of sequential read thru d00 file */
1284 TERMINATE_LINE (); /* end the dots... */
1286 /* confirm that RECCOUNT record holds the correct number */
1287 if (dbrec.or_reccount == reccount) {
1289 catgets(dtsearch_catd, MS_dtsrclean, 27,
1290 "%s: Confirmed %ld DOCUMENTS in %s.d00.\n") ,
1291 aa_argv0, (long)dbrec.or_reccount, arg_dbname);
1294 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 28,
1295 "%s: %ld DOCUMENTS actually in %s.d00 not ="
1296 " %ld count stored there.\n"
1297 " Count will be corrected in new d00 file.\n") ,
1298 aa_argv0, (long)reccount, arg_dbname, (long)dbrec.or_reccount);
1299 dbrec.or_reccount = reccount;
1300 rewrite_reccount = TRUE;
1303 EXIT_INIT_VALIDATION:;
1304 } /* end of validation_mode initialization */
1306 /* initialize main loop */
1310 dot_count = DOTS_PER_MSG; /* force initial msg after first
1313 fprintf (aa_stderr, catgets(dtsearch_catd, MS_dtsrclean, 29,
1314 "%s: Compressing into %s. Each dot appx %lu words...\n") ,
1315 aa_argv0, arg_newpath, (unsigned long)recs_per_dot);
1317 /* write New Header Information to a new d99 file */
1318 init_header (fp_d99_new, &fl_hdr);
1320 /* Sequentially read each word key file in big loop.
1321 * For each word, read the d99.
1322 * In validation mode check the dbas.
1323 * If not validating, just blindly rewrite the old d99 to the new one.
1324 * If validating only write good dba's and mark the bit vector.
1326 copy_new_d99 (OR_SWORDKEY);
1327 copy_new_d99 (OR_LWORDKEY);
1328 copy_new_d99 (OR_HWORDKEY);
1332 end_of_job (50, SHOW_PROGRESS + SHOW_EXITCODE);
1334 print_progress ("Final");
1336 /* If validation_mode requested, traverse bit vector and print out
1337 * table of each d00 record which cannot be accessed from any d99 word.
1338 * If a validation file name was provided, write out a line for each
1339 * bad record in albeniz-compatible format.
1341 if (validation_mode) {
1342 for (x = 0, bvptr = bit_vector; x < max_bitvec; x++, bvptr++) {
1343 for (j = 0; j < 8; j += 4) { /* j = 0 or 4, amount of
1345 /* a = bits #1 and #2 of current nibble */
1346 a = 0x30 & (*bvptr << j);
1348 /* if dba is in d00 but not in d99... */
1349 if (a & 0x10 && !(a & 0x20)) {
1350 /* ...construct valid vista dba */
1353 dbaorig++; /* slot number */
1354 /*** dba = dbaorig | (OR_D00 << 24); ***//* r
1357 /* now efim retranslates back to real dba */
1358 dba = ((dbaorig + 1) * recslots - dba_offset)
1361 /* ...print out err msg */
1362 CRSET (PROGNAME "734", &dba, 0);
1363 CRREAD (PROGNAME "735", OR_OBJKEY, readbuf, 0);
1365 catgets(dtsearch_catd, MS_dtsrclean, 444,
1366 "*** %s d00 record '%s' is not referenced in d99.\n"
1367 " DBA = %d:%ld (x%02x:%06lx).\n") ,
1368 PROGNAME"444", readbuf, OR_D00,
1369 (long)dba, OR_D00, (long)dba);
1371 /*...if albeniz compatible output requested, do it */
1373 fprintf (frecids, DISCARD_FORMAT, arg_dbname,
1374 readbuf, "MrClean", datestr);
1378 if (max_corruption > 0L &&
1379 corruption_count >= max_corruption)
1380 end_of_job (94, SHOW_EXITCODE);
1381 } /* endif where d00 is not referenced by d99 */
1382 } /* end forloop: every 2 bits in a bitvector byte */
1383 } /* end forloop: every byte in bitvector */
1386 /* Normal_exitcode currently will contain either a 0 or a 1.
1387 * If we were uncorrupting the d99 and found any corrupt links,
1388 * make sure it's 1 (warning). If there were corrupt links and
1389 * we weren't trying to uncorrupt it, change it to a hard error.
1391 /***by the way, corruption_count can be > 0 only if in validation_mode.**/
1392 if (corruption_count > 0L) {
1393 if (validation_mode)
1394 normal_exitcode = 1;
1396 normal_exitcode = 90;
1398 end_of_job (normal_exitcode, SHOW_EXITCODE);
1401 /*************************** DTSRCLEAN.C ****************************/