os161-1.99
 All Data Structures
psort.c
00001 /*
00002  * Copyright (c) 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009
00003  *      The President and Fellows of Harvard College.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions
00007  * are met:
00008  * 1. Redistributions of source code must retain the above copyright
00009  *    notice, this list of conditions and the following disclaimer.
00010  * 2. Redistributions in binary form must reproduce the above copyright
00011  *    notice, this list of conditions and the following disclaimer in the
00012  *    documentation and/or other materials provided with the distribution.
00013  * 3. Neither the name of the University nor the names of its contributors
00014  *    may be used to endorse or promote products derived from this software
00015  *    without specific prior written permission.
00016  *
00017  * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY AND CONTRIBUTORS ``AS IS'' AND
00018  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00019  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00020  * ARE DISCLAIMED.  IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE LIABLE
00021  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00022  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00023  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00024  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00025  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00026  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00027  * SUCH DAMAGE.
00028  */
00029 
00030 /*
00031  * psort - parallel sort.
00032  *
00033  * This is loosely based on some real parallel sort benchmarks, but
00034  * because of various limitations of OS/161 it is massively
00035  * inefficient. But that's ok; the goal is to stress the VM and buffer
00036  * cache.
00037  */
00038 
00039 #include <sys/types.h>
00040 #include <sys/stat.h>
00041 #include <sys/wait.h>
00042 #include <stdio.h>
00043 #include <stdarg.h>
00044 #include <stdlib.h>
00045 #include <string.h>
00046 #include <assert.h>
00047 #include <unistd.h>
00048 #include <fcntl.h>
00049 #include <errno.h>
00050 
00051 #ifndef RANDOM_MAX
00052 /* Note: this is correct for OS/161 but not for some Unix C libraries */
00053 #define RANDOM_MAX RAND_MAX
00054 #endif
00055 
00056 #define PATH_KEYS    "sortkeys"
00057 #define PATH_SORTED  "output"
00058 #define PATH_TESTDIR "psortdir"
00059 #define PATH_RANDOM  "rand:"
00060 
00061 #define WORKNUM      (128*1024)
00062 
00063 
00064 static int workspace[WORKNUM];
00065 
00066 static const char *progname;
00067 
00068 static int numprocs = 4;
00069 static int numkeys = 10000;
00070 static long randomseed = 15432753;
00071 
00072 static off_t correctsize;
00073 static unsigned long checksum;
00074 
00075 #define NOBODY (-1)
00076 static int me = NOBODY;
00077 
00078 ////////////////////////////////////////////////////////////
00079 
00080 static
00081 void
00082 sortints(int *v, int num)
00083 {
00084         int pivotval, pivotpoint, pivotcount;
00085         int frontpos, readpos, endpos, i, j;
00086         int tmp;
00087 
00088         if (num < 2) {
00089                 return;
00090         }
00091 
00092         pivotpoint = num/2;
00093         pivotval = v[pivotpoint];
00094         pivotcount = 0;
00095 
00096         frontpos = 0;
00097         readpos = 0;
00098         endpos = num;
00099         while (readpos < endpos) {
00100                 if (v[readpos] < pivotval) {
00101                         v[frontpos++] = v[readpos++];
00102                 }
00103                 else if (v[readpos] == pivotval) {
00104                         readpos++;
00105                         pivotcount++;
00106                 }
00107                 else {
00108                         tmp = v[--endpos];
00109                         v[endpos] = v[readpos];
00110                         v[readpos] = tmp;
00111                 }
00112         }
00113         assert(readpos == endpos);
00114         assert(frontpos + pivotcount == readpos);
00115 
00116         for (i=frontpos; i<endpos; i++) {
00117                 v[i] = pivotval;
00118         }
00119 
00120         for (i=endpos, j=num-1; i<j; i++,j--) {
00121                 tmp = v[i];
00122                 v[i] = v[j];
00123                 v[j] = tmp;
00124         }
00125 
00126         sortints(v, frontpos);
00127         sortints(&v[endpos], num-endpos);
00128 }
00129 
00130 ////////////////////////////////////////////////////////////
00131 
00132 static
00133 void
00134 initprogname(const char *av0)
00135 {
00136         if (av0) {
00137                 progname = strrchr(av0, '/');
00138                 if (progname) {
00139                         progname++;
00140                 }
00141                 else {
00142                         progname = av0;
00143                 }
00144         }
00145         else {
00146                 progname = "psort";
00147         }
00148 }
00149 
00150 static
00151 void
00152 vscomplain(char *buf, size_t len, const char *fmt, va_list ap, int err)
00153 {
00154         size_t pos;
00155 
00156         if (me >= 0) {
00157                 snprintf(buf, len, "%s: proc %d: ", progname, me);
00158         }
00159         else {
00160                 snprintf(buf, len, "%s: ", progname);
00161         }
00162         pos = strlen(buf);
00163 
00164         vsnprintf(buf+pos, len-pos, fmt, ap);
00165         pos = strlen(buf);
00166 
00167         if (err >= 0) {
00168                 snprintf(buf+pos, len-pos, ": %s\n", strerror(err));
00169         }
00170         else {
00171                 snprintf(buf+pos, len-pos, "\n");
00172         }
00173 }
00174 
00175 static
00176 void
00177 complainx(const char *fmt, ...)
00178 {
00179   int rc;
00180         char buf[256];
00181         va_list ap;
00182 
00183         va_start(ap, fmt);
00184         vscomplain(buf, sizeof(buf), fmt, ap, -1);
00185         va_end(ap);
00186 
00187         /* Write the message in one go so it's atomic */
00188         rc = write(STDERR_FILENO, buf, strlen(buf));
00189         /* suppress compiler warning about setting but not
00190            using rc */
00191         (void)rc;
00192 }
00193 
00194 static
00195 void
00196 complain(const char *fmt, ...)
00197 {
00198   int rc;
00199         char buf[256];
00200         va_list ap;
00201         int err = errno;
00202 
00203         va_start(ap, fmt);
00204         vscomplain(buf, sizeof(buf), fmt, ap, err);
00205         va_end(ap);
00206 
00207         /* Write the message in one go so it's atomic */
00208         rc = write(STDERR_FILENO, buf, strlen(buf));
00209         /* suppress compiler warning about setting but not
00210            using rc */
00211         (void)rc;
00212 }
00213 
00214 ////////////////////////////////////////////////////////////
00215 
00216 static
00217 int
00218 doopen(const char *path, int flags, int mode)
00219 {
00220         int fd;
00221 
00222         fd = open(path, flags, mode);
00223         if (fd<0) {
00224                 complain("%s", path);
00225                 exit(1);
00226         }
00227         return fd;
00228 }
00229 
00230 static
00231 void
00232 doclose(const char *path, int fd)
00233 {
00234         if (close(fd)) {
00235                 complain("%s: close", path);
00236                 exit(1);
00237         }
00238 }
00239 
00240 static
00241 void
00242 docreate(const char *path)
00243 {
00244         int fd;
00245 
00246         fd = doopen(path, O_WRONLY|O_CREAT|O_TRUNC, 0664);
00247         doclose(path, fd);
00248 }
00249 
00250 static
00251 void
00252 doremove(const char *path)
00253 {
00254         static int noremove;
00255 
00256         if (noremove) {
00257                 return;
00258         }
00259 
00260         if (remove(path) < 0) {
00261                 if (errno == ENOSYS) {
00262                         /* Complain (and try) only once. */
00263                         noremove = 1;
00264                 }
00265                 complain("%s: remove", path);
00266         }
00267 }
00268 
00269 static
00270 off_t
00271 getsize(const char *path)
00272 {
00273         struct stat buf;
00274         int fd;
00275         static int no_stat, no_fstat;
00276 
00277         if (!no_stat) {
00278                 if (stat(path, &buf) == 0) {
00279                         return buf.st_size;
00280                 }
00281                 if (errno != ENOSYS) {
00282                         complain("%s: stat", path);
00283                         exit(1);
00284                 }
00285                 /* Avoid further "Unknown syscall 81" messages */
00286                 no_stat = 1;
00287         }
00288 
00289         fd = doopen(path, O_RDONLY, 0);
00290         if (!no_fstat) {
00291                 if (fstat(fd, &buf) == 0) {
00292                         close(fd);
00293                         return buf.st_size;
00294                 }
00295                 if (errno != ENOSYS) {
00296                         complain("%s: stat", path);
00297                         exit(1);
00298                 }
00299                 /* Avoid further "Unknown syscall 82" messages */
00300                 no_fstat = 1;
00301         }
00302 
00303         /* otherwise, lseek */
00304         if (lseek(fd, 0, SEEK_END) >= 0) {
00305                 buf.st_size = lseek(fd, 0, SEEK_CUR);
00306                 if (buf.st_size >= 0) {
00307                         return buf.st_size;
00308                 }
00309         }
00310         complain("%s: getting file size with lseek", path);
00311         close(fd);
00312         exit(1);
00313 }
00314 
00315 static
00316 size_t
00317 doread(const char *path, int fd, void *buf, size_t len)
00318 {
00319         int result;
00320 
00321         result = read(fd, buf, len);
00322         if (result < 0) {
00323                 complain("%s: read", path);
00324                 exit(1);
00325         }
00326         return (size_t) result;
00327 }
00328 
00329 static
00330 void
00331 doexactread(const char *path, int fd, void *buf, size_t len)
00332 {
00333         size_t result;
00334 
00335         result = doread(path, fd, buf, len);
00336         if (result != len) {
00337                 complainx("%s: read: short count", path);
00338                 exit(1);
00339         }
00340 }
00341 
00342 static
00343 void
00344 dowrite(const char *path, int fd, const void *buf, size_t len)
00345 {
00346         int result;
00347 
00348         result = write(fd, buf, len);
00349         if (result < 0) {
00350                 complain("%s: write", path);
00351                 exit(1);
00352         }
00353         if ((size_t) result != len) {
00354                 complainx("%s: write: short count", path);
00355                 exit(1);
00356         }
00357 }
00358 
00359 static
00360 void
00361 dolseek(const char *name, int fd, off_t offset, int whence)
00362 {
00363         if (lseek(fd, offset, whence) < 0) {
00364                 complain("%s: lseek", name);
00365                 exit(1);
00366         }
00367 }
00368 
00369 #if 0 /* let's not require subdirs */
00370 static
00371 void
00372 dochdir(const char *path)
00373 {
00374         if (chdir(path) < 0) {
00375                 complain("%s: chdir", path);
00376                 exit(1);
00377         }
00378 }
00379 
00380 static
00381 void
00382 domkdir(const char *path, int mode)
00383 {
00384         if (mkdir(path, mode) < 0) {
00385                 complain("%s: mkdir", path);
00386                 exit(1);
00387         }
00388 }
00389 #endif /* 0 */
00390 
00391 static
00392 pid_t
00393 dofork(void)
00394 {
00395         pid_t pid;
00396 
00397         pid = fork();
00398         if (pid < 0) {
00399                 complain("fork");
00400                 /* but don't exit */
00401         }
00402 
00403         return pid;
00404 }
00405 
00406 ////////////////////////////////////////////////////////////
00407 
00408 static
00409 int
00410 dowait(int guy, pid_t pid)
00411 {
00412         int status, result;
00413 
00414         result = waitpid(pid, &status, 0);
00415         if (result < 0) {
00416                 complain("waitpid");
00417                 return -1;
00418         }
00419         if (WIFSIGNALED(status)) {
00420                 complainx("proc %d: signal %d", guy, WTERMSIG(status));
00421                 return -1;
00422         }
00423         assert(WIFEXITED(status));
00424         status = WEXITSTATUS(status);
00425         if (status) {
00426                 complainx("proc %d: exit %d", guy, status);
00427                 return -1;
00428         }
00429         return 0;
00430 }
00431 
00432 static
00433 void
00434 doforkall(const char *phasename, void (*func)(void))
00435 {
00436         int i, bad = 0;
00437         pid_t pids[numprocs];
00438 
00439         for (i=0; i<numprocs; i++) {
00440                 pids[i] = dofork();
00441                 if (pids[i] < 0) {
00442                         bad = 1;
00443                 }
00444                 else if (pids[i] == 0) {
00445                         /* child */
00446                         me = i;
00447                         func();
00448                         exit(0);
00449                 }
00450         }
00451 
00452         for (i=0; i<numprocs; i++) {
00453                 if (pids[i] > 0 && dowait(i, pids[i])) {
00454                         bad = 1;
00455                 }
00456         }
00457 
00458         if (bad) {
00459                 complainx("%s failed.", phasename);
00460                 exit(1);
00461         }
00462 }
00463 
00464 static
00465 void
00466 seekmyplace(const char *name, int fd)
00467 {
00468         int keys_per, myfirst;
00469         off_t offset;
00470 
00471         keys_per = numkeys / numprocs;
00472         myfirst = me*keys_per;
00473         offset = myfirst * sizeof(int);
00474 
00475         dolseek(name, fd, offset, SEEK_SET);
00476 }
00477 
00478 static
00479 int
00480 getmykeys(void)
00481 {
00482         int keys_per, myfirst, mykeys;
00483 
00484         keys_per = numkeys / numprocs;
00485         myfirst = me*keys_per;
00486         mykeys = (me < numprocs-1) ? keys_per : numkeys - myfirst;
00487 
00488         return mykeys;
00489 }
00490 
00491 ////////////////////////////////////////////////////////////
00492 
00493 static
00494 unsigned long
00495 checksum_file(const char *path)
00496 {
00497         int fd;
00498         char buf[512];
00499         size_t count, i;
00500         unsigned long sum = 0;
00501 
00502         fd = doopen(path, O_RDONLY, 0);
00503 
00504         while ((count = doread(path, fd, buf, sizeof(buf))) > 0) {
00505                 for (i=0; i<count; i++) {
00506                         sum += (unsigned char) buf[i];
00507                 }
00508         }
00509 
00510         doclose(path, fd);
00511 
00512         return sum;
00513 }
00514 
00515 ////////////////////////////////////////////////////////////
00516 
00517 static long *seeds;
00518 
00519 static
00520 void
00521 genkeys_sub(void)
00522 {
00523         int fd, i, mykeys, keys_done, keys_to_do, value;
00524 
00525         fd = doopen(PATH_KEYS, O_WRONLY, 0);
00526 
00527         mykeys = getmykeys();
00528         seekmyplace(PATH_KEYS, fd);
00529 
00530         srandom(seeds[me]);
00531         keys_done = 0;
00532         while (keys_done < mykeys) {
00533                 keys_to_do = mykeys - keys_done;
00534                 if (keys_to_do > WORKNUM) {
00535                         keys_to_do = WORKNUM;
00536                 }
00537 
00538                 for (i=0; i<keys_to_do; i++) {
00539                         value = random();
00540 
00541                         // check bounds of value
00542                         assert(value >= 0);
00543                         assert(value <= RANDOM_MAX);
00544 
00545                         // do not allow the value to be zero or RANDOM_MAX
00546                         while (value == 0 || value == RANDOM_MAX) {
00547                                 value = random();
00548                         }
00549 
00550                         workspace[i] = value;
00551                 }
00552 
00553                 dowrite(PATH_KEYS, fd, workspace, keys_to_do*sizeof(int));
00554                 keys_done += keys_to_do;
00555         }
00556 
00557         doclose(PATH_KEYS, fd);
00558 }
00559 
00560 static
00561 void
00562 genkeys(void)
00563 {
00564         long seedspace[numprocs];
00565         int i;
00566 
00567         /* Create the file. */
00568         docreate(PATH_KEYS);
00569 
00570         /* Generate random seeds for each subprocess. */
00571         srandom(randomseed);
00572         for (i=0; i<numprocs; i++) {
00573                 seedspace[i] = random();
00574         }
00575 
00576         /* Do it. */
00577         seeds = seedspace;
00578         doforkall("Initialization", genkeys_sub);
00579         seeds = NULL;
00580 
00581         /* Cross-check the size of the output. */
00582         if (getsize(PATH_KEYS) != correctsize) {
00583                 complainx("%s: file is wrong size", PATH_KEYS);
00584                 exit(1);
00585         }
00586 
00587         /* Checksum the output. */
00588         checksum = checksum_file(PATH_KEYS);
00589         complainx("Checksum of unsorted keys: %ld", checksum);
00590 }
00591 
00592 ////////////////////////////////////////////////////////////
00593 
00594 static
00595 const char *
00596 binname(int a, int b)
00597 {
00598         static char rv[32];
00599         snprintf(rv, sizeof(rv), "bin-%d-%d", a, b);
00600         return rv;
00601 }
00602 
00603 static
00604 const char *
00605 mergedname(int a)
00606 {
00607         static char rv[32];
00608         snprintf(rv, sizeof(rv), "merged-%d", a);
00609         return rv;
00610 }
00611 
00612 static
00613 void
00614 bin(void)
00615 {
00616         int infd, outfds[numprocs];
00617         const char *name;
00618         int i, mykeys, keys_done, keys_to_do;
00619         int key, pivot, binnum;
00620 
00621         infd = doopen(PATH_KEYS, O_RDONLY, 0);
00622 
00623         mykeys = getmykeys();
00624         seekmyplace(PATH_KEYS, infd);
00625 
00626         for (i=0; i<numprocs; i++) {
00627                 name = binname(me, i);
00628                 outfds[i] = doopen(name, O_WRONLY|O_CREAT|O_TRUNC, 0664);
00629         }
00630 
00631         pivot = (RANDOM_MAX / numprocs);
00632 
00633         keys_done = 0;
00634         while (keys_done < mykeys) {
00635                 keys_to_do = mykeys - keys_done;
00636                 if (keys_to_do > WORKNUM) {
00637                         keys_to_do = WORKNUM;
00638                 }
00639 
00640                 doexactread(PATH_KEYS, infd, workspace,
00641                             keys_to_do * sizeof(int));
00642 
00643                 for (i=0; i<keys_to_do; i++) {
00644                         key = workspace[i];
00645 
00646                         binnum = key / pivot;
00647                         if (key <= 0) {
00648                                 complainx("proc %d: garbage key %d", me, key);
00649                                 key = 0;
00650                         }
00651                         assert(binnum >= 0);
00652                         assert(binnum < numprocs);
00653                         dowrite("bin", outfds[binnum], &key, sizeof(key));
00654                 }
00655 
00656                 keys_done += keys_to_do;
00657         }
00658         doclose(PATH_KEYS, infd);
00659 
00660         for (i=0; i<numprocs; i++) {
00661                 doclose(binname(me, i), outfds[i]);
00662         }
00663 }
00664 
00665 static
00666 void
00667 sortbins(void)
00668 {
00669         const char *name;
00670         int i, fd;
00671         off_t binsize;
00672 
00673         for (i=0; i<numprocs; i++) {
00674                 name = binname(me, i);
00675                 binsize = getsize(name);
00676                 if (binsize % sizeof(int) != 0) {
00677                         complainx("%s: bin size %ld no good", name,
00678                                   (long) binsize);
00679                         exit(1);
00680                 }
00681                 if (binsize > (off_t) sizeof(workspace)) {
00682                         complainx("proc %d: %s: bin too large", me, name);
00683                         exit(1);
00684                 }
00685 
00686                 fd = doopen(name, O_RDWR, 0);
00687                 doexactread(name, fd, workspace, binsize);
00688 
00689                 sortints(workspace, binsize/sizeof(int));
00690 
00691                 dolseek(name, fd, 0, SEEK_SET);
00692                 dowrite(name, fd, workspace, binsize);
00693                 doclose(name, fd);
00694         }
00695 }
00696 
00697 static
00698 void
00699 mergebins(void)
00700 {
00701         int infds[numprocs], outfd;
00702         int values[numprocs], ready[numprocs];
00703         const char *name, *outname;
00704         int i, result;
00705         int numready, place, val, worknum;
00706 
00707         outname = mergedname(me);
00708         outfd = doopen(outname, O_WRONLY|O_CREAT|O_TRUNC, 0664);
00709 
00710         for (i=0; i<numprocs; i++) {
00711                 name = binname(i, me);
00712                 infds[i] = doopen(name, O_RDONLY, 0);
00713                 values[i] = 0;
00714                 ready[i] = 0;
00715         }
00716 
00717         worknum = 0;
00718 
00719         while (1) {
00720                 numready = 0;
00721                 for (i=0; i<numprocs; i++) {
00722                         if (infds[i] < 0) {
00723                                 continue;
00724                         }
00725 
00726                         if (!ready[i]) {
00727                                 result = doread("bin", infds[i], 
00728                                                 &val, sizeof(int));
00729                                 if (result == 0) {
00730                                         doclose("bin", infds[i]);
00731                                         infds[i] = -1;
00732                                         continue;
00733                                 }
00734                                 if ((size_t) result != sizeof(int)) {
00735                                         complainx("%s: read: short count",
00736                                                   binname(i, me));
00737                                         exit(1);
00738                                 }
00739                                 values[i] = val;
00740                                 ready[i] = 1;
00741                         }
00742                         numready++;
00743                 }
00744                 if (numready == 0) {
00745                         break;
00746                 }
00747 
00748                 /* find the smallest */
00749                 place = -1;
00750                 for (i=0; i<numprocs; i++) {
00751                         if (!ready[i]) {
00752                                 continue;
00753                         }
00754                         if (place < 0 || values[i] < val) {
00755                                 val = values[i];
00756                                 place = i;
00757                         }
00758                 }
00759                 assert(place >= 0);
00760 
00761                 workspace[worknum++] = val;
00762                 if (worknum >= WORKNUM) {
00763                         assert(worknum == WORKNUM);
00764                         dowrite(outname, outfd, workspace,
00765                                 worknum * sizeof(int));
00766                         worknum = 0;
00767                 }
00768                 ready[place] = 0;
00769         }
00770 
00771         dowrite(outname, outfd, workspace, worknum * sizeof(int));
00772         doclose(outname, outfd);
00773 
00774         for (i=0; i<numprocs; i++) {
00775                 assert(infds[i] < 0);
00776         }
00777 }
00778 
00779 static
00780 void
00781 assemble(void)
00782 {
00783         off_t mypos;
00784         int i, fd;
00785         const char *args[3];
00786 
00787         mypos = 0;
00788         for (i=0; i<me; i++) {
00789                 mypos += getsize(mergedname(i));
00790         }
00791 
00792         fd = doopen(PATH_SORTED, O_WRONLY, 0);
00793         dolseek(PATH_SORTED, fd, mypos, SEEK_SET);
00794 
00795         if (dup2(fd, STDOUT_FILENO) < 0) {
00796                 complain("dup2");
00797                 exit(1);
00798         }
00799 
00800         doclose(PATH_SORTED, fd);
00801 
00802         args[0] = "cat";
00803         args[1] = mergedname(me);
00804         args[2] = NULL;
00805         execv("/bin/cat", (char **) args);
00806         complain("/bin/cat: exec");
00807         exit(1);
00808 }
00809 
00810 static
00811 void
00812 checksize_bins(void)
00813 {
00814         off_t totsize;
00815         int i, j;
00816 
00817         totsize = 0;
00818         for (i=0; i<numprocs; i++) {
00819                 for (j=0; j<numprocs; j++) {
00820                         totsize += getsize(binname(i, j));
00821                 }
00822         }
00823         if (totsize != correctsize) {
00824                 complain("Sum of bin sizes is wrong (%ld, should be %ld)",
00825                          (long) totsize, (long) correctsize);
00826                 exit(1);
00827         }
00828 }
00829 
00830 static
00831 void
00832 checksize_merge(void)
00833 {
00834         off_t totsize;
00835         int i;
00836 
00837         totsize = 0;
00838         for (i=0; i<numprocs; i++) {
00839                 totsize += getsize(mergedname(i));
00840         }
00841         if (totsize != correctsize) {
00842                 complain("Sum of merged sizes is wrong (%ld, should be %ld)",
00843                          (long) totsize, (long) correctsize);
00844                 exit(1);
00845         }
00846 }
00847 
00848 static
00849 void
00850 sort(void)
00851 {
00852         unsigned long sortedsum;
00853         int i, j;
00854 
00855         /* Step 1. Toss into bins. */
00856         doforkall("Tossing", bin);
00857         checksize_bins();
00858         complainx("Done tossing into bins.");
00859 
00860         /* Step 2: Sort the bins. */
00861         doforkall("Sorting", sortbins);
00862         checksize_bins();
00863         complainx("Done sorting the bins.");
00864 
00865         /* Step 3: Merge corresponding bins. */
00866         doforkall("Merging", mergebins);
00867         checksize_merge();
00868         complainx("Done merging the bins.");
00869 
00870         /* Step 3a: delete the bins */
00871         for (i=0; i<numprocs; i++) {
00872                 for (j=0; j<numprocs; j++) {
00873                         doremove(binname(i, j));
00874                 }
00875         }
00876 
00877         /* Step 4: assemble output file */
00878         docreate(PATH_SORTED);
00879         doforkall("Final assembly", assemble);
00880         if (getsize(PATH_SORTED) != correctsize) {
00881                 complainx("%s: file is wrong size", PATH_SORTED);
00882                 exit(1);
00883         }
00884 
00885         /* Step 4a: delete the merged bins */
00886         for (i=0; i<numprocs; i++) {
00887                 doremove(mergedname(i));
00888         }
00889 
00890         /* Step 5: Checksum the result. */
00891         sortedsum = checksum_file(PATH_SORTED);
00892         complainx("Checksum of sorted keys: %ld", sortedsum);
00893 
00894         if (sortedsum != checksum) {
00895                 complainx("Sums do not match");
00896                 exit(1);
00897         }
00898 }
00899 
00900 ////////////////////////////////////////////////////////////
00901 
00902 static
00903 const char *
00904 validname(int a)
00905 {
00906         static char rv[32];
00907         snprintf(rv, sizeof(rv), "valid-%d", a);
00908         return rv;
00909 }
00910 
00911 static
00912 void
00913 checksize_valid(void)
00914 {
00915         off_t totvsize, correctvsize;
00916         int i;
00917 
00918         correctvsize = (off_t) numprocs*2*sizeof(int);
00919 
00920         totvsize = 0;
00921         for (i=0; i<numprocs; i++) {
00922                 totvsize += getsize(validname(i));
00923         }
00924         if (totvsize != correctvsize) {
00925                 complainx("Sum of validation sizes is wrong "
00926                           "(%ld, should be %ld)",
00927                           (long) totvsize, (long) correctvsize);
00928                 exit(1);
00929         }
00930 }
00931 
00932 static
00933 void
00934 dovalidate(void)
00935 {
00936         const char *name;
00937         int fd, i, mykeys, keys_done, keys_to_do;
00938         int key, smallest, largest;
00939 
00940         name = PATH_SORTED;
00941         fd = doopen(name, O_RDONLY, 0);
00942 
00943         mykeys = getmykeys();
00944         seekmyplace(name, fd);
00945 
00946         smallest = RANDOM_MAX;
00947         largest = 0;
00948 
00949         keys_done = 0;
00950         while (keys_done < mykeys) {
00951                 keys_to_do = mykeys - keys_done;
00952                 if (keys_to_do > WORKNUM) {
00953                         keys_to_do = WORKNUM;
00954                 }
00955 
00956                 doexactread(name, fd, workspace, keys_to_do * sizeof(int));
00957 
00958                 for (i=0; i<keys_to_do; i++) {
00959                         key = workspace[i];
00960 
00961                         if (key < 0) {
00962                                 complain("%s: found negative key", name);
00963                                 exit(1);
00964                         }
00965                         if (key == 0) {
00966                                 complain("%s: found zero key", name);
00967                                 exit(1);
00968                         }
00969                         if (key >= RANDOM_MAX) {
00970                                 complain("%s: found too-large key", name);
00971                                 exit(1);
00972                         }
00973 
00974                         if (key < smallest) {
00975                                 smallest = key;
00976                         }
00977                         if (key > largest) {
00978                                 largest = key;
00979                         }
00980                 }
00981 
00982                 keys_done += keys_to_do;
00983         }
00984         doclose(name, fd);
00985 
00986         name = validname(me);
00987         fd = doopen(name, O_WRONLY|O_CREAT|O_TRUNC, 0664);
00988         dowrite(name, fd, &smallest, sizeof(smallest));
00989         dowrite(name, fd, &largest, sizeof(largest));
00990         doclose(name, fd);
00991 }
00992 
00993 static
00994 void
00995 validate(void)
00996 {
00997         int smallest, largest, prev_largest;
00998         int i, fd;
00999         const char *name;
01000 
01001         doforkall("Validation", dovalidate);
01002         checksize_valid();
01003 
01004         prev_largest = 1;
01005 
01006         for (i=0; i<numprocs; i++) {
01007                 name = validname(i);
01008                 fd = doopen(name, O_RDONLY, 0);
01009 
01010                 doexactread(name, fd, &smallest, sizeof(int));
01011                 doexactread(name, fd, &largest, sizeof(int));
01012 
01013                 if (smallest < 1) {
01014                         complainx("Validation: block %d: bad SMALLEST", i);
01015                         exit(1);
01016                 }
01017                 if (largest >= RANDOM_MAX) {
01018                         complainx("Validation: block %d: bad LARGEST", i);
01019                         exit(1);
01020                 }
01021                 if (smallest > largest) {
01022                         complainx("Validation: block %d: SMALLEST > LARGEST",
01023                                   i);
01024                         exit(1);
01025                 }
01026 
01027                 if (smallest < prev_largest) {
01028                         complain("Validation: block %d smallest key %d",
01029                                  i, smallest);
01030                         complain("Validation: previous block largest key %d",
01031                                  prev_largest);
01032                         complain("Validation failed");
01033                         exit(1);
01034                 }
01035         }
01036 
01037 
01038         for (i=0; i<numprocs; i++) {
01039                 doremove(validname(i));
01040         }
01041 }
01042 
01043 ////////////////////////////////////////////////////////////
01044 
01045 static
01046 void
01047 setdir(void)
01048 {
01049 #if 0 /* let's not require subdirs */
01050         domkdir(PATH_TESTDIR, 0775);
01051         dochdir(PATH_TESTDIR);
01052 #endif /* 0 */
01053 }
01054 
01055 static
01056 void
01057 unsetdir(void)
01058 {
01059         doremove(PATH_KEYS);
01060         doremove(PATH_SORTED);
01061 #if 0 /* let's not require subdirs */
01062         dochdir("..");
01063 
01064         if (rmdir(PATH_TESTDIR) < 0) {
01065                 complain("%s: rmdir", PATH_TESTDIR);
01066                 /* but don't exit */
01067         }
01068 #endif /* 0 */
01069 }
01070 
01071 ////////////////////////////////////////////////////////////
01072 
01073 static
01074 void
01075 randomize(void)
01076 {
01077         int fd;
01078 
01079         fd = doopen(PATH_RANDOM, O_RDONLY, 0);
01080         doexactread(PATH_RANDOM, fd, &randomseed, sizeof(randomseed));
01081         doclose(PATH_RANDOM, fd);
01082 }
01083 
01084 static
01085 void
01086 usage(void)
01087 {
01088         complain("Usage: %s [-p procs] [-k keys] [-s seed] [-r]", progname);
01089         exit(1);
01090 }
01091 
01092 static
01093 void
01094 doargs(int argc, char *argv[])
01095 {
01096         int i, ch, val, arg;
01097 
01098         for (i=1; i<argc; i++) {
01099                 if (argv[i][0] != '-') {
01100                         usage();
01101                         return;
01102                 }
01103                 ch = argv[i][1];
01104                 switch (ch) {
01105                     case 'p': arg = 1; break;
01106                     case 'k': arg = 1; break;
01107                     case 's': arg = 1; break;
01108                     case 'r': arg = 0; break;
01109                     default: usage(); return;
01110                 }
01111                 if (arg) {
01112                         if (argv[i][2]) {
01113                                 val = atoi(argv[i]+2);
01114                         }
01115                         else {
01116                                 i++;
01117                                 if (!argv[i]) {
01118                                         complain("Option -%c requires an "
01119                                                  "argument", ch);
01120                                         exit(1);
01121                                 }
01122                                 val = atoi(argv[i]);
01123                         }
01124                         switch (ch) {
01125                             case 'p': numprocs = val; break;
01126                             case 'k': numkeys = val; break;
01127                             case 's': randomseed = val; break;
01128                             default: assert(0); break;
01129                         }
01130                 }
01131                 else {
01132                         switch (ch) {
01133                             case 'r': randomize(); break;
01134                             default: assert(0); break;
01135                         }
01136                 }
01137         }
01138 }
01139 
01140 int
01141 main(int argc, char *argv[])
01142 {
01143         initprogname(argc > 0 ? argv[0] : NULL);
01144 
01145         doargs(argc, argv);
01146         correctsize = (off_t) (numkeys*sizeof(int));
01147 
01148         setdir();
01149 
01150         genkeys();
01151         sort();
01152         validate();
01153         complainx("Succeeded.");
01154 
01155         unsetdir();
01156 
01157         return 0;
01158 }
 All Data Structures