// cssutil.c - utility for munging css files, version X0.1 // Copyright 2001-2004 William S. Yerazunis, all rights reserved. // // This software is licensed to the public under the Free Software // Foundation's GNU GPL, version 2.0. You may obtain a copy of the // GPL by visiting the Free Software Foundations web site at // www.fsf.org . Other licenses may be negotiated; contact the // author for details. // // include some standard files #include "crm114_sysincludes.h" // include any local crm114 configuration file #include "crm114_config.h" // include the crm114 data structures file #include "crm114_structs.h" // and include the routine declarations file #include "crm114.h" int main (int argc, char **argv) { long i,j,k; // some random counters, when we need a loop long hfsize, hfsize1, hfsize2; int hfd1, hfd2; long f1, f2; long sim, diff, dom1, dom2, hclash, kclash; { struct stat statbuf; // filestat buffer FEATUREBUCKET_TYPE *h1, *h2; // the text of the hash file // filename is argv [1] // and stat it to get it's length if(!argv[1] || !argv[2]) { fprintf (stdout, "Usage: cssdiff \n"); return (EXIT_SUCCESS); }; // quick check- does the first file even exist? k = stat (argv[1], &statbuf); if (k != 0) { fprintf (stderr, "\n CSS file '%s' not found. \n", argv[1]); exit (EXIT_FAILURE); }; // hfsize = statbuf.st_size; // mmap the hash file into memory so we can bitwhack it hfd1 = open (argv[1], O_RDONLY); if (hfd1 < 0) { fprintf (stderr, "\n Cannot read file %s\n", argv[1]); exit (EXIT_FAILURE); }; h1 = (FEATUREBUCKET_TYPE *) mmap (NULL, hfsize, PROT_READ, MAP_PRIVATE, hfd1, 0); if (h1 == MAP_FAILED) { fprintf (stderr, "\n MMAP failed on file %s\n", argv[1]); exit (EXIT_FAILURE); }; hfsize1 = statbuf.st_size / sizeof (FEATUREBUCKET_TYPE); // // and repeat the process for the second file: k = stat (argv[2], &statbuf); // quick check- does the file even exist? if (k != 0) { fprintf (stderr, "\n.CSS file '%s' not found.\n", argv[2]); exit (EXIT_FAILURE); }; hfsize2 = statbuf.st_size; // mmap the hash file into memory so we can bitwhack it hfd2 = open (argv[2], O_RDONLY); if (hfd2 < 0) { fprintf (stderr, "\n Cannot read file %s\n", argv[2]); exit (EXIT_FAILURE); }; h2 = (FEATUREBUCKET_TYPE *) mmap (NULL, hfsize2, PROT_READ, MAP_PRIVATE, hfd2, 0); if (h2 == MAP_FAILED) { fprintf (stderr, "\n MMAP failed on file %s\n", argv[2]); exit (EXIT_FAILURE); }; hfsize2 = hfsize2 / sizeof (FEATUREBUCKET_TYPE); fprintf (stderr, "Sparse spectra file %s has %ld bins total\n", argv[1], hfsize1); fprintf (stdout, "Sparse spectra file %s has %ld bins total\n", argv[2], hfsize2); // // if (hfsize1 != hfsize2) { fprintf (stderr, "\n.CSS files %s, %s :\n lengths differ: %ld vs %ld.\n", argv[1],argv[2], hfsize1, hfsize2); fprintf (stderr, "\n This is not a fatal error, but be warned.\n"); }; f1 = 0; f2 = 0; sim = 0; diff = 0; dom1 = 0; dom2 = 0; hclash = 0; kclash = 0; // // The algorithm - for each file, // for each bucket in each file // find corresponding bucket in other file // increment dom1 or dom2 as appropriate // always increment sim and diff // end // end // divide sim and diff by 2, as they are doublecounted // print statistics and exit. // // start at 1 - no need to check bin 0 (version). for ( i = 1; i < hfsize1; i++) { if ( h1[i].key != 0 ) { f1 += h1[i].value; k = h1[i].hash % hfsize2; if (k == 0) k = 1; while (h2[k].value != 0 && (h2[k].hash != h1[i].hash || h2[k].key != h1[i].key)) { k++; if (k >= hfsize2) k = 1; }; // Now we've found the corresponding (or vacant) slot in // h2. Do our tallies... j = h1[i].value ; if (j > h2[k].value ) j = h2[k].value; sim += j; j = h1[i].value - h2[k].value; if (j < 0) j = -j; diff += j; j = h1[i].value - h2[k].value; if (j < 0) j = 0; dom1 += j; }; }; // // And repeat for file 2. for ( i = 1; i < hfsize2; i++) { if ( h2[i].key != 0 ) { f2 += h2[i].value; k = h2[i].hash % hfsize1; if (k == 0) k = 1; while (h1[k].value != 0 && (h1[k].hash != h2[i].hash || h1[k].key != h2[i].key)) { k++; if (k >= hfsize1) k = 1; }; // Now we've found the corresponding (or vacant) slot in // h1. Do our tallies... j = h2[i].value ; if (j > h1[k].value ) j = h1[k].value; sim += j; j = h1[k].value - h2[i].value; if (j < 0) j = -j; diff += j; j = h2[i].value - h1[k].value; if (j < 0) j = 0; dom2 += j; }; }; fprintf (stdout, "\n File 1 total features : %12ld", f1); fprintf (stdout, "\n File 2 total features : %12ld\n", f2); fprintf (stdout, "\n Similarities between files : %12ld", sim/2); fprintf (stdout, "\n Differences between files : %12ld\n", diff/2); fprintf (stdout, "\n File 1 dominates file 2 : %12ld", dom1); fprintf (stdout, "\n File 2 dominates file 1 : %12ld\n", dom2); } return 0; }