/* @(#)bias.c 1.5 (QUALCOMM) 03/04/04 */ /* This is an expanded version of "hwt" that handles larger input data * sets, is more efficient, and does thing byte- or word- parallel * if requested. */ /* https://opensource.qualcomm.com/assets/tgz/ */ #include #include #include #include #include "tab.h" #define MAXBLOCK 32 /* bytes in largest block */ int blocksize = 1; int blockbytes = 1; /* number of bytes per block */ int verbose = 0; /* report statistics no matter what */ int reported = 0; /* reported something, so give overall report */ char *myname; /* being balanced isn't interesting until you have lots of bits; * P(exactly 5000 out of 10000) ~= .008). * Similarly, the exact proportions aren't interesting past that point. */ #define BAL 10000 struct { double limit; /* double ended test probability */ int report; char *message; } sigtable[] = { { 6, 3, "certainly non-uniform (>6sigma, z = %g)\n" }, { 3.2908, 2, "almost certainly non-uniform (>99.9%%)\n" }, { 2.5762, 2, "likely non-uniform (>99%%)\n" }, { 1.9604, 1, "might be non-uniform (>95%%)\n" }, { -1, 0, "not statistically significant\n" }, }; /* accumulate counts of bytes in a double array, to avoid overflow */ double C[MAXBLOCK][256]; double chisq; void printbit(double nbits, double nones, int bitnum) { double p, z; int i, balanced; static int nbitsprinted = 0; balanced = (nbits == (nones * 2)); p = (double)nones / nbits; z = 2.0 * sqrt((double)nbits) * (p - 0.5); chisq += z*z; /* find category */ for (i = 0; sigtable[i].report; ++i) if (abs(z) > sigtable[i].limit) break; if (verbose || (balanced && nbits > BAL) || blocksize == 1 || sigtable[i].report > 1 ) { /* we're going to print something */ /* if we aren't printing nbits on each line, print it once */ if (!nbitsprinted && nbits > BAL) { printf("%.20g samples\n", nbits); nbitsprinted = 1; } if (blocksize != 1) printf("bit %3d: ", bitnum); if (nbits <= BAL) printf("%.20g/%.20g ", nones, nbits); printf(".5%+7f ", 0.5-p); if (balanced && (nbits > BAL || verbose)) printf("balanced\n"); else if (sigtable[i].report > 2) printf(sigtable[i].message, z); else printf(sigtable[i].message); } } /* chi squared threshold tables for N <= 31 */ #define NCHI 31 float chi95[NCHI] = { 3.84146, 5.99146, 7.81473, 9.48773, 11.0705, 12.5916, 14.0671, 15.5073, 16.919, 18.307, 19.6751, 21.0261, 22.362, 23.6848, 24.9958, 26.2962, 27.5871, 28.8693, 30.1435, 31.4104, 32.6706, 33.9244, 35.1725, 36.415, 37.6525, 38.8851, 40.1133, 41.3371, 42.557, 43.773, 44.9853 }; float chi99[NCHI] = { 6.6349, 9.21034, 11.3449, 13.2767, 15.0863, 16.8119, 18.4753, 20.0902, 21.666, 23.2093, 24.725, 26.217, 27.6882, 29.1412, 30.5779, 31.9999, 33.4087, 34.8053, 36.1909, 37.5662, 38.9322, 40.2894, 41.6384, 42.9798, 44.3141, 45.6417, 46.9629, 48.2782, 49.5879, 50.8922, 52.1914 }; #define NORM99 2.32635 #define NORM95 1.64485 void dochisq(void) { double t1, t5; if (blocksize - 1 <= NCHI) { t1 = chi99[blocksize-2]; t5 = chi95[blocksize-2]; } else { t1 = NORM99 + sqrt(2*blocksize - 2); t1 = t1 * t1 / 2; t5 = NORM95 + sqrt(2*blocksize - 2); t5 = t5 * t5 / 2; } if (t1 < chisq) { printf("Overall Chi Squared value is %.7g (threshold %.7g)\n", chisq, t1); printf("Overall likely non-uniform (>99%%)\n"); } else if (t5 < chisq) printf("Overall might be non-uniform (>95%%)\n"); else if (verbose || reported) printf("Overall not statistically significant\n"); } int main(int ac, char **av) { register int i; register int c; register double nbytes = 0, nones = 0; register int b; long l; char *endptr; myname = av[0]; if (ac >= 2 && strcmp(av[1], "-v") == 0) { ++verbose; --ac, ++av; } if (ac == 2) { l = strtol(av[1], &endptr, 0); if (*endptr != '\0') { fprintf(stderr, "'%s' was expected to be number of bytes per block\n", av[1]); goto usage; } if (l < 1 || MAXBLOCK < l) { fprintf(stderr, "'%s' must be in range 1..%d\n", av[1], MAXBLOCK); goto usage; } blockbytes = l; blocksize = 8*blockbytes; ++av, --ac; } if (ac != 1) { usage: fprintf(stderr, "usage: %s [-v] [bytes-per-block]\n", myname); return 1; } /* read in all the available bytes */ b = blockbytes; while ((c = getchar()) != EOF) { ++nbytes; ++C[--b][c]; if (b == 0) b = blockbytes; } /* warn if funny sized file */ if (nbytes < blockbytes) { fprintf(stderr, "%s: Error: less than one block of data\n", myname); return 1; } if (b != blockbytes) fprintf(stderr, "%s: Warning: bytes not multiple of block size (%.20g)\n", myname, nbytes); nbytes /= blockbytes; /* now it's really nblocks, but who cares? */ /* handle the simple hamming weight case */ if (blocksize == 1) { for (nones = i = 0; i < 256; ++i) nones += C[0][i] * Popcount[i]; printbit(nbytes*8, nones, 0); return 0; } /* handle the multi-bit-block cases */ for (b = 0; b < blocksize; ++b) { for (nones = i = 0; i < 256; ++i) { if (i & (1 << (b & 7))) nones += C[b >> 3][i]; } printbit(nbytes, nones, b); } dochisq(); return 0; }