2 * Keyfinder - finds crypto keys, encrypted data and compressed data in files
3 * by analyzing the entropy of parts of the file.
5 * (c) 2005 by van Hauser / THC <vh@thc.org> www.thc.org
6 * The GPL 2.0 applies to this code.
8 * Based on the paper "Playing hide and seek with stored keys" by Shamir and
9 * van Someren. www.ncipher.com/products/files/papers/anguilla/keyhide2.pdf
11 * In my experiments I went however a different route to identify keys which
12 * seems to be better when identifying keys.
13 * The paper evaluates 60 byte chunks on their entropy, and depending on the
14 * number of consecutive chunks with high entropies, this could be the key.
15 * This tool evaluates the full key size for the entropy, increasing by an
16 * approx 10% of the key size windows. Hence if the key is 1024 bit = 128 byte
17 * long, the window size is 10, and the file size is 500 bytes, it looks at
18 * the randomness from bytes 0-128, then 10-138, next 20-148 etc.
19 * Additionally to measuring the entropy, I added checking for the
20 * arithmetical mean, and detecting counting bytes up- and downwards in the
21 * beginning, middle or end of the file.
22 * By having three randomness checks and evaluating the full key size with a
23 * sliding window, the best keyfinding measures are in place, and much better
24 * than in the described paper.
26 * However still beware: you will 1) receive some false positives, and 2)
27 * Keyfinder can not find the exact start/end region of the key, it will
28 * usually be some bytes before or after the reported file area.
30 * For usage hints, type "keyfinder -h"
32 * To compile: gcc -o keyfinder keyfinder.c -lm
37 #include <sys/types.h>
46 #define MINIMUM_RANDOMNESS 85
48 #define WINDOW_SIZE 10
51 int minimal_randomness = MINIMUM_RANDOMNESS;
/*
 * Usage text: prints the program banner, the command-line syntax (using
 * prg as the program name), the option list with the compile-time defaults
 * KEY_SIZE, WINDOW_SIZE and MINIMUM_RANDOMNESS, and tuning hints.
 * NOTE(review): the signature line of the enclosing function is not present
 * in this view; these statements presumably form its body -- confirm.
 */
58 printf("Keyfinder v1.0 (c) 2005 by van Hauser / THC <vh@thc.org> www.thc.org\n");
59 printf("\nSyntax: %s [-k KEY_SIZE] [-w WINDOW_SIZE] [-r MINIMUM_RANDOMNESS] FILE\n", prg);
60 printf("\nOptions:\n");
/* The defaults shown below come from the macros, not from runtime values. */
61 printf(" -k KEY_SIZE Key size to look for (default: %d byte [%d bit])\n", KEY_SIZE, KEY_SIZE * 8);
62 printf(" -w WINDOW_SIZE Window size to check (default: %d byte)\n", WINDOW_SIZE);
63 printf(" -r MINIMUM_RANDOMNESS Minimum %% of randomness for keys (default: %d%%)\n", MINIMUM_RANDOMNESS);
64 printf(" -d Print debug output\n");
65 printf("\nFinds binary crypto keys, crypto data and compressed data in files.\n");
66 printf("The result is an indicator where the key could be, not a byte exact match.\n");
67 printf("The randomness is calculated by the entropy, the arithmetic mean value and a\n");
68 printf("counting check. Read more information in the header of the keyfinder.c file.\n");
69 printf("Note: If -k is specified but not -w, -w will be 10%% of -k.\n");
70 printf("Hints: (1) the smaller -k, the smaller should be -r\n");
71 printf(" (2) the smaller -r the more false positives\n");
/*
 * NOTE(review): this hint says 1/8 to 1/20, while the warning printed by
 * main() for an oversized window says 1/8 to 1/16.
 */
72 printf(" (3) -w should be 1/8 to 1/20 of -k\n");
73 printf(" (4) -k values are 128/256/512 byte for RSA/asymmetric keys\n");
74 printf(" (5) -k 512 -> -r 95; -k 128 -> -r 85 \n");
78 /* Why is log2() in libm not working?? what a fucking #!+~$$!! */
79 #define log2of10 3.32192809488736234787
80 static double log2_(double x) {
81 return (log2of10 * (log10(x)));
/*
 * Scores how random the bytes buf[0 .. buflen-1] look and stores the result
 * in the file-scope variables ext_entropy and ext_mean (main() reads both
 * after each call).
 *
 *   buf    - bytes to analyse
 *   buflen - number of bytes to analyse; used as a divisor, so it must be
 *            non-zero (main() only calls this with a positive length)
 *
 * Steps visible here: per-byte-value frequencies -> entropy in bits and the
 * arithmetic mean of the byte values -> both scaled to a 0..100 style score
 * -> if the combined score reaches minimal_randomness, the start, middle
 * and end of the buffer are checked for runs of bytes that differ by
 * exactly one ("counting").
 *
 * NOTE(review): several short lines of this function (local declarations,
 * loop bodies, the statements that set j, closing braces) are not present
 * in this view; comments below describe only what the visible lines show.
 */
84 void calculate_randomness(unsigned char *buf, int buflen) {
/* One occurrence counter per possible byte value. */
88 unsigned long ccount[256];
92 for (i = 0; i < 256; i++)
95 for (i = 0; i < buflen; i++)
/* Turn counts into probabilities and accumulate the sum of all byte values. */
98 for (i = 0; i < 256; i++) {
99 prob[i] = (double) ccount[i] / buflen;
100 datasum += ((double) i) * ccount[i]; /**/
/*
 * Entropy: sum of p * log2(1/p) over all byte values.
 * NOTE(review): for a byte value that never occurs prob[i] is 0 and 1.0/0
 * is evaluated; a guard is not visible in this view -- confirm one exists.
 */
103 for (i = 0; i < 256; i++) {
105 ent += prob[i] * log2_((1.0 / prob[i]));
106 // printf("%f += %f * %f\n", ent, prob[i], log2_((1.0 / prob[i])));
/*
 * Mean score: distance of the mean from 127.5, divided by 1.275, then
 * subtracted from 100, so a mean of exactly 127.5 gives 100.
 */
110 mean = datasum / buflen; /**/
111 ext_mean = (mean - 127.5) / 1.275;
/* Sign flip; the condition that controls it is not visible in this view. */
113 ext_mean = ext_mean * -1;
114 ext_mean = 100 - ext_mean;
/* Entropy score: entropy as a percentage of the 8-bit maximum. */
116 ext_entropy = (ent * 100) / 8;
119 printf("Entropy: %f bits (8 is totally random)\n", ent);
120 printf("Mean: %1.4f (127.5 is totally random)\n", mean);
/*
 * Counting checks run only for buffers that already pass the threshold.
 * Each loop keeps going while j == 0; the assignments to j are not visible
 * in this view.
 */
123 if (ext_entropy + ext_mean >= minimal_randomness) {
124 /* check for counting in the beginning */
/* Compares neighbours buf[i] / buf[i + 1] for i = 0..7, i.e. reads buf[0..8]. */
125 for (i = 0; i < 8 && j == 0; i++)
126 if (buf[i] + 1 != buf[i + 1])
132 for (i = 0; i < 8 && j == 0; i++)
/*
 * NOTE(review): `i` is read (buf[i]) and modified (i++) in the same
 * expression with no sequence point in between, which is undefined
 * behaviour in C. Together with the loop's own i++ it also advances i
 * twice per iteration. The preceding check uses buf[i + 1]; this line most
 * likely should as well.
 */
133 if (buf[i] - 1 != buf[i++ + 1])
140 /* check for counting in the middle */
/*
 * Compares neighbours around buflen/2, walking backwards from index
 * buflen/2 - 4.
 * NOTE(review): for i = 7 the left index is buflen/2 - 11, which is
 * negative when buflen < 22. main() accepts key_size 20 and 21 and may
 * also pass a short first read, so this can read before the buffer.
 */
141 for (i = 0; i < 8 && j == 0; i++)
142 if (buf[((buflen/2) - i) - 4] != buf[((buflen/2) - i) - 3] + 1)
148 for (i = 0; i < 8 && j == 0; i++)
149 if (buf[((buflen/2) - i) - 4] != buf[((buflen/2) - i) - 3] - 1)
156 /* check for counting in the end */
/* Compares the last nine bytes pairwise; needs buflen >= 9 to stay in bounds. */
157 for (i = 1; i <= 8 && j == 0; i++)
158 if (buf[buflen - i] != buf[(buflen - i) - 1] + 1)
164 for (i = 1; i <= 8 && j == 0; i++)
165 if (buf[buflen - i] != buf[(buflen - i) - 1] - 1)
/* Message for a buffer rejected as a counting sequence; its guard is not visible here. */
174 printf("Counting detected, false positive, ignoring...\n");
/*
 * Prints `length` bytes starting at `string` as a hex dump with an ASCII
 * column: full rows of DUMP_ROWS bytes first, then one partial row for the
 * remaining length % DUMP_ROWS bytes.
 *
 *   string - bytes to dump
 *   length - number of bytes to dump
 *   offset - value added to the row position when printing the row label
 *
 * NOTE(review): several short lines (declarations of i and j, separator
 * and padding output, else branches, closing braces) are not present in
 * this view.
 * NOTE(review): the row stride is the literal 16 while the loop bounds use
 * DUMP_ROWS, and lastrow_data has 16 elements; this is only consistent if
 * DUMP_ROWS is 16 (its definition is not visible here).
 */
181 void dump_asciihex(unsigned char *string, int length, unsigned int offset) {
182 unsigned char *p = (unsigned char *) string;
183 unsigned char lastrow_data[16];
/* Number of complete rows, and byte count of the trailing partial row. */
184 unsigned int rows = length / DUMP_ROWS;
185 unsigned int lastrow = length % DUMP_ROWS;
188 for (i = 0; i < rows; i++) {
/*
 * NOTE(review): the `h` length modifier makes printf convert the value to
 * unsigned short, so row labels of 0x10000 and above are printed truncated
 * to their low 16 bits; "%08x" would print the full value.
 */
189 printf("%08hx: ", i * 16 + offset);
/* Hex column for this row. */
190 for (j = 0; j < DUMP_ROWS; j++) {
191 printf("%02x", p[(i * 16) + j]);
/* ASCII column: printable bytes are echoed as characters. */
196 for (j = 0; j < DUMP_ROWS; j++) {
197 if (isprint(p[(i * 16) + j]))
198 printf("%c", p[(i * 16) + j]);
/*
 * Partial last row. The remaining bytes are copied into lastrow_data, but
 * in the visible lines the output below reads from p, not from
 * lastrow_data, so the copy appears to be unused.
 */
205 memset(lastrow_data, 0, sizeof(lastrow_data));
206 memcpy(lastrow_data, p + length - lastrow, lastrow);
/* Same 16-bit truncation of the row label as for full rows. */
207 printf("%08hx: ", i * 16 + offset);
208 for (j = 0; j < lastrow; j++) {
209 printf("%02x", p[(i * 16) + j]);
/* Runs j up to DUMP_ROWS after the hex bytes; the loop body is not visible here. */
213 while(j < DUMP_ROWS) {
220 for (j = 0; j < lastrow; j++) {
221 if (isprint(p[(i * 16) + j]))
222 printf("%c", p[(i * 16) + j]);
/* Runs j up to DUMP_ROWS after the ASCII bytes; the loop body is not visible here. */
226 while(j < DUMP_ROWS) {
/*
 * Reports one candidate region: prints a header line with its position and
 * randomness figures, then a hex/ASCII dump of the region.
 *
 *   buf         - start of the region to dump
 *   key_size    - number of bytes to dump
 *   block_count - block index; the printed position and the first row label
 *                 of the dump are both block_count * 64
 *   entropy     - entropy score, printed as a percentage
 *   mean        - mean score, printed as a percentage; the header also
 *                 prints the average of entropy and mean
 *
 * NOTE(review): the factor 64 is fixed here; whether it matches how the
 * caller advances block_count cannot be confirmed from this block.
 */
void dump_found(char *buf, int key_size, unsigned int block_count, int entropy, int mean) {
  unsigned int position = block_count * 64;

  printf("Found at block %u (Entropy is %d%% | Mean Deviation is %d%% = %d%%):\n", position, entropy, mean, (entropy + mean) / 2);
  /* dump_asciihex() takes unsigned char *, so convert the pointer explicitly. */
  dump_asciihex((unsigned char *) buf, key_size, position);
}
240 int main(int argc, char *argv[]) {
241 int key_size = KEY_SIZE;
248 unsigned int block_count = 0;
252 if (argc < 2 || strcmp(argv[1], "-h") == 0 || strncmp(argv[1], "--h", 3) == 0)
255 while ((i = getopt(argc, argv, "dw:r:k:")) >= 0) {
261 window_size = atoi(optarg);
264 minimal_randomness = atoi(optarg);
267 key_size = atoi(optarg);
274 if (key_size != KEY_SIZE) {
275 if (window_size == 0)
276 window_size = (key_size / 10) - 1;
278 window_size = WINDOW_SIZE;
280 if (key_size < 20 || key_size > 65535 || window_size < 1 || window_size >= key_size || minimal_randomness < 1 || minimal_randomness > 99) {
281 fprintf(stderr, "Error: Wrong Values! Limits: 20 < key_size < 65535; 1 < window_size < key_size; 1 < minimal_randomness < 100\n");
285 if (key_size < window_size * 8)
286 fprintf(stderr, "Warning: The window size is too large, -w should be 1/8 to 1/16 of -k\n");
288 if (optind + 1 != argc)
293 if ((f = fopen(fn, "r")) == NULL) {
294 fprintf(stderr, "Error: Can not open file %s\n", fn);
298 if ((buf = malloc(key_size + window_size)) == NULL) {
299 fprintf(stderr, "Error: malloc() failed\n");
302 memset(buf, 0, key_size + window_size);
304 printf("Analyzing %s:\n", fn);
306 printf("[Key Size: %d byte/%d bit, Window Size: %d byte, Minimal Randomness: %d%%]\n", key_size, key_size * 8, window_size, minimal_randomness);
308 minimal_randomness = minimal_randomness * 2;
310 if ((reading = fread(buf, 1, key_size, f)) > 0) {
311 calculate_randomness(buf, reading);
312 if ((ext_entropy + ext_mean) >= minimal_randomness && reading == key_size)
313 dump_found(buf, key_size, block_count, ext_entropy, ext_mean);
314 if (reading == key_size)
315 reading = window_size;
316 while (!feof(f) && reading == window_size) {
317 if ((reading = fread(buf + key_size, 1, window_size, f)) > 0) {
319 memmove(buf, buf + reading, key_size);
320 calculate_randomness(buf, key_size);
321 if ((ext_entropy + ext_mean) >= minimal_randomness)
322 dump_found(buf, key_size, block_count, ext_entropy, ext_mean);