/*
 * nettime's_dusty_archivist on Mon, 20 Mar 2000 07:55:10 +0100 (CET) |
 * [Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]
 * <nettime> cndecode.c |
 */
/*
 * cndecode.c
 * By Matthew Skala
 */

/*
 * Utility to decode the Cyber Patrol 4 cyber.not file
 * Usage:
 *    cndecode cyber.not dictionary suppdict iplist
 * all arguments except the first are optional
 * "dictionary" is a list (newline separated) of words for the URL-hash attack
 *   typically /usr/dict/words or equivalent, or use our ready-made one
 * suppdict is another such list; if it's specified, it will be written to
 *   with the list of actual words found (saves time next run)
 * iplist is lines of ip address and domain name, tab separated; it will be
 *   written to with any new addresses looked up (if we compile with reverse
 *   DNS enabled).
 */

/***************************************************************************/

/* System stuff */

/*
 * Compiling notes:
 *    This was written under Linux on a PC, but should be portable to any 32-bit
 * little-endian architecture.  Since CP4 is PC-specific, that shouldn't be
 * too much of a limitation.  This program does require structures bigger than
 * 64K, and so it might be touch-and-go on a 16-bit PC compiler.  If you
 * enable reverse DNS with the defines below, you will have to have reasonably
 * Linux-ish (which in turn means reasonably BSD-ish) networking libraries.
 *    (Review note: multi-byte fields of cyber.not are decoded byte by byte
 * by read_u16()/read_u32() below, so parsing the file does not depend on the
 * host's word size or byte order.)
 *    Reverse queries can take a long time.  If you find this annoying, you
 * can set a time limit, and then after that time expires the system will stop
 * attempting reverse lookups.  This is a win because the reverse lookups it
 * already did are saved; next time, it'll pick up where it left off.  Real
 * Programmers, of course, would run multiple queries at once, but that would
 * mean either splitting into several processes, multithreading, or "fake"
 * multithreading with custom-written resolver routines.  This way is a lot
 * less stressful.  (I have a perl script that spawns 40 processes to max out
 * my modem, but it's a monstrosity.)
 *    The amount of CPU power required for
 * CRC reversal scales exponentially with the number of characters of CRC
 * reversing you choose; that also determines how accurately it'll guess for
 * URL hashes that are not in the dictionary.  It's probably smarter to get a
 * bigger dictionary.
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

/* Compile-time configuration */

/* Should we attempt reverse lookups?  Requires Net libraries, active
 * connection, and a certain amount of time. */
#define REVERSE_DNS

/* After this many seconds, no more reverse DNS queries will be performed.
 * Default one hour, and ignored if you turned reverse DNS off. */
#define TIME_LIMIT 3600

/* How many characters of CRC reversal?  Five is essentially free, more takes
 * exponentially longer time. */
#define REVERSE_CRC_LENGTH 8

/* How many entries in the dictionary hash table? */
#define DICTHASH_SIZE 32767

/* How many entries in the IP address hash table? */
#define IPHASH_SIZE 32767

/* What's the longest line length we expect to see? */
#define LINELEN 256

/* Headers we only need if we're doing reverse DNS */
#ifdef REVERSE_DNS
#include <netdb.h>
#include <sys/socket.h>
#include <time.h>
#endif

/* The reversal tables below (lengthmatrix, freebits) have 12 entries, and
 * print_revhash() uses a 12-byte scratch buffer, so longer reversals would
 * index past them. */
#if (REVERSE_CRC_LENGTH<1) || (REVERSE_CRC_LENGTH>12)
#error "REVERSE_CRC_LENGTH must be between 1 and 12"
#endif

/* Placeholder stored for a hash that could not be reversed */
#define UNKNOWN_WORD "?UNKNOWN?"

/* Room for the longest reversed word (12 characters) or the placeholder,
 * plus the terminating NUL */
#define REVWORD_SIZE 13

/* Room for "255.255.255.255" plus the terminating NUL */
#define DOTTED_QUAD_SIZE 16

/* The code reads header fields up to offset 0x28+4, so a usable file is at
 * least this long */
#define CYBER_NOT_HEADER_SIZE 0x2C

/****************************************************************************/

/* Reference tables */

/* Forward CRC polynomial table */
unsigned long crctable[256]={
  0x00000000L, 0x77073096L, 0xEE0E612CL, 0x990951BAL,
  0x076DC419L, 0x706AF48FL, 0xE963A535L, 0x9E6495A3L,
  0x0EDB8832L, 0x79DCB8A4L, 0xE0D5E91EL, 0x97D2D988L,
  0x09B64C2BL, 0x7EB17CBDL, 0xE7B82D07L, 0x90BF1D91L,
  0x1DB71064L, 0x6AB020F2L, 0xF3B97148L, 0x84BE41DEL,
  0x1ADAD47DL, 0x6DDDE4EBL, 0xF4D4B551L, 0x83D385C7L,
  0x136C9856L, 0x646BA8C0L, 0xFD62F97AL, 0x8A65C9ECL,
  0x14015C4FL, 0x63066CD9L, 0xFA0F3D63L, 0x8D080DF5L,
  0x3B6E20C8L, 0x4C69105EL, 0xD56041E4L, 0xA2677172L,
  0x3C03E4D1L, 0x4B04D447L, 0xD20D85FDL, 0xA50AB56BL,
  0x35B5A8FAL, 0x42B2986CL, 0xDBBBC9D6L, 0xACBCF940L,
  0x32D86CE3L, 0x45DF5C75L, 0xDCD60DCFL, 0xABD13D59L,
  0x26D930ACL, 0x51DE003AL, 0xC8D75180L, 0xBFD06116L,
  0x21B4F4B5L, 0x56B3C423L, 0xCFBA9599L, 0xB8BDA50FL,
  0x2802B89EL, 0x5F058808L, 0xC60CD9B2L, 0xB10BE924L,
  0x2F6F7C87L, 0x58684C11L, 0xC1611DABL, 0xB6662D3DL,
  0x76DC4190L, 0x01DB7106L, 0x98D220BCL, 0xEFD5102AL,
  0x71B18589L, 0x06B6B51FL, 0x9FBFE4A5L, 0xE8B8D433L,
  0x7807C9A2L, 0x0F00F934L, 0x9609A88EL, 0xE10E9818L,
  0x7F6A0DBBL, 0x086D3D2DL, 0x91646C97L, 0xE6635C01L,
  0x6B6B51F4L, 0x1C6C6162L, 0x856530D8L, 0xF262004EL,
  0x6C0695EDL, 0x1B01A57BL, 0x8208F4C1L, 0xF50FC457L,
  0x65B0D9C6L, 0x12B7E950L, 0x8BBEB8EAL, 0xFCB9887CL,
  0x62DD1DDFL, 0x15DA2D49L, 0x8CD37CF3L, 0xFBD44C65L,
  0x4DB26158L, 0x3AB551CEL, 0xA3BC0074L, 0xD4BB30E2L,
  0x4ADFA541L, 0x3DD895D7L, 0xA4D1C46DL, 0xD3D6F4FBL,
  0x4369E96AL, 0x346ED9FCL, 0xAD678846L, 0xDA60B8D0L,
  0x44042D73L, 0x33031DE5L, 0xAA0A4C5FL, 0xDD0D7CC9L,
  0x5005713CL, 0x270241AAL, 0xBE0B1010L, 0xC90C2086L,
  0x5768B525L, 0x206F85B3L, 0xB966D409L, 0xCE61E49FL,
  0x5EDEF90EL, 0x29D9C998L, 0xB0D09822L, 0xC7D7A8B4L,
  0x59B33D17L, 0x2EB40D81L, 0xB7BD5C3BL, 0xC0BA6CADL,
  0xEDB88320L, 0x9ABFB3B6L, 0x03B6E20CL, 0x74B1D29AL,
  0xEAD54739L, 0x9DD277AFL, 0x04DB2615L, 0x73DC1683L,
  0xE3630B12L, 0x94643B84L, 0x0D6D6A3EL, 0x7A6A5AA8L,
  0xE40ECF0BL, 0x9309FF9DL, 0x0A00AE27L, 0x7D079EB1L,
  0xF00F9344L, 0x8708A3D2L, 0x1E01F268L, 0x6906C2FEL,
  0xF762575DL, 0x806567CBL, 0x196C3671L, 0x6E6B06E7L,
  0xFED41B76L, 0x89D32BE0L, 0x10DA7A5AL, 0x67DD4ACCL,
  0xF9B9DF6FL, 0x8EBEEFF9L, 0x17B7BE43L, 0x60B08ED5L,
  0xD6D6A3E8L, 0xA1D1937EL, 0x38D8C2C4L, 0x4FDFF252L,
  0xD1BB67F1L, 0xA6BC5767L, 0x3FB506DDL, 0x48B2364BL,
  0xD80D2BDAL, 0xAF0A1B4CL, 0x36034AF6L, 0x41047A60L,
  0xDF60EFC3L, 0xA867DF55L, 0x316E8EEFL, 0x4669BE79L,
  0xCB61B38CL, 0xBC66831AL, 0x256FD2A0L, 0x5268E236L,
  0xCC0C7795L, 0xBB0B4703L, 0x220216B9L, 0x5505262FL,
  0xC5BA3BBEL, 0xB2BD0B28L, 0x2BB45A92L, 0x5CB36A04L,
  0xC2D7FFA7L, 0xB5D0CF31L, 0x2CD99E8BL, 0x5BDEAE1DL,
  0x9B64C2B0L, 0xEC63F226L, 0x756AA39CL, 0x026D930AL,
  0x9C0906A9L, 0xEB0E363FL, 0x72076785L, 0x05005713L,
  0x95BF4A82L, 0xE2B87A14L, 0x7BB12BAEL, 0x0CB61B38L,
  0x92D28E9BL, 0xE5D5BE0DL, 0x7CDCEFB7L, 0x0BDBDF21L,
  0x86D3D2D4L, 0xF1D4E242L, 0x68DDB3F8L, 0x1FDA836EL,
  0x81BE16CDL, 0xF6B9265BL, 0x6FB077E1L, 0x18B74777L,
  0x88085AE6L, 0xFF0F6A70L, 0x66063BCAL, 0x11010B5CL,
  0x8F659EFFL, 0xF862AE69L, 0x616BFFD3L, 0x166CCF45L,
  0xA00AE278L, 0xD70DD2EEL, 0x4E048354L, 0x3903B3C2L,
  0xA7672661L, 0xD06016F7L, 0x4969474DL, 0x3E6E77DBL,
  0xAED16A4AL, 0xD9D65ADCL, 0x40DF0B66L, 0x37D83BF0L,
  0xA9BCAE53L, 0xDEBB9EC5L, 0x47B2CF7FL, 0x30B5FFE9L,
  0xBDBDF21CL, 0xCABAC28AL, 0x53B39330L, 0x24B4A3A6L,
  0xBAD03605L, 0xCDD70693L, 0x54DE5729L, 0x23D967BFL,
  0xB3667A2EL, 0xC4614AB8L, 0x5D681B02L, 0x2A6F2B94L,
  0xB40BBE37L, 0xC30C8EA1L, 0x5A05DF1BL, 0x2D02EF8DL
};

/* This answers the question: how many freely-chosen bits do I include
 * when I ask for a crc collision with input length (index)? */
char freebits[12]={0,0,0,0,1,4,10,16,22,28,34,40};

/* This says where each forced bit goes */
char bitsforced[32]={0,1,2,3,4,6,
                     8,9,10,11,12,14,
                     16,17,18,19,20,22,
                     24,25,26,27,28,30,
                     32,33,34,35,38,
                     40,41,42};

/* This says where each free bit goes */
char bitsfree[40]={36,43,44,46,
                   48,49,50,51,52,54,
                   56,57,58,59,60,62,
                   64,65,66,67,68,70,
                   72,73,74,75,76,78,
                   80,81,82,83,84,86,
                   88,89,90,91,92,94};

/* The portion of the inverted matrix corresponding to the CRC bits */
unsigned long crcmatrix[32]={
  0x9BF7B4FE,0x10CEBBDB,0x3EC28E73,0xE516F5B2,
  0x3EB07172,0xAC6CB91B,0x2344667F,0x25ECE58C,
  0xD24109C4,0x501CB10A,0x97761211,0x0A2EF700,
  0x0C806D13,0x55AE3901,0x4C147270,0xDAC3C857,
  0x384B8A54,0xF7583CAD,0xA1DA1DC4,0x0028BBDC,
  0xB5BB7FE3,0x99610C1A,0x1FC446C4,0x8DE0FF05,
  0x01D3D128,0x64FAC9B2,0x3BC5E604,0xE564A85C,
  0xADEB84A5,0xCFCDBB2B,0x3E7D9F68,0xA102B971
};

/* The portion of the inverted matrix corresponding to the free bits */
unsigned long freematrix[40]={
  0x0CBFC054,0xAEAB35B2,0x315B20B2,0x1F113696,
  0x6DA65FB4,0x08F3CFCD,0xC0E8FCF1,0xD928FA77,
  0x58C085F6,0x55F7A6A4,0x726948CB,0xBEE706A6,
  0xDE9BCF28,0x539FADD8,0xA5D7713D,0xA6B4900F,
  0x3CA9547B,0xC98AC9B5,0xAF52FA18,0x60098F5B,
  0x142D2C51,0x706AA085,0x46494250,0x54026BCE,
  0xEBE4D0A3,0x673646B9,0x945A22D6,0x7C5347FB,
  0xC61C9B99,0x97780ADB,0x7E9DB1AE,0x88C43E39,
  0x55CEBFB3,0x5C81ADC9,0x0F3DD57C,0x3D44BCF3,
  0x0383F8DD,0x73F38757,0xA8F2D5CF,0x2922BEA9
};

/* Matrix columns to take into account the canonicalization */
unsigned long lengthmatrix[12]={
  0x84741063,0xC5273406,0xE5A222DF,0x9941CB2B,
  0xD9EBE522,0xCB93A8AF,0x962E3D2D,0x90029144,
  0x5B298B04,0x575F1D8A,0x78EE4BEC,0x47B6B86A
};

/* The Cyber Patrol blocking categories */
char *category[16]={
  "Violence / Profanity",
  "Partial Nudity",
  "Full Nudity",
  "Sexual Acts / Text",
  "Gross Depictions / Text",
  "Intolerance",
  "Satanic or Cult",
  "Drugs / Drug Culture",
  "Militant / Extremist",
  "Sex Education",
  "Questionable / Illegal & Gambling",
  "Alcohol & Tobacco",
  "Reserved 4",
  "Reserved 3",
  "Reserved 2",
  "Reserved 1"
};

/* This indicates the "score" for each possible character value.  First
 * 32 entries are for characters 32 to 63, second 32 entries are for
 * characters 96 to 127.  The way this works is that unexpected characters
 * get higher scores and so are less likely to be chosen... this gives us
 * that little bit of extra guidance to help find good reverse CRCs.
 * The baseline is that an ordinary alphabet character is 10 points.
 * Illegal characters count 50, ensuring that they're unlikely to ever be
 * chosen.  Scores assigned manually, and only semi-systematically. */
unsigned cscore[64]={
  /* SP  !  "  #  $  %  &  ' */
     50,20,30,20,17,50,50,30,
  /* (  )  *  +  ,  -  .  / */
     20,20,20,17,20,12,12,50,
  /* 0  1  2  3  4  5  6  7 */
     14,14,15,15,15,15,15,15,
  /* 8  9  :  ;  <  =  >  ? */
     15,14,20,20,50,20,50,30,
  /* `  a  b  c  d  e  f  g */
     30,7,10,10,9,7,10,10,
  /* h  i  j  k  l  m  n  o */
     10,7,12,10,9,10,9,7,
  /* p  q  r  s  t  u  v  w */
     10,15,9,9,9,7,12,10,
  /* x  y  z  {  |  }  ~  DEL */
     9,10,12,20,30,20,30,50
};

/***************************************************************************/

/* Data structures */

/* Structures for the hash tables */

typedef struct _DICTHASH_ENT {
  struct _DICTHASH_ENT *next;
  unsigned long hash;
  char *word;
} DICTHASH_ENT;

typedef struct _IPHASH_ENT {
  struct _IPHASH_ENT *next;
  unsigned long ip;
  char *name;
} IPHASH_ENT;

/* Linked list of blocking masks, for key printing */
typedef struct _BLOCKING_MASK {
  struct _BLOCKING_MASK *next;
  unsigned short mask;
} BLOCKING_MASK;

/* Global vars */

char *cyber_not;
long cyber_not_size;
DICTHASH_ENT **dicthash;
IPHASH_ENT **iphash;
BLOCKING_MASK *masks=NULL;
#ifdef REVERSE_DNS
time_t start_time;
#endif

/*************************************************************************/

/* Utility functions */

/* Decode an unsigned 16-bit little-endian field at p.  Assembling the value
 * from bytes keeps the result independent of host byte order. */
static unsigned short read_u16(const char *p)
{
  const unsigned char *b=(const unsigned char *)p;

  return (unsigned short)(b[0]|(b[1]<<8));
}

/* Decode an unsigned 32-bit little-endian field at p.  The result always
 * fits in 32 bits even where unsigned long is wider, which a 4-byte memcpy
 * into an unsigned long would not guarantee. */
static unsigned long read_u32(const char *p)
{
  const unsigned char *b=(const unsigned char *)p;

  return (unsigned long)b[0]|((unsigned long)b[1]<<8)|
         ((unsigned long)b[2]<<16)|((unsigned long)b[3]<<24);
}

/* Find the bounds of a table whose offset and length are stored as two
 * consecutive 32-bit fields at header_pos in cyber.not.  On return *start
 * is just past the 2-byte "SD" marker and *end is just before the 2-byte
 * "ED" marker.  Exits if the header points outside the loaded file; the
 * contents of individual records are still trusted by the callers. */
static void locate_table(long header_pos,long *start,long *end)
{
  unsigned long offset=read_u32(cyber_not+header_pos);
  unsigned long size=read_u32(cyber_not+header_pos+4);
  unsigned long filesize=(unsigned long)cyber_not_size;

  if ((offset>filesize) || (size>filesize) || (offset+size>filesize+2)) {
    puts("ERROR - table header in cyber.not points outside the file");
    exit(1);
  }
  *start=(long)offset+2;      /* "SD" marker */
  *end=(long)(offset+size)-2; /* "ED" marker */
}

/* Format ip as a dotted quad, lowest-order byte first, into buf, which
 * must hold at least DOTTED_QUAD_SIZE bytes. */
static void format_ip(unsigned long ip,char *buf)
{
  sprintf(buf,"%lu.%lu.%lu.%lu",
          ip&0xFF,(ip>>8)&0xFF,(ip>>16)&0xFF,(ip>>24)&0xFF);
}

/* Return the dictionary hash table entry for hash, or NULL if that hash
 * was never registered. */
static DICTHASH_ENT *find_hash(unsigned long hash)
{
  DICTHASH_ENT *ent;

  for (ent=dicthash[hash%DICTHASH_SIZE];
       ent && (ent->hash!=hash);
       ent=ent->next);
  return ent;
}

/* Encryption used to conceal the config files, and the deputy password.
 * Transforms data[0..length-1] in place: two passes, each replacing every
 * byte with a running key that is rotated right one bit and XORed with the
 * byte.  The key starts as the low byte of length and carries over from
 * the first pass into the second. */
void cpcrypt4(char *data,long length)
{
  unsigned char key;
  long i,j;

  key=(unsigned char)(length&0xFF);
  for (i=0;i<2;i++) {
    for (j=0;j<length;j++) {
      key=(key>>1)+(key<<7);
      key^=(unsigned char)data[j];
      data[j]=(char)key;
    }
  }
}

/* The slightly nonstandard CRC32 used for URL hashing.  Each input byte has
 * bit 0x20 forced on before it is fed in.  The byte is taken as unsigned
 * char so that values above 0x7F cannot turn into a negative (out of
 * range) table index. */
unsigned long forward_crc(char *input,int length)
{
  int i;
  unsigned long rval=0;

  for (i=0;i<length;i++)
    rval=(rval>>8)^crctable[(rval^((unsigned char)input[i]|0x20))&0xFF];
  return rval;
}

#define GETBIT(p,b) ((((p)[(b)>>3])>>((b)&7))&1)
#define FLIPBIT(p,b) ((p)[(b)>>3]^=(1<<((b)&7)))

/* Flip bit b of a length-byte buffer, counting bytes from the END of the
 * buffer.  Bits that would land before the start are ignored. */
static void flip_bit_reversed(char *p,int length,int b)
{
  int index=length-1-(b>>3);

  if (index>=0)
    p[index]^=(1<<(b&7));
}

/* attempt to reverse the CRC32 function
 *   crc    - the hash to reverse
 *   length - number of characters to produce, 1 to 12
 *   in     - bit array supplying the freebits[length-1] free bits
 *   out    - receives length characters (not NUL terminated)
 * The caller must check the result with forward_crc(); not every
 * combination yields a string that really hashes to crc. */
void reverse_crc(unsigned long crc,int length,char *in,char *out)
{
  unsigned long bits;
  int i;

  /* correct for output length */
  bits=lengthmatrix[length-1];

  /* XOR in the CRC (1UL: shifting a plain int by 31 is undefined) */
  for (i=0;i<32;i++)
    if (crc&(1UL<<i))
      bits^=crcmatrix[i];

  /* XOR in the free bits */
  for (i=0;i<freebits[length-1];i++)
    if (GETBIT(in,i))
      bits^=freematrix[i];

  /* set up output */
  for (i=0;i<length;i++)
    out[i]=0x20;

  /* output forced bits */
  for (i=0;i<32;i++)
    if (bits&(1UL<<i))
      flip_bit_reversed(out,length,bitsforced[i]);

  /* output free bits */
  for (i=0;i<freebits[length-1];i++)
    if (GETBIT(in,i))
      flip_bit_reversed(out,length,bitsfree[i]);
}

/* load a word into dictionary, if its hash was in cyber.not.
 * word need not be NUL terminated; exactly length bytes are used.
 * Returns 1 if the word filled a previously empty entry, 0 otherwise. */
int guess_word(char *word,int length)
{
  DICTHASH_ENT *ent=find_hash(forward_crc(word,length));

  if (!ent || ent->word)
    return 0;

  ent->word=(char *)malloc(length+1);
  if (!ent->word) {
    puts("ERROR - out of memory (dicthash entry)");
    exit(1);
  }
  memcpy(ent->word,word,length);
  ent->word[length]='\0';
  return 1;
}

/* clear out the list of blocking masks */
void clear_blockmask_key(void)
{
  BLOCKING_MASK *tmp;

  while (masks) {
    tmp=masks;
    masks=tmp->next;
    free(tmp);
  }
}

/* add a mask to the sorted list, if it's not already there.  Yes, this
 * is O(n**2), but the list never gets over a few tens of entries, and so
 * doing it with a more sophisticated structure wouldn't be worthwhile.
 * The smaller entries are popped onto a temporary reversed list and
 * pushed back afterwards, so the list stays in ascending order. */
void add_blockmask(unsigned short newmask)
{
  BLOCKING_MASK *tmp=NULL,*tmp2;

  /* skip past all the entries less than new */
  while (masks && (masks->mask<newmask)) {
    tmp2=masks->next;
    masks->next=tmp;
    tmp=masks;
    masks=tmp2;
  }

  /* add new, if appropriate */
  if ((!masks) || (masks->mask!=newmask)) {
    tmp2=(BLOCKING_MASK *)malloc(sizeof(BLOCKING_MASK));
    if (!tmp2) {
      puts("ERROR - out of memory (blocking mask entry)");
      exit(1);
    }
    tmp2->next=masks;
    masks=tmp2;
    tmp2->mask=newmask;
  }

  /* replace the skipped entries */
  while (tmp) {
    tmp2=tmp->next;
    tmp->next=masks;
    masks=tmp;
    tmp=tmp2;
  }
}

/* print a key of the masks currently on the list: one entry per mask,
 * giving the hex value and the comma-separated names of its categories,
 * wrapped once a line would pass column 71. */
void print_blockmask_key(void)
{
  BLOCKING_MASK *tmp;
  unsigned short tm;
  int print_head,bit;

  for (tmp=masks;tmp;tmp=tmp->next) {
    printf("%04X: ",tmp->mask);
    print_head=6;
    tm=tmp->mask;
    bit=0;
    while (tm>0) {
      if (tm&1) {
        if (print_head+strlen(category[bit])>71) {
          /* seven spaces, matching print_head=7 and lining the
           * continuation up under the first category name */
          printf("\n       ");
          print_head=7;
        } else {
          putchar(' ');
          print_head++;
        }
        printf("%s",category[bit]);
        print_head+=strlen(category[bit]);
        if (tm&~1) { /* more categories follow */
          putchar(',');
          print_head++;
        }
      }
      bit++;
      tm>>=1;
    }
    putchar('\n');
  }
}

/* print a pretty IP address, with reverse lookup if we're allowed.
 * Names come from the IP hash table when present.  Otherwise, while the
 * time limit has not expired, a reverse DNS query is made and its result
 * (the host name, or the dotted quad if the query fails) is added to the
 * table.  Failing all that, the address is printed numerically. */
void print_ip(unsigned long ip)
{
  IPHASH_ENT *tmp;
  char dotted[DOTTED_QUAD_SIZE];

  /* check if it's already in the table */
  for (tmp=iphash[ip%IPHASH_SIZE];
       tmp && (tmp->ip!=ip);
       tmp=tmp->next);

  if (tmp) { /* if so, just print that */
    printf("%s",tmp->name);
    return;
  }

#ifdef REVERSE_DNS
  /* if we're allowed a reverse lookup, take it */
  if (time(NULL)<start_time+TIME_LIMIT) {
    struct hostent *he;
    unsigned char addr[4];
    char *name;

    /* hand the resolver the four address bytes explicitly rather than
     * the in-memory image of an unsigned long */
    addr[0]=(unsigned char)(ip&0xFF);
    addr[1]=(unsigned char)((ip>>8)&0xFF);
    addr[2]=(unsigned char)((ip>>16)&0xFF);
    addr[3]=(unsigned char)((ip>>24)&0xFF);
    he=gethostbyaddr((char *)addr,4,AF_INET);

    tmp=(IPHASH_ENT *)malloc(sizeof(IPHASH_ENT));
    name=(char *)malloc(he?strlen(he->h_name)+1:DOTTED_QUAD_SIZE);
    if (!tmp || !name) {
      puts("ERROR - out of memory (IP hash ent)");
      exit(1);
    }

    if (he)
      strcpy(name,he->h_name);
    else
      format_ip(ip,name);

    tmp->next=iphash[ip%IPHASH_SIZE];
    iphash[ip%IPHASH_SIZE]=tmp;
    tmp->ip=ip;
    tmp->name=name;

    printf("%s",name);
    return;
  }
#endif

  /* finally, we just print it out numerically */
  format_ip(ip,dotted);
  printf("%s",dotted);
}

/* reverse a hash and print the results.
 * The hash must already be in the dictionary hash table.  If no word is
 * known for it, every candidate of 1 to REVERSE_CRC_LENGTH characters is
 * generated, the one with the lowest cscore total that really hashes to
 * the value is remembered in the table, and UNKNOWN_WORD is remembered if
 * none does. */
void print_revhash(unsigned long hash)
{
  DICTHASH_ENT *tmp;
  char *neww,freeb[6],plaintext[12];
  unsigned score,bestscore=(unsigned)-1;
  int length,i,bflip;

  /* check if it's already in the table */
  tmp=find_hash(hash);
  if (!tmp) {
    puts("ERROR - reversing unseen hash (should never happen)");
    exit(1);
  }

  if (!tmp->word) { /* if no word, attempt reversal */
    neww=(char *)malloc(REVWORD_SIZE);
    if (neww==NULL) {
      puts("ERROR - out of memory (new word)\n");
      exit(1);
    }
    tmp->word=neww;
    strcpy(neww,UNKNOWN_WORD);

    for (length=1;length<=REVERSE_CRC_LENGTH;length++) {
      for (i=0;i<6;i++)
        freeb[i]=0;

      do {
        /* reverse the hash, and see if that worked */
        reverse_crc(hash,length,freeb,plaintext);
        if (hash==forward_crc(plaintext,length)) {

          /* compute the score for this guess */
          score=0;
          for (i=0;i<length;i++)
            score+=cscore[plaintext[i]-(plaintext[i]>64?64:32)];
          if (plaintext[0]=='~') /* ~ at start is ignored */
            score-=cscore['~'-64];

          /* if this is an improvement, use it */
          if (score<bestscore) {
            bestscore=score;
            memcpy(neww,plaintext,length);
            neww[length]='\0';
          }
        }

        /* increment freeb as a binary counter; bflip ends up at the
         * position of the bit that went from 0 to 1 */
        for (bflip=0;GETBIT(freeb,bflip);bflip++)
          FLIPBIT(freeb,bflip);
        FLIPBIT(freeb,bflip);
      } while (bflip<freebits[length-1]);
    }
  }

  /* now print whatever word we found */
  printf("%s",tmp->word);
}

/***************************************************************************/

/* Main functional blocks */

/* Load and decrypt cyber.not into the cyber_not / cyber_not_size globals.
 * Exits if the file cannot be opened, measured, or read completely, or if
 * it is too short to hold the header fields the rest of the program
 * reads. */
void load_cyber_not(char *filename)
{
  FILE *datafile;

  datafile=fopen(filename,"rb");
  if (!datafile) {
    puts("ERROR - can't open cyber.not");
    exit(1);
  }

  if ((fseek(datafile,0,SEEK_END)<0) ||
      ((cyber_not_size=ftell(datafile))<0) ||
      (fseek(datafile,0,SEEK_SET)<0)) {
    puts("ERROR - can't reposition in cyber.not");
    exit(1);
  }

  if (cyber_not_size<CYBER_NOT_HEADER_SIZE) {
    puts("ERROR - cyber.not is too short to contain a header");
    exit(1);
  }

  cyber_not=(char *)malloc(cyber_not_size);
  if ((!cyber_not) ||
      (fread(cyber_not,1,(size_t)cyber_not_size,datafile)
       !=(size_t)cyber_not_size)) {
    puts("ERROR - can't read cyber.not");
    exit(1); /* nothing sensible can be decoded from a partial file */
  }

  fclose(datafile);
  cpcrypt4(cyber_not,cyber_not_size);
}

/* Make sure hash has an entry in the dictionary hash table.  Returns 1 if
 * a new (wordless) entry was created, 0 if it was already there. */
static int remember_hash(unsigned long hash)
{
  DICTHASH_ENT *ent;

  if (find_hash(hash))
    return 0;

  ent=(DICTHASH_ENT *)malloc(sizeof(DICTHASH_ENT));
  if (!ent) {
    puts("ERROR - out of memory");
    exit(1);
  }
  ent->next=dicthash[hash%DICTHASH_SIZE];
  dicthash[hash%DICTHASH_SIZE]=ent;
  ent->hash=hash;
  ent->word=NULL;
  return 1;
}

/* Initialize the dictionary hash table with the hashes in cyber.not */
void find_hashes_to_reverse(void)
{
  long table1_start,table1_end;
  long i,hcnt=0;
  unsigned short mask;
  int length; /* record length byte, read as unsigned (0..255) */

  /* find Table 1 in cyber.not */
  locate_table(0x0010,&table1_start,&table1_end);

  /* initialize our hash table to empty */
  dicthash=(DICTHASH_ENT **)malloc(DICTHASH_SIZE*sizeof(DICTHASH_ENT *));
  if (!dicthash) {
    puts("ERROR - can't allocate dictionary hash");
    exit(1);
  }
  for (i=0;i<DICTHASH_SIZE;i++)
    dicthash[i]=NULL;

  /* step through the table, looking for hashes */
  for (i=table1_start;i<table1_end;) {
    i+=4; /* skip IP address */
    mask=read_u16(cyber_not+i); /* category mask */
    i+=2; /* skip over mask */

    if (mask==0) { /* we have hash records */
      while ((length=(unsigned char)cyber_not[i])!=0) {
        i+=3; /* skip length and mask */
        for (length-=3;length>0;length-=4) {
          hcnt+=remember_hash(read_u32(cyber_not+i));
          i+=4;
        }
      }
      i++; /* skip terminating length */
    }
  }

  printf("Scanning cyber.not, found %ld unique hash values\n",hcnt);
}

/* Read the next usable word from a dictionary file into buf, which must
 * have room for LINELEN bytes.  Lines containing '#' or a space are
 * skipped.  The line ending (LF or CR LF) is removed only if present, and
 * the word is folded to lower case.  Returns the word length (possibly 0)
 * or -1 at end of file. */
static int read_dictionary_word(FILE *datafile,char *buf)
{
  int length,i;

  while (fgets(buf,LINELEN,datafile)) {
    if (strchr(buf,'#') || strchr(buf,' '))
      continue;
    length=(int)strcspn(buf,"\r\n");
    buf[length]='\0';
    for (i=0;i<length;i++)
      buf[i]=(char)tolower((unsigned char)buf[i]);
    return length;
  }
  return -1;
}

/* Try the one-character prefix/suffix variants of the word stored at
 * textline+2 (textline[0] must be '~').  Overwrites textline[1] and
 * textline[length+2].  Returns the number of new words found. */
static int guess_affixed(char *textline,int length,char x)
{
  int found=0;

  textline[1]=x;
  textline[length+2]=x;
  found+=guess_word(textline+1,length+1);   /* Xword */
  found+=guess_word(textline,length+2);     /* ~Xword */
  found+=guess_word(textline+2,length+1);   /* wordX */
  if (x=='x')
    found+=guess_word(textline+1,length+2); /* xwordx */
  textline[1]='~';
  found+=guess_word(textline+1,length+2);   /* ~wordX */
  return found;
}

/* Try the .htm and .html forms of wordX and Xword for the word stored at
 * textline+2.  Overwrites textline[1] and everything from
 * textline[length+2] on.  Returns the number of new words found. */
static int guess_affixed_html(char *textline,int length,char x)
{
  int found=0;

  textline[1]=x;
  textline[length+2]=x;
  textline[length+3]='\0';
  strcat(textline+2,".html");
  found+=guess_word(textline+2,length+5); /* wordX.htm */
  found+=guess_word(textline+2,length+6); /* wordX.html */
  textline[length+2]='\0';
  strcat(textline+2,".html");
  found+=guess_word(textline+1,length+5); /* Xword.htm */
  found+=guess_word(textline+1,length+6); /* Xword.html */
  return found;
}

/* load a dictionary file.  Every word, and a set of variants of it, is
 * offered to guess_word(); the file is read three times, once per family
 * of variants.  A missing file is reported but is not an error. */
void load_dictionary(char *filename)
{
  static const char affixes[]="abcdefghijklmnopqrstuvwxyz0123456789";
  FILE *datafile;
  char textline[LINELEN+8];
  const char *x;
  int length,newwords=0;

  datafile=fopen(filename,"rt");
  if (!datafile) { /* not an error!  the dictionary need not exist */
    printf("Dictionary file %s missing.\n",filename);
    return;
  }

  /* First pass:  word, ~word, word.htm, word.html */
  textline[0]='~';
  while ((length=read_dictionary_word(datafile,textline+1))>=0) {
    strcat(textline+1,".html");
    newwords+=guess_word(textline+1,length);   /* word */
    newwords+=guess_word(textline,length+1);   /* ~word */
    newwords+=guess_word(textline+1,length+4); /* word.htm */
    newwords+=guess_word(textline+1,length+5); /* word.html */
  }

  /* Second pass:  Xword, ~Xword, wordX, ~wordX, xwordx */
  rewind(datafile);
  while ((length=read_dictionary_word(datafile,textline+2))>=0)
    for (x=affixes;*x;x++)
      newwords+=guess_affixed(textline,length,*x);

  /* Third pass:  .htm and .html variants of second pass */
  rewind(datafile);
  while ((length=read_dictionary_word(datafile,textline+2))>=0)
    for (x=affixes;*x;x++)
      newwords+=guess_affixed_html(textline,length,*x);

  fclose(datafile);
  printf("Found %d new words in %s\n",newwords,filename);
}

/* save a dictionary file: one line per hash table entry that has a word.
 * The placeholder stored for unreversible hashes is left out, since it is
 * not a word.  Failure to write is reported but is not fatal. */
void save_dictionary(char *filename)
{
  FILE *datafile;
  long i;
  DICTHASH_ENT *tmp;

  /* open */
  datafile=fopen(filename,"wt");
  if (!datafile) { /* this error is not fatal */
    printf("ERROR - cannot write dictionary %s (non-fatal)\n",filename);
    return;
  }

  /* write */
  for (i=0;i<DICTHASH_SIZE;i++)
    for (tmp=dicthash[i];tmp;tmp=tmp->next)
      if (tmp->word && strcmp(tmp->word,UNKNOWN_WORD)!=0)
        fprintf(datafile,"%s\n",tmp->word);

  /* close; a failed flush means the file is incomplete */
  if (fclose(datafile)!=0)
    printf("ERROR - cannot write dictionary %s (non-fatal)\n",filename);
}

/* Print every newsgroup entry of Table 3 with its category mask, followed
 * by a key to the masks that occurred. */
void dump_newsgroup_blocks(void)
{
  long table3_start,table3_end;
  long i;
  unsigned short mask;
  int length; /* record length byte, read as unsigned (0..255) */

  puts("*** NEWSGROUP BLOCKS ***\n");

  /* find Table 3 in cyber.not */
  locate_table(0x0024,&table3_start,&table3_end);

  /* get ready to print a new blocking mask key */
  clear_blockmask_key();

  /* step through the table, looking for newsgroups */
  for (i=table3_start;i<table3_end;) {
    length=(unsigned char)cyber_not[i];
    i+=1; /* skip length byte */
    mask=read_u16(cyber_not+i); /* category mask */
    i+=2; /* skip over mask */
    add_blockmask(mask);

    printf("%04X ",(unsigned)mask);
    for (length-=3;length>0;length--) {
      putchar(cyber_not[i]);
      i++;
    }
    putchar('\n');
  }

  /* print the key */
  putchar('\n');
  print_blockmask_key();
  putchar('\n');
}

/* Load a file of reverse-lookup hints.  Just initializes the table if
 * the filename parameter is null.  Each line is a dotted-quad address,
 * whitespace, and a name; lines that do not parse that way are ignored.
 * A missing file is reported but is not an error. */
void load_iphints(char *filename)
{
  FILE *datafile;
  char line[LINELEN],name[LINELEN],*ntmp;
  int i,ipa,ipb,ipc,ipd,count=0;
  unsigned long ip;
  IPHASH_ENT *tmp;

  /* initialize our hash table to empty */
  iphash=(IPHASH_ENT **)malloc(IPHASH_SIZE*sizeof(IPHASH_ENT *));
  if (!iphash) {
    puts("ERROR - can't allocate IP hash");
    exit(1);
  }
  for (i=0;i<IPHASH_SIZE;i++)
    iphash[i]=NULL;

  if (filename==NULL)
    return;

  datafile=fopen(filename,"rt");
  if (!datafile) { /* not an error!  the hints file need not exist */
    printf("IP address file %s missing.\n",filename);
    return;
  }

  while (fgets(line,sizeof(line),datafile)) {
    /* the name token is a substring of line, so it always fits in name */
    if (sscanf(line,"%d.%d.%d.%d %s",&ipa,&ipb,&ipc,&ipd,name)!=5)
      continue;
    if ((ipa<0) || (ipa>255) || (ipb<0) || (ipb>255) ||
        (ipc<0) || (ipc>255) || (ipd<0) || (ipd>255))
      continue;

    ip=(unsigned long)ipa|((unsigned long)ipb<<8)|
       ((unsigned long)ipc<<16)|((unsigned long)ipd<<24);

    tmp=(IPHASH_ENT *)malloc(sizeof(IPHASH_ENT));
    ntmp=(char *)malloc(strlen(name)+1);
    if ((!tmp) || (!ntmp)) {
      puts("ERROR - out of memory (IP hash entry)");
      exit(1);
    }
    strcpy(ntmp,name);

    tmp->next=iphash[ip%IPHASH_SIZE];
    iphash[ip%IPHASH_SIZE]=tmp;
    tmp->ip=ip;
    tmp->name=ntmp;
    count++;
  }

  fclose(datafile);
  printf("Found %d IP addresses in %s\n",count,filename);
}

/* save reverse-lookup hints file: one "address<TAB>name" line per entry
 * of the IP hash table.  Failure to write is reported but is not fatal. */
void save_iphints(char *filename)
{
  FILE *datafile;
  long i;
  IPHASH_ENT *tmp;
  char dotted[DOTTED_QUAD_SIZE];

  /* open */
  datafile=fopen(filename,"wt");
  if (!datafile) { /* this error is not fatal */
    printf("ERROR - cannot write IP hints %s (non-fatal)\n",filename);
    return;
  }

  /* write */
  for (i=0;i<IPHASH_SIZE;i++)
    for (tmp=iphash[i];tmp;tmp=tmp->next) {
      format_ip(tmp->ip,dotted);
      fprintf(datafile,"%s\t%s\n",dotted,tmp->name);
    }

  /* close; a failed flush means the file is incomplete */
  if (fclose(datafile)!=0)
    printf("ERROR - cannot write IP hints %s (non-fatal)\n",filename);
}

/* OK, this is the good part.
 * For every site in Table 1: print its address, the synonym addresses
 * listed for it in Table 2, then either each blocked URL (with path
 * components recovered through print_revhash) or "ENTIRE SITE:", and
 * finally a key to the category masks used for that site. */
void dump_web_blocks(void)
{
  long table1_start,table1_end,table2_start,table2_end;
  long i,j;
  unsigned long ip,hash;
  unsigned short mask;
  int length; /* length/count byte, read as unsigned (0..255) */
  char dotted[DOTTED_QUAD_SIZE];

  /* find Table 1 in cyber.not */
  locate_table(0x0010,&table1_start,&table1_end);

  /* find Table 2 in cyber.not */
  locate_table(0x001A,&table2_start,&table2_end);

  /* step through the table 1, printing out the blocks*/
  for (i=table1_start;i<table1_end;) {
    puts("************************************"
         "************************************");
    clear_blockmask_key();

    ip=read_u32(cyber_not+i);
    i+=4; /* skip IP address */
    mask=read_u16(cyber_not+i); /* category mask */
    i+=2; /* skip over mask */

    /* print the IP address and synonyms */
    putchar(' ');
    putchar(' ');
    print_ip(ip);
    putchar('\n');
    for (j=table2_start;j<table2_end;) {
      hash=read_u32(cyber_not+j);
      j+=4;
      length=(unsigned char)cyber_not[j++];
      if (hash==ip) {
        for (;length>0;length--) {
          hash=read_u32(cyber_not+j);
          j+=4;
          putchar('=');
          putchar(' ');
          print_ip(hash);
          putchar('\n');
        }
      } else
        j+=(length*4);
    }

    /* print the individual blocks */
    if (mask==0) {
      putchar('\n');
      format_ip(ip,dotted);
      while ((length=(unsigned char)cyber_not[i])!=0) {
        i++; /* skip length */
        mask=read_u16(cyber_not+i); /* category mask */
        i+=2; /* skip over mask */
        add_blockmask(mask);
        printf("%04X http://%s/",(unsigned)mask,dotted);

        for (length-=3;length>0;length-=4) {
          hash=read_u32(cyber_not+i);
          i+=4;
          print_revhash(hash);
          putchar('/');
        }
        putchar('\n');
      }
      i++; /* skip terminating length */
      putchar('\n');

    } else { /* block on entire site */
      add_blockmask(mask);
      puts("ENTIRE SITE:");
    }
    print_blockmask_key();
  }
}

/**************************************************************************/

/* Main program */

int main(int argc,char **argv)
{
  puts("cndecode - Cyber Patrol 4 cyber.not decoder");
  puts("By Matthew Skala\n");

#ifdef REVERSE_DNS
  start_time=time(NULL);
#endif

  if (argc<2) {
    puts("ERROR - no cyber.not file specified");
    return 1;
  }

  load_cyber_not(argv[1]);
  find_hashes_to_reverse();
  if (argc>=4) /* supplemental */
    load_dictionary(argv[3]);
  if (argc>=3) /* main */
    load_dictionary(argv[2]);
  if (argc>=5)
    load_iphints(argv[4]);
  else
    load_iphints(NULL);

  dump_newsgroup_blocks();
  dump_web_blocks();

  if (argc>=4)
    save_dictionary(argv[3]);
  if (argc>=5)
    save_iphints(argv[4]);

  return 0;
}

/*
 * # distributed via <nettime>: no commercial use without permission
 * # <nettime> is a moderated mailing list for net criticism,
 * # collaborative text filtering and cultural politics of the nets
 * # more info: majordomo@bbs.thing.net and "info nettime-l" in the msg body
 * # archive: http://www.nettime.org contact: nettime@bbs.thing.net
 */