/* * CIDR block calculator. * * Takes a set of dotted-quad IP addresses, ranges, or CIDR blocks, on * stdin; at EOF, prints a minimal set of CIDR ranges to stdout. * * Input consists of a stream of dotted-quads, pairs of dotted-quads * separated by a dash, or dotted-quads with /number widths after * them. Dash-separated ranges refer to all addresses between the * two, inclusive; an address with a /number after it refers to a * CIDR-style block. It is not an error for an address to be * specified in the input more than once. Whitespace may appear * anywhere except within a dotted-quad or CIDR width. Characters * other than digits, dots, dashes, and whitespace are errors. If a * number in a dotted-quad is greater than 255, or a CIDR width is * greater than 32, or other syntax errors occur (such as too many * dots without whitespace, dash, or slash) a complaint is printed and * the dotted-quad, range, or block in which it appears is skipped. * * Output consists of zero or more lines, each a dotted-quad/width CIDR * net-with-mask, including all and only the addresses in the input. * It will be a minimal set, in that no two blocks in the output can * be collapsed without resorting to noncontiguous netmasks. * * Compile-time options: * * -DNO_PROGNAME * Provide __progname, for systems that don't have it. If * it shows up undefined at link time, try compiling with * this turned on. * * This file is in the public domain. */ #include #include extern const char *__progname; #ifdef NO_PROGNAME const char *__progname; int main(int, char **); int main_(int, char **); int main(int ac, char **av) { __progname = av[0]; return(main_()); } #define main main_ #endif /* * We store the input as a binary tree. Conceptually, the tree is a * fully-populated depth-32 binary tree, with each leaf marked as * either present or absent in the input. Of course, that's a totally * impractical representation. What we actually do is to store that * tree, but whenever a subtree has all its leaves absent, the pointer * that would normally point to it is replaced with a NONE pointer; if * a subtree has all its leaves present, an ALL pointer. (Leaf * pointers are always either NONE or ALL, according as the leaf in * question is absent or present.) * * This could probably be stored more efficiently by allowing a single * C structure to represent multiple levels in the tree when there's * only one non-NONE path down through those levels, but the * additional code complexity isn't worth it. * * Since we don't have to store "up" pointers, we don't, and a node * consists of nothing but two child pointers. We use an array[2] * rather than two separate struct elements because at one place it's * convenient to use a computed index, which would otherwise need to * be a ? : expression. * * The reason for choosing this data structure is that it makes * extracting CIDR netblocks - our desired output - trivial. All we * have to do is collapse every node with two ALL children into an ALL * node itself; when this process can go no farther, an optimal CIDR * set consists of the address/mask values corresponding to the ALL * nodes. (We actually do the collapsing as we build the tree, rather * than deferring it until everything's done.) * * We could use a nil pointer for NONE or ALL, but don't, if only for * error checking. */ typedef struct node NODE; struct node { NODE *sub[2]; } ; /* The root of the tree. */ static NODE *root; /* * The NONE and ALL pointers. I know of no portable way of generating * distinctive NODE * values without actually allocating NODEs for * them to point to, except for the nil pointer. Fortunately, NODEs * are small enough that statically allocating two isn't a big deal. */ static NODE none; #define NONE (&none) static NODE all; #define ALL (&all) /* * Free a node and all nodes under it. Useful when we're setting ALL * at a point relatively far up in the tree (which happens if a range * or block subsumes some already-entered individual addresses). */ static void free_tree(NODE *n) { if ((n == NONE) || (n == ALL)) return; free_tree(n->sub[0]); free_tree(n->sub[1]); free(n); } /* * Add an address to a node. Conceptually, you pass a node to this * routine. But since it may want to replace the node with ALL, it * needs to actually be passed an additional level of pointer, NODE ** * instead of NODE *. This has the convenient property that this * routine can also handle replacing NONE nodes with real nodes. a is * the address being added. bit says how far down in the tree this * node is, or more accurately how far up; 31 corresponds to the root, * 0 to the last level of internal nodes, and -1 to leaves. end is a * value which describes how large a block is being added; it is -1 to * add a single leaf (a /32), 0 to add a pair of addresses (a /31), * etc. * * Algorithm: Recursive. If the node is already ALL, everything we * want to add is already present, so do nothing. Otherwise, if we've * reached the level at which we want to operate (bit <= end), free * the subtree if it's not NONE (it can't be ALL; we already checked) * and replace it with ALL, and we're done. Otherwise, we have to * walk down either the 0 link or the 1 link. If this node is * presently NONE, we have to create a real node; then we recurse down * whichever branch of the tree corresponds to the appropriate bit of * a. After adding, we check, and if both our subtrees are ALL, we * collapse this node into an ALL. (If further collapsing is possible * at the next level up, our caller will take care of it.) */ static void add_to_node(NODE **np, unsigned long int a, int bit, int end) { NODE *n; n = *np; if (n == ALL) return; if (bit <= end) { if (n != NONE) free_tree(n); *np = ALL; return; } if (n == NONE) { n = malloc(sizeof(NODE)); n->sub[0] = NONE; n->sub[1] = NONE; *np = n; } add_to_node(&n->sub[(a>>bit)&1],a,bit-1,end); if ((n->sub[0] == ALL) && (n->sub[1] == ALL)) { free(n); *np = ALL; } } /* * Dump output. This dumps out whatever output is appropriate for a * given NODE. If the node is NONE, there's nothing under it, so * don't do anything. If it's ALL, we've found a CIDR block; print it * and return. Otherwise, we recurse, first down the 0 branch, then * the 1 branch. v is the address-so-far, maintained as part of the * recursive calls. * * The abort() is a can't-happen; it indicates that we have a node * that's not NONE or ALL at the bottom level of the tree, which is * supposed to hold only leaves. */ static void dump_tree(NODE *n, unsigned long int v, int bit) { if (n == NONE) return; if (n == ALL) { printf("%lu.%lu.%lu.%lu/%d\n",v>>24,(v>>16)&0xff,(v>>8)&0xff,v&0xff,31-bit); return; } if (bit < 0) abort(); dump_tree(n->sub[0],v,bit-1); dump_tree(n->sub[1],v|(1< a2) { fprintf(stderr,"%s: invalid range (ends reversed)\n",__progname); return; } while (a1 <= a2) { m = (a1 - 1) & ~a1; while (a1+m > a2) m >>= 1; for (bit=-1,t=m;t;bit++,t>>=1) ; add_to_node(&root,a1,31,bit); a1 += m+1; } } /* * Add a CIDR-style block. This matches our storage method so well * it's just a single call to add_to_node. The reason for the ?: * operator is that C doesn't promise that << by 32 actually shifts; * 32-bit machines often use only the low five bits of the shift * count. */ static void save_cidr(unsigned long int a, int n) { add_to_node(&root,n?a&0xffffffff&(0xffffffff<<(32-n)):0,31,31-n); } /* * Read input. Implementation is a simple state machine. * * State values for the various input syntaxes (a=10, b=11, etc): * * input 1 2 3 . 4 5 . 6 7 . 8 9 1 2 3 . 4 5 ... * state 1 1 2 2 2 3 4 4 5 6 6 7 8 8 9 9 9 2 2 2 3 4 4 ... * * input 1 2 3 . 4 5 . 6 7 . 8 9 - 1 1 . 2 2 . 3 3 . 4 4 ... * state 1 1 2 2 2 3 4 4 5 6 6 7 8 8 9 a a b b c d d e f f g h h 1 1 ... * * input 1 2 3 . 4 5 . 6 7 . 8 9 / 1 5 ... * state 1 1 2 2 2 3 4 4 5 6 6 7 8 8 9 i i j j 1 1 ... * * a holds the address being constructed (or, for states 9, i, j, the * address just constructed); n holds the number being accumulated. * a1 is used to hold the first address when a range is being read * (the second address is accumulated into a). * * If an error occurs, n is set to -1, and further errors are * suppressed; we stay this way until we begin a new dotted-quad, by * entering state 2 from state 9 or by entering state 1 upon seeing * whitespace in most other states. * * The "default: abort();" cases are can't-happen firewalls. */ static void read_input(void) { unsigned long int a1; unsigned long int a; int line; int n; int c; int state; state = 1; line = 1; n = 0; while (1) { c = getchar(); if (c == EOF) break; switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': switch (state) { default: abort(); break; case 1: n = c - '0'; state = 2; break; case 3: case 5: case 7: case 12: case 14: case 16: state ++; case 2: case 4: case 6: case 8: case 11: case 13: case 15: case 17: if (n < 0) break; n = (n * 10) + (c - '0'); if (n > 255) { fprintf(stderr,"%s: line %d: out-of-range number in input\n",__progname,line); n = -1; } break; case 9: if (n >= 0) { save_one_addr(a); n = c - '0'; } state = 2; break; case 10: case 18: if (n >= 0) n = c - '0'; state ++; break; case 19: if (n < 0) break; n = (n * 10) + (c - '0'); if (n > 32) { fprintf(stderr,"%s: line %d: out-of-range width in input\n",__progname,line); n = -1; } break; } break; case '.': switch (state) { default: abort(); break; case 1: case 3: case 5: case 7 ... 8: case 10: case 12: case 14: case 16 ... 19: if (n >= 0) fprintf(stderr,"%s: line %d: . at an inappropriate place\n",__progname,line); n = -1; break; case 2: case 11: a = 0; case 4: case 6: case 13: case 15: if (n >= 0) { a = (a << 8) | n; n = 0; } state ++; break; case 9: if (n >= 0) { save_one_addr(a); fprintf(stderr,"%s: line %d: . at an inappropriate place\n",__progname,line); } n = -1; break; } break; case '-': switch (state) { default: abort(); break; case 1 ... 7: case 10 ... 19: fprintf(stderr,"%s: line %d: - at an inappropriate place\n",__progname,line); n = -1; break; case 8: if (n >= 0) a1 = (a << 8) | n; state = 10; break; case 9: a1 = a; state = 10; break; } break; case '/': switch (state) { default: abort(); break; case 1 ... 7: case 10 ... 19: fprintf(stderr,"%s: line %d: / at an inappropriate place\n",__progname,line); n = -1; break; case 8: if (n >= 0) a = (a << 8) | n; state = 18; break; case 9: state = 18; break; } break; case '\n': line ++; case ' ': case '\t': case '\r': switch (state) { default: abort(); break; case 1: case 9 ... 10: case 18: break; case 2 ... 7: case 11 ... 16: if (n >= 0) fprintf(stderr,"%s: line %d: whitespace at an inappropriate place\n",__progname,line); state = 1; break; case 8: if (n >= 0) a = (a << 8) | n; state = 9; break; case 17: if (n >= 0) save_range(a1,(a<<8)|n); state = 1; break; case 19: if (n >= 0) save_cidr(a,n); state = 1; break; } break; default: fprintf(stderr,"%s: invalid character 0x%02x in input\n",__progname,c); n = -1; state = 2; break; } } switch (state) { default: abort(); break; case 1: break; case 2 ... 7: case 10 ... 16: if (n >= 0) fprintf(stderr,"%s: line %d: EOF at an inappropriate place\n",__progname,line); break; case 8: if (n >= 0) save_one_addr((a<<8)|n); break; case 9: if (n >= 0) save_one_addr(a); break; case 17: if (n >= 0) save_range(a1,(a<<8)|n); break; } } /* * After accumulating all input, dump out the resulting CIDR blocks. * Because we collapse when possible during tree construction, there * is nothing to do here but call dump_tree to walk the tree and print * a line when it finds an ALL node. */ static void dump_output(void) { dump_tree(root,0,31); } /* * By this point, main() is pretty much trivial. */ int main(void); int main(void) { root = NONE; read_input(); dump_output(); exit(0); }