Annotation of embedaddon/bird2/nest/rt-fib.c, revision 1.1.1.1
1.1 misho 1: /*
2: * BIRD -- Forwarding Information Base -- Data Structures
3: *
4: * (c) 1998--2000 Martin Mares <mj@ucw.cz>
5: *
6: * Can be freely distributed and used under the terms of the GNU GPL.
7: */
8:
9: /**
10: * DOC: Forwarding Information Base
11: *
12: * FIB is a data structure designed for storage of routes indexed by their
13: * network prefixes. It supports insertion, deletion, searching by prefix,
14: * `routing' (in CIDR sense, that is searching for a longest prefix matching
15: * a given IP address) and (which makes the structure very tricky to implement)
16: * asynchronous reading, that is enumerating the contents of a FIB while other
17: * modules add, modify or remove entries.
18: *
19: * Internally, each FIB is represented as a collection of nodes of type &fib_node
20: * indexed using a sophisticated hashing mechanism.
21: * We use two-stage hashing where we calculate a 16-bit primary hash key independent
22: * on hash table size and then we just divide the primary keys modulo table size
23: * to get a real hash key used for determining the bucket containing the node.
24: * The lists of nodes in each bucket are sorted according to the primary hash
25: * key, hence if we keep the total number of buckets to be a power of two,
26: * re-hashing of the structure keeps the relative order of the nodes.
27: *
28: * To get the asynchronous reading consistent over node deletions, we need to
29: * keep a list of readers for each node. When a node gets deleted, its readers
30: * are automatically moved to the next node in the table.
31: *
32: * Basic FIB operations are performed by functions defined by this module,
33: * enumerating of FIB contents is accomplished by using the FIB_WALK() macro
34: * or FIB_ITERATE_START() if you want to do it asynchronously.
35: *
36: * For simple iteration just place the body of the loop between FIB_WALK() and
37: * FIB_WALK_END(). You can't modify the FIB during the iteration (you can modify
38: * data in the node, but not add or remove nodes).
39: *
40: * If you need more freedom, you can use the FIB_ITERATE_*() group of macros.
41: * First, you initialize an iterator with FIB_ITERATE_INIT(). Then you can put
42: * the loop body in between FIB_ITERATE_START() and FIB_ITERATE_END(). In
43: * addition, the iteration can be suspended by calling FIB_ITERATE_PUT().
44: * This'll link the iterator inside the FIB. While suspended, you may modify the
45: * FIB, exit the current function, etc. To resume the iteration, enter the loop
46: * again. You can use FIB_ITERATE_UNLINK() to unlink the iterator (while
47: * iteration is suspended) in cases like premature end of FIB iteration.
48: *
49: * Note that the iterator must not be destroyed when the iteration is suspended,
50: * the FIB would then contain a pointer to invalid memory. Therefore, after each
51: * FIB_ITERATE_INIT() or FIB_ITERATE_PUT() there must be either
52: * FIB_ITERATE_START() or FIB_ITERATE_UNLINK() before the iterator is destroyed.
53: */
54:
55: #undef LOCAL_DEBUG
56:
57: #include "nest/bird.h"
58: #include "nest/route.h"
59: #include "lib/string.h"
60:
61: /*
62: * The FIB rehash values are maintaining FIB count between N/5 and 2N. What
63: * does it mean?
64: *
65: * +------------+--------+---------+-----------+----------+-----------+
66: * | Table size | Memory | Min cnt | net + rte | Max cnt | net + rte |
67: * +------------+--------+---------+-----------+----------+-----------+
68: * | 1k | 8k | 0 | 0 | 2k | 192 k |
69: * | 2k | 16k | 409 | 38.3k | 4k | 384 k |
70: * | 4k | 32k | 819 | 76.8k | 8k | 768 k |
71: * | 8k | 64k | 1.6k | 153.6k | 16k | 1.5M |
72: * | 16k | 128k | 3.2k | 307.1k | 32k | 3 M |
73: * | 32k | 256k | 6.4k | 614.3k | 64k | 6 M |
74: * | 64k | 512k | 12.8k | 1.2M | 128k | 12 M |
75: * | 128k | 1024k | 25.6k | 2.4M | 256k | 24 M |
76: * | 256k | 2M | 51.2k | 4.8M | 512k | 48 M |
77: * | 512k | 4M | 102.4k | 9.6M | 1M | 96 M |
78: * | 1M | 8M | 204.8k | 19.2M | 2M | 192 M |
79: * | 2M | 16M | 409.6k | 38.4M | 4M | 384 M |
80: * | 4M | 32M | 819.2k | 76.8M | 8M | 768 M |
81: * | 8M | 64M | 1.6M | 153.6M | infinity | infinity |
82: * +------------+--------+---------+-----------+----------+-----------+
83: *
84: * Table size shows how many slots are in FIB table.
85: * Memory shows how much memory is eaten by FIB table.
86: * Min cnt minimal number of nets in table of given size
87: * Max cnt maximal number of nets in table of given size
88: * net + rte memory eaten by 1 net and one route in it for min cnt and max cnt
89: *
90: * Example: If we have 750,000 network entries in a table:
91: * * the table size may be 512k if we have never had more
92: * * the table size may be 1M or 2M if we at least happened to have more
93: * * 256k is too small, 8M is too big
94: *
95: * When growing, rehash is done on demand so we do it on every power of 2.
96: * When shrinking, rehash is done on delete which is done (in global tables)
97: * in a scheduled event. Rehashing down 2 steps.
98: *
99: */
100:
101:
102: #define HASH_DEF_ORDER 10
103: #define HASH_HI_MARK * 2
104: #define HASH_HI_STEP 1
105: #define HASH_HI_MAX 24
106: #define HASH_LO_MARK / 5
107: #define HASH_LO_STEP 2
108: #define HASH_LO_MIN 10
109:
110:
111: static void
112: fib_ht_alloc(struct fib *f)
113: {
114: f->hash_size = 1 << f->hash_order;
115: f->hash_shift = 32 - f->hash_order;
116: if (f->hash_order > HASH_HI_MAX - HASH_HI_STEP)
117: f->entries_max = ~0;
118: else
119: f->entries_max = f->hash_size HASH_HI_MARK;
120: if (f->hash_order < HASH_LO_MIN + HASH_LO_STEP)
121: f->entries_min = 0;
122: else
123: f->entries_min = f->hash_size HASH_LO_MARK;
124: DBG("Allocating FIB hash of order %d: %d entries, %d low, %d high\n",
125: f->hash_order, f->hash_size, f->entries_min, f->entries_max);
126: f->hash_table = mb_alloc(f->fib_pool, f->hash_size * sizeof(struct fib_node *));
127: }
128:
129: static inline void
130: fib_ht_free(struct fib_node **h)
131: {
132: mb_free(h);
133: }
134:
135:
136: static inline u32 fib_hash(struct fib *f, const net_addr *a);
137:
138: /**
139: * fib_init - initialize a new FIB
140: * @f: the FIB to be initialized (the structure itself being allocated by the caller)
141: * @p: pool to allocate the nodes in
142: * @node_size: node size to be used (each node consists of a standard header &fib_node
143: * followed by user data)
144: * @hash_order: initial hash order (a binary logarithm of hash table size), 0 to use default order
145: * (recommended)
146: * @init: pointer a function to be called to initialize a newly created node
147: *
148: * This function initializes a newly allocated FIB and prepares it for use.
149: */
150: void
151: fib_init(struct fib *f, pool *p, uint addr_type, uint node_size, uint node_offset, uint hash_order, fib_init_fn init)
152: {
153: uint addr_length = net_addr_length[addr_type];
154:
155: if (!hash_order)
156: hash_order = HASH_DEF_ORDER;
157: f->fib_pool = p;
158: f->fib_slab = addr_length ? sl_new(p, node_size + addr_length) : NULL;
159: f->addr_type = addr_type;
160: f->node_size = node_size;
161: f->node_offset = node_offset;
162: f->hash_order = hash_order;
163: fib_ht_alloc(f);
164: bzero(f->hash_table, f->hash_size * sizeof(struct fib_node *));
165: f->entries = 0;
166: f->entries_min = 0;
167: f->init = init;
168: }
169:
170: static void
171: fib_rehash(struct fib *f, int step)
172: {
173: unsigned old, new, oldn, newn, ni, nh;
174: struct fib_node **n, *e, *x, **t, **m, **h;
175:
176: old = f->hash_order;
177: oldn = f->hash_size;
178: new = old + step;
179: m = h = f->hash_table;
180: DBG("Re-hashing FIB from order %d to %d\n", old, new);
181: f->hash_order = new;
182: fib_ht_alloc(f);
183: t = n = f->hash_table;
184: newn = f->hash_size;
185: ni = 0;
186:
187: while (oldn--)
188: {
189: x = *h++;
190: while (e = x)
191: {
192: x = e->next;
193: nh = fib_hash(f, e->addr);
194: while (nh > ni)
195: {
196: *t = NULL;
197: ni++;
198: t = ++n;
199: }
200: *t = e;
201: t = &e->next;
202: }
203: }
204: while (ni < newn)
205: {
206: *t = NULL;
207: ni++;
208: t = ++n;
209: }
210: fib_ht_free(m);
211: }
212:
213: #define CAST(t) (const net_addr_##t *)
214: #define CAST2(t) (net_addr_##t *)
215:
216: #define FIB_HASH(f,a,t) (net_hash_##t(CAST(t) a) >> f->hash_shift)
217:
218: #define FIB_FIND(f,a,t) \
219: ({ \
220: struct fib_node *e = f->hash_table[FIB_HASH(f, a, t)]; \
221: while (e && !net_equal_##t(CAST(t) e->addr, CAST(t) a)) \
222: e = e->next; \
223: fib_node_to_user(f, e); \
224: })
225:
226: #define FIB_INSERT(f,a,e,t) \
227: ({ \
228: u32 h = net_hash_##t(CAST(t) a); \
229: struct fib_node **ee = f->hash_table + (h >> f->hash_shift); \
230: struct fib_node *g; \
231: \
232: while ((g = *ee) && (net_hash_##t(CAST(t) g->addr) < h)) \
233: ee = &g->next; \
234: \
235: net_copy_##t(CAST2(t) e->addr, CAST(t) a); \
236: e->next = *ee; \
237: *ee = e; \
238: })
239:
240:
241: static inline u32
242: fib_hash(struct fib *f, const net_addr *a)
243: {
244: /* Same as FIB_HASH() */
245: return net_hash(a) >> f->hash_shift;
246: }
247:
248: void *
249: fib_get_chain(struct fib *f, const net_addr *a)
250: {
251: ASSERT(f->addr_type == a->type);
252:
253: struct fib_node *e = f->hash_table[fib_hash(f, a)];
254: return e;
255: }
256:
257: /**
258: * fib_find - search for FIB node by prefix
259: * @f: FIB to search in
260: * @n: network address
261: *
262: * Search for a FIB node corresponding to the given prefix, return
263: * a pointer to it or %NULL if no such node exists.
264: */
265: void *
266: fib_find(struct fib *f, const net_addr *a)
267: {
268: ASSERT(f->addr_type == a->type);
269:
270: switch (f->addr_type)
271: {
272: case NET_IP4: return FIB_FIND(f, a, ip4);
273: case NET_IP6: return FIB_FIND(f, a, ip6);
274: case NET_VPN4: return FIB_FIND(f, a, vpn4);
275: case NET_VPN6: return FIB_FIND(f, a, vpn6);
276: case NET_ROA4: return FIB_FIND(f, a, roa4);
277: case NET_ROA6: return FIB_FIND(f, a, roa6);
278: case NET_FLOW4: return FIB_FIND(f, a, flow4);
279: case NET_FLOW6: return FIB_FIND(f, a, flow6);
280: case NET_IP6_SADR: return FIB_FIND(f, a, ip6_sadr);
281: case NET_MPLS: return FIB_FIND(f, a, mpls);
282: default: bug("invalid type");
283: }
284: }
285:
286: static void
287: fib_insert(struct fib *f, const net_addr *a, struct fib_node *e)
288: {
289: ASSERT(f->addr_type == a->type);
290:
291: switch (f->addr_type)
292: {
293: case NET_IP4: FIB_INSERT(f, a, e, ip4); return;
294: case NET_IP6: FIB_INSERT(f, a, e, ip6); return;
295: case NET_VPN4: FIB_INSERT(f, a, e, vpn4); return;
296: case NET_VPN6: FIB_INSERT(f, a, e, vpn6); return;
297: case NET_ROA4: FIB_INSERT(f, a, e, roa4); return;
298: case NET_ROA6: FIB_INSERT(f, a, e, roa6); return;
299: case NET_FLOW4: FIB_INSERT(f, a, e, flow4); return;
300: case NET_FLOW6: FIB_INSERT(f, a, e, flow6); return;
301: case NET_IP6_SADR: FIB_INSERT(f, a, e, ip6_sadr); return;
302: case NET_MPLS: FIB_INSERT(f, a, e, mpls); return;
303: default: bug("invalid type");
304: }
305: }
306:
307:
308: /**
309: * fib_get - find or create a FIB node
310: * @f: FIB to work with
311: * @n: network address
312: *
313: * Search for a FIB node corresponding to the given prefix and
314: * return a pointer to it. If no such node exists, create it.
315: */
316: void *
317: fib_get(struct fib *f, const net_addr *a)
318: {
319: void *b = fib_find(f, a);
320: if (b)
321: return b;
322:
323: if (f->fib_slab)
324: b = sl_alloc(f->fib_slab);
325: else
326: b = mb_alloc(f->fib_pool, f->node_size + a->length);
327:
328: struct fib_node *e = fib_user_to_node(f, b);
329: e->readers = NULL;
330: e->flags = 0;
331: fib_insert(f, a, e);
332:
333: memset(b, 0, f->node_offset);
334: if (f->init)
335: f->init(b);
336:
337: if (f->entries++ > f->entries_max)
338: fib_rehash(f, HASH_HI_STEP);
339:
340: return b;
341: }
342:
343: static inline void *
344: fib_route_ip4(struct fib *f, net_addr_ip4 *n)
345: {
346: void *r;
347:
348: while (!(r = fib_find(f, (net_addr *) n)) && (n->pxlen > 0))
349: {
350: n->pxlen--;
351: ip4_clrbit(&n->prefix, n->pxlen);
352: }
353:
354: return r;
355: }
356:
357: static inline void *
358: fib_route_ip6(struct fib *f, net_addr_ip6 *n)
359: {
360: void *r;
361:
362: while (!(r = fib_find(f, (net_addr *) n)) && (n->pxlen > 0))
363: {
364: n->pxlen--;
365: ip6_clrbit(&n->prefix, n->pxlen);
366: }
367:
368: return r;
369: }
370:
371: /**
372: * fib_route - CIDR routing lookup
373: * @f: FIB to search in
374: * @n: network address
375: *
376: * Search for a FIB node with longest prefix matching the given
377: * network, that is a node which a CIDR router would use for routing
378: * that network.
379: */
380: void *
381: fib_route(struct fib *f, const net_addr *n)
382: {
383: ASSERT(f->addr_type == n->type);
384:
385: net_addr *n0 = alloca(n->length);
386: net_copy(n0, n);
387:
388: switch (n->type)
389: {
390: case NET_IP4:
391: case NET_VPN4:
392: case NET_ROA4:
393: case NET_FLOW4:
394: return fib_route_ip4(f, (net_addr_ip4 *) n0);
395:
396: case NET_IP6:
397: case NET_VPN6:
398: case NET_ROA6:
399: case NET_FLOW6:
400: return fib_route_ip6(f, (net_addr_ip6 *) n0);
401:
402: default:
403: return NULL;
404: }
405: }
406:
407:
408: static inline void
409: fib_merge_readers(struct fib_iterator *i, struct fib_node *to)
410: {
411: if (to)
412: {
413: struct fib_iterator *j = to->readers;
414: if (!j)
415: {
416: /* Fast path */
417: to->readers = i;
418: i->prev = (struct fib_iterator *) to;
419: }
420: else
421: {
422: /* Really merging */
423: while (j->next)
424: j = j->next;
425: j->next = i;
426: i->prev = j;
427: }
428: while (i && i->node)
429: {
430: i->node = NULL;
431: i = i->next;
432: }
433: }
434: else /* No more nodes */
435: while (i)
436: {
437: i->prev = NULL;
438: i = i->next;
439: }
440: }
441:
442: /**
443: * fib_delete - delete a FIB node
444: * @f: FIB to delete from
445: * @E: entry to delete
446: *
447: * This function removes the given entry from the FIB,
448: * taking care of all the asynchronous readers by shifting
449: * them to the next node in the canonical reading order.
450: */
451: void
452: fib_delete(struct fib *f, void *E)
453: {
454: struct fib_node *e = fib_user_to_node(f, E);
455: uint h = fib_hash(f, e->addr);
456: struct fib_node **ee = f->hash_table + h;
457: struct fib_iterator *it;
458:
459: while (*ee)
460: {
461: if (*ee == e)
462: {
463: *ee = e->next;
464: if (it = e->readers)
465: {
466: struct fib_node *l = e->next;
467: while (!l)
468: {
469: h++;
470: if (h >= f->hash_size)
471: break;
472: else
473: l = f->hash_table[h];
474: }
475: fib_merge_readers(it, l);
476: }
477:
478: if (f->fib_slab)
479: sl_free(f->fib_slab, E);
480: else
481: mb_free(E);
482:
483: if (f->entries-- < f->entries_min)
484: fib_rehash(f, -HASH_LO_STEP);
485: return;
486: }
487: ee = &((*ee)->next);
488: }
489: bug("fib_delete() called for invalid node");
490: }
491:
492: /**
493: * fib_free - delete a FIB
494: * @f: FIB to be deleted
495: *
496: * This function deletes a FIB -- it frees all memory associated
497: * with it and all its entries.
498: */
499: void
500: fib_free(struct fib *f)
501: {
502: fib_ht_free(f->hash_table);
503: rfree(f->fib_slab);
504: }
505:
506: void
507: fit_init(struct fib_iterator *i, struct fib *f)
508: {
509: unsigned h;
510: struct fib_node *n;
511:
512: i->efef = 0xff;
513: for(h=0; h<f->hash_size; h++)
514: if (n = f->hash_table[h])
515: {
516: i->prev = (struct fib_iterator *) n;
517: if (i->next = n->readers)
518: i->next->prev = i;
519: n->readers = i;
520: i->node = n;
521: return;
522: }
523: /* The fib is empty, nothing to do */
524: i->prev = i->next = NULL;
525: i->node = NULL;
526: }
527:
528: struct fib_node *
529: fit_get(struct fib *f, struct fib_iterator *i)
530: {
531: struct fib_node *n;
532: struct fib_iterator *j, *k;
533:
534: if (!i->prev)
535: {
536: /* We are at the end */
537: i->hash = ~0 - 1;
538: return NULL;
539: }
540: if (!(n = i->node))
541: {
542: /* No node info available, we are a victim of merging. Try harder. */
543: j = i;
544: while (j->efef == 0xff)
545: j = j->prev;
546: n = (struct fib_node *) j;
547: }
548: j = i->prev;
549: if (k = i->next)
550: k->prev = j;
551: j->next = k;
552: i->hash = fib_hash(f, n->addr);
553: return n;
554: }
555:
556: void
557: fit_put(struct fib_iterator *i, struct fib_node *n)
558: {
559: struct fib_iterator *j;
560:
561: i->node = n;
562: if (j = n->readers)
563: j->prev = i;
564: i->next = j;
565: n->readers = i;
566: i->prev = (struct fib_iterator *) n;
567: }
568:
569: void
570: fit_put_next(struct fib *f, struct fib_iterator *i, struct fib_node *n, uint hpos)
571: {
572: if (n = n->next)
573: goto found;
574:
575: while (++hpos < f->hash_size)
576: if (n = f->hash_table[hpos])
577: goto found;
578:
579: /* We are at the end */
580: i->prev = i->next = NULL;
581: i->node = NULL;
582: return;
583:
584: found:
585: fit_put(i, n);
586: }
587:
588: #ifdef DEBUGGING
589:
590: /**
591: * fib_check - audit a FIB
592: * @f: FIB to be checked
593: *
594: * This debugging function audits a FIB by checking its internal consistency.
595: * Use when you suspect somebody of corrupting innocent data structures.
596: */
597: void
598: fib_check(struct fib *f)
599: {
600: uint i, ec, nulls;
601:
602: ec = 0;
603: for(i=0; i<f->hash_size; i++)
604: {
605: struct fib_node *n;
606: for(n=f->hash_table[i]; n; n=n->next)
607: {
608: struct fib_iterator *j, *j0;
609: uint h0 = fib_hash(f, n->addr);
610: if (h0 != i)
611: bug("fib_check: mishashed %x->%x (order %d)", h0, i, f->hash_order);
612: j0 = (struct fib_iterator *) n;
613: nulls = 0;
614: for(j=n->readers; j; j=j->next)
615: {
616: if (j->prev != j0)
617: bug("fib_check: iterator->prev mismatch");
618: j0 = j;
619: if (!j->node)
620: nulls++;
621: else if (nulls)
622: bug("fib_check: iterator nullified");
623: else if (j->node != n)
624: bug("fib_check: iterator->node mismatch");
625: }
626: ec++;
627: }
628: }
629: if (ec != f->entries)
630: bug("fib_check: invalid entry count (%d != %d)", ec, f->entries);
631: return;
632: }
633:
634: /*
635: int
636: fib_histogram(struct fib *f)
637: {
638: log(L_WARN "Histogram dump start %d %d", f->hash_size, f->entries);
639:
640: int i, j;
641: struct fib_node *e;
642:
643: for (i = 0; i < f->hash_size; i++)
644: {
645: j = 0;
646: for (e = f->hash_table[i]; e != NULL; e = e->next)
647: j++;
648: if (j > 0)
649: log(L_WARN "Histogram line %d: %d", i, j);
650: }
651:
652: log(L_WARN "Histogram dump end");
653: }
654: */
655:
656: #endif
657:
658: #ifdef TEST
659:
660: #include "lib/resource.h"
661:
662: struct fib f;
663:
664: void dump(char *m)
665: {
666: uint i;
667:
668: debug("%s ... order=%d, size=%d, entries=%d\n", m, f.hash_order, f.hash_size, f.hash_size);
669: for(i=0; i<f.hash_size; i++)
670: {
671: struct fib_node *n;
672: struct fib_iterator *j;
673: for(n=f.hash_table[i]; n; n=n->next)
674: {
675: debug("%04x %08x %p %N", i, ipa_hash(n->prefix), n, n->addr);
676: for(j=n->readers; j; j=j->next)
677: debug(" %p[%p]", j, j->node);
678: debug("\n");
679: }
680: }
681: fib_check(&f);
682: debug("-----\n");
683: }
684:
685: void init(struct fib_node *n)
686: {
687: }
688:
689: int main(void)
690: {
691: struct fib_node *n;
692: struct fib_iterator i, j;
693: ip_addr a;
694: int c;
695:
696: log_init_debug(NULL);
697: resource_init();
698: fib_init(&f, &root_pool, sizeof(struct fib_node), 4, init);
699: dump("init");
700:
701: a = ipa_from_u32(0x01020304); n = fib_get(&f, &a, 32);
702: a = ipa_from_u32(0x02030405); n = fib_get(&f, &a, 32);
703: a = ipa_from_u32(0x03040506); n = fib_get(&f, &a, 32);
704: a = ipa_from_u32(0x00000000); n = fib_get(&f, &a, 32);
705: a = ipa_from_u32(0x00000c01); n = fib_get(&f, &a, 32);
706: a = ipa_from_u32(0xffffffff); n = fib_get(&f, &a, 32);
707: dump("fill");
708:
709: fit_init(&i, &f);
710: dump("iter init");
711:
712: fib_rehash(&f, 1);
713: dump("rehash up");
714:
715: fib_rehash(&f, -1);
716: dump("rehash down");
717:
718: next:
719: c = 0;
720: FIB_ITERATE_START(&f, &i, z)
721: {
722: if (c)
723: {
724: FIB_ITERATE_PUT(&i, z);
725: dump("iter");
726: goto next;
727: }
728: c = 1;
729: debug("got %p\n", z);
730: }
731: FIB_ITERATE_END(z);
732: dump("iter end");
733:
734: fit_init(&i, &f);
735: fit_init(&j, &f);
736: dump("iter init 2");
737:
738: n = fit_get(&f, &i);
739: dump("iter step 2");
740:
741: fit_put(&i, n->next);
742: dump("iter step 3");
743:
744: a = ipa_from_u32(0xffffffff); n = fib_get(&f, &a, 32);
745: fib_delete(&f, n);
746: dump("iter step 3");
747:
748: return 0;
749: }
750:
751: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>