FreeBSD 3.x kernels use a linked list to store all available host IP addresses. The list is searched every time a packet is received on a network interface to determine if the packet is directed at "us". With the number of IP addresses exceeding 600, the linear search leads to significant performance degradation.

This ugly patch adds a fast hash table to the linked list described above. The table allows for negligible lookup overhead regardless of the number of IP addresses in use. We have tested the patch with 5000 IP addresses per box and measured performance matching the case with a single IP address and an unpatched kernel.

This patch is probably buggy and should not be used on general purpose hosts or production systems. Other, better patches are available to solve similar problems (but those patches were too big or rigid for our purposes; plus the author wanted to write his first kernel hack).

You must enable the IF_ADDR_HASH kernel configuration option for this patch to have any effect.

Index: src/sys/conf/options
===================================================================
RCS file: /usr/local/FreeBSD/ncvs/src/sys/conf/options,v
retrieving revision 1.121.2.1
diff -u -r1.121.2.1 options
--- options	1999/02/08 19:05:55	1.121.2.1
+++ options	1999/10/28 21:45:51
@@ -209,6 +209,7 @@
 IPFILTER		opt_ipfilter.h
 IPFILTER_LOG		opt_ipfilter.h
 IPFILTER_LKM		opt_ipfilter.h
+IF_ADDR_HASH		opt_ipaddrhash.h
 
 # ATM (HARP version)
 ATM_CORE		opt_atm.h
Index: src/sys/netinet/ip_input.c
===================================================================
RCS file: /usr/local/FreeBSD/ncvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.111
diff -u -r1.111 ip_input.c
--- ip_input.c	1999/01/12 12:25:00	1.111
+++ ip_input.c	1999/10/28 21:46:15
@@ -42,6 +42,7 @@
 #include "opt_ipdn.h"
 #include "opt_ipdivert.h"
 #include "opt_ipfilter.h"
+#include "opt_ipaddrhash.h"
 
 #include
 
@@ -124,6 +125,26 @@
 SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RD,
 	&ipstat, ipstat, "");
 
+#ifdef IF_ADDR_HASH
+/* global variable to indicate that ip address hash is in sync
+ * with in_ifaddrhead queue; in.c resets it to 0 when the queue changes
+ */
+int ip_addr_hash_status = 0; /* -1: disabled; 0: needs sync; 1: in sync */
+/* a chain entry; wraps in_ifaddr so that struct needs no modifications */
+struct ip_addr_hash_entry {
+	struct in_ifaddr *ia;
+	struct ip_addr_hash_entry *next;
+};
+typedef struct ip_addr_hash_entry **ip_addr_hash_pos;
+static struct ip_addr_hash_entry **ip_addr_hash = 0; /* array of chain heads */
+static struct ip_addr_hash_entry *ip_addr_hash_entry_cache = 0; /* all entries */
+static int ip_addr_hash_cap = 0; /* slots in a hash */
+static void ip_addr_hash_reset(void);
+static void ip_addr_hash_clean(void);
+static int ip_addr_hash_find(struct in_addr addr, ip_addr_hash_pos *pos);
+static struct in_ifaddr *ip_addr_hash_has(struct in_addr addr);
+#endif /* IF_ADDR_HASH */
+
 /* Packet reassembly stuff */
 #define IPREASS_NHASH_LOG2      6
 #define IPREASS_NHASH           (1 << IPREASS_NHASH_LOG2)
@@ -466,6 +487,14 @@
 	/*
 	 * Check our list of addresses, to see if the packet is for us.
 	 */
+#ifdef IF_ADDR_HASH
+	if (ip_addr_hash_status == 0)
+		ip_addr_hash_reset();
+	if (ip_addr_hash_status > 0) {
+		if (ip_addr_hash_has(ip->ip_dst))
+			goto ours;
+	} else /* XXX: error prone -- the for loop below is this else's body */
+#endif
 	for (ia = TAILQ_FIRST(&in_ifaddrhead); ia;
 					ia = TAILQ_NEXT(ia, ia_link)) {
 #define	satosin(sa)	((struct sockaddr_in *)(sa))
@@ -1635,3 +1664,129 @@
 	}
 	return 0;
 }
+
+
+#ifdef IF_ADDR_HASH
+
+/*
+ * Destroys the old address hash, if any, and builds a new one from the
+ * in_ifaddrhead list.
+ *
+ * On success ip_addr_hash_status is set to 1.  If two list entries carry
+ * the same address, or if memory cannot be allocated, the hash is freed
+ * and ip_addr_hash_status is set to -1; the input path then uses the
+ * linear list search until in.c marks the hash stale (0) again.
+ *
+ * M_NOWAIT is used because this runs in the packet input path, where
+ * malloc() must not sleep.
+ */
+static void
+ip_addr_hash_reset(void) {
+	int new_cap = 0;
+	int hash_count, add_count;
+	struct in_ifaddr *ia;
+
+	if (ip_addr_hash_status < 0)
+		panic("ip_addr_hash_reset -- called for a disabled hash");
+
+	printf("ip_addr_hash_reset: starting (old: capacity: %d)\n", ip_addr_hash_cap);
+
+	/* calculate new hash capacity */
+	hash_count = 0;
+	for (ia = TAILQ_FIRST(&in_ifaddrhead); ia; ia = TAILQ_NEXT(ia, ia_link))
+		hash_count++;
+	new_cap = 2*hash_count + 1; /* XXX: ideally, this should be a prime number */
+
+	/* do we need to allocate a new hash? */
+	if (ip_addr_hash_cap != new_cap) {
+		ip_addr_hash_clean();
+		ip_addr_hash = malloc(sizeof(*ip_addr_hash)*new_cap, M_TEMP, M_NOWAIT);
+		ip_addr_hash_entry_cache = malloc(sizeof(*ip_addr_hash_entry_cache)*hash_count, M_TEMP, M_NOWAIT);
+		if (!ip_addr_hash || !ip_addr_hash_entry_cache) {
+			printf("ip_addr_hash_reset: cannot allocate hash, disabling hash optimization\n");
+			ip_addr_hash_clean();
+			ip_addr_hash_status = -1;
+			return;
+		}
+		ip_addr_hash_cap = new_cap;
+	}
+
+	/* initialize the hash with new values */
+	bzero(ip_addr_hash, sizeof(*ip_addr_hash)*ip_addr_hash_cap);
+	bzero(ip_addr_hash_entry_cache, sizeof(*ip_addr_hash_entry_cache)*hash_count);
+	add_count = 0;
+	for (ia = TAILQ_FIRST(&in_ifaddrhead); ia; ia = TAILQ_NEXT(ia, ia_link)) {
+		const struct in_addr addr = IA_SIN(ia)->sin_addr;
+		struct ip_addr_hash_entry *e;
+		ip_addr_hash_pos pos;
+
+		if (ip_addr_hash_find(addr, &pos)) {
+			/*
+			 * *pos is a hash entry, not an in_ifaddr: the
+			 * conflicting address is reached through (*pos)->ia.
+			 */
+			printf("ip_addr_hash_reset: %s conflicts with ", inet_ntoa(IA_SIN((*pos)->ia)->sin_addr));
+			printf("%s, disabling hash optimization due to internal error\n", inet_ntoa(addr));
+			ip_addr_hash_clean();
+			ip_addr_hash_status = -1;
+			return;
+		}
+		if (add_count >= hash_count)
+			panic("ip_addr_hash_reset -- ia list grew?!");
+		e = ip_addr_hash_entry_cache + add_count;
+		e->ia = ia;
+		e->next = 0;
+		*pos = e;
+		add_count++;
+	}
+
+	ip_addr_hash_status = 1;
+	/* XXX: remove this message? */
+	printf("ip_addr_hash_reset: done (%d entries, %lu bytes)\n",
+		hash_count, (unsigned long)(sizeof(*ip_addr_hash)*ip_addr_hash_cap +
+		sizeof(*ip_addr_hash_entry_cache)*hash_count));
+}
+
+/*
+ * Frees the hash table and the entry array, if allocated, and resets the
+ * bookkeeping variables.  ip_addr_hash_status is left untouched.
+ */
+static void
+ip_addr_hash_clean(void) {
+	if (ip_addr_hash_entry_cache) {
+		FREE(ip_addr_hash_entry_cache, M_TEMP);
+	}
+	if (ip_addr_hash) {
+		FREE(ip_addr_hash, M_TEMP);
+	}
+	ip_addr_hash = 0;
+	ip_addr_hash_entry_cache = 0;
+	ip_addr_hash_cap = 0;
+}
+
+/*
+ * Looks up addr in the hash.  On return *posp points at the chain link
+ * holding the matching entry or, when there is no match, at the null link
+ * that ends the chain (where a new entry can be stored).
+ * Returns non-zero if addr was found.  ip_addr_hash_cap must be non-zero.
+ */
+static int
+ip_addr_hash_find(struct in_addr addr, ip_addr_hash_pos *posp) {
+	*posp = &ip_addr_hash[addr.s_addr % ip_addr_hash_cap];
+	/* scan collision chain */
+	while (**posp && IA_SIN((**posp)->ia)->sin_addr.s_addr != addr.s_addr)
+		*posp = &(**posp)->next;
+	return **posp != 0;
+}
+
+/* Returns the in_ifaddr whose address equals addr, or 0 if none is hashed. */
+static struct in_ifaddr *
+ip_addr_hash_has(struct in_addr addr) {
+	ip_addr_hash_pos pos;
+	if (ip_addr_hash_find(addr, &pos))
+		return (*pos)->ia;
+	else
+		return 0;
+}
+
+#endif /* IF_ADDR_HASH */
Index: src/sys/netinet/in.c
===================================================================
RCS file: /usr/local/FreeBSD/ncvs/src/sys/netinet/in.c,v
retrieving revision 1.39
diff -u -r1.39 in.c
--- in.c	Mon Nov 1 12:55:01 1999
+++ in.c	Fri Oct 29 10:11:24 1999
@@ -34,6 +34,8 @@
  * $Id: in.c,v 1.39 1998/12/07 05:41:10 eivind Exp $
  */
 
+#include "opt_ipaddrhash.h"
+
 #include
 #include
 #include
@@ -51,6 +53,10 @@
 
 #include
 
+#ifdef IF_ADDR_HASH
+extern int ip_addr_hash_status; /* see ip_input.c */
+#endif
+
 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "internet multicast address");
 
 static void in_socktrim __P((struct sockaddr_in *));
@@ -221,6 +227,9 @@
 			TAILQ_INSERT_TAIL(&in_ifaddrhead, ia, ia_link);
 			ifa = &ia->ia_ifa;
 			TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
+#ifdef IF_ADDR_HASH
+			ip_addr_hash_status = 0; /* mark hash as stale */
+#endif
 
 			ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
 			ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
@@ -352,6 +361,9 @@
 		oia = ia;
 		TAILQ_REMOVE(&in_ifaddrhead, oia, ia_link);
 		IFAFREE(&oia->ia_ifa);
+#ifdef IF_ADDR_HASH
+		ip_addr_hash_status = 0; /* mark hash as stale */
+#endif
 		splx(s);
 		break;
 