root/net/radix_mpath.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. rn_mpath_capable
  2. rn_mpath_next
  3. rn_mpath_count
  4. rt_mpath_matchgate
  5. rt_mpath_conflict
  6. rtalloc_mpath
  7. rn_mpath_inithead
  8. rn_mpath_hash

    1 /*      $OpenBSD: radix_mpath.c,v 1.7 2006/06/18 12:03:19 pascoe Exp $  */
    2 /*      $KAME: radix_mpath.c,v 1.13 2002/10/28 21:05:59 itojun Exp $    */
    3 
    4 /*
    5  * Copyright (C) 2001 WIDE Project.
    6  * All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. Neither the name of the project nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  * THE AUTHORS DO NOT GUARANTEE THAT THIS SOFTWARE DOES NOT INFRINGE
   32  * ANY OTHERS' INTELLECTUAL PROPERTIES. IN NO EVENT SHALL THE AUTHORS
   33  * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL
   34  * PROPERTIES.
   35  */
   36 
   37 #include <sys/param.h>
   38 #include <sys/systm.h>
   39 #include <sys/malloc.h>
   40 #include <sys/socket.h>
   41 #define M_DONTWAIT M_NOWAIT
   42 #include <sys/domain.h>
   43 #include <sys/syslog.h>
   44 #include <net/radix.h>
   45 #include <net/radix_mpath.h>
   46 #include <net/route.h>
   47 #include <dev/rndvar.h>
   48 
   49 #include <netinet/in.h>
   50 
   51 extern int ipmultipath;
   52 extern int ip6_multipath;
   53 
   54 u_int32_t rn_mpath_hash(struct route *, u_int32_t *);
   55 
   56 /*
   57  * give some jitter to hash, to avoid synchronization between routers
   58  */
   59 static u_int32_t hashjitter;
   60 
   61 int
   62 rn_mpath_capable(struct radix_node_head *rnh)
   63 {
   64         return rnh->rnh_multipath;
   65 }
   66 
   67 struct radix_node *
   68 rn_mpath_next(struct radix_node *rn)
   69 {
   70         struct radix_node *next;
   71 
   72         if (!rn->rn_dupedkey)
   73                 return NULL;
   74         next = rn->rn_dupedkey;
   75         if (rn->rn_mask == next->rn_mask)
   76                 return next;
   77         else
   78                 return NULL;
   79 }
   80 
   81 int
   82 rn_mpath_count(struct radix_node *rn)
   83 {
   84         int i;
   85 
   86         i = 1;
   87         while ((rn = rn_mpath_next(rn)) != NULL)
   88                 i++;
   89         return i;
   90 }
   91 
   92 struct rtentry *
   93 rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate)
   94 {
   95         struct radix_node *rn;
   96 
   97         if (!rn_mpath_next((struct radix_node *)rt))
   98                 return rt;
   99 
  100         if (!gate)
  101                 return NULL;
  102         /* beyond here, we use rn as the master copy */
  103         rn = (struct radix_node *)rt;
  104         do {
  105                 rt = (struct rtentry *)rn;
  106                 if (rt->rt_gateway->sa_len == gate->sa_len &&
  107                     !memcmp(rt->rt_gateway, gate, gate->sa_len))
  108                         break;
  109         } while ((rn = rn_mpath_next(rn)) != NULL);
  110         if (!rn)
  111                 return NULL;
  112 
  113         return (struct rtentry *)rn;
  114 }
  115 
  116 /*
  117  * check if we have the same key/mask/gateway on the table already.
  118  */
  119 int
  120 rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt,
  121                    struct sockaddr *netmask, int mpathok)
  122 {
  123         struct radix_node *rn, *rn1;
  124         struct rtentry *rt1;
  125         char *p, *q, *eq;
  126         int same, l, skip;
  127 
  128         rn = (struct radix_node *)rt;
  129         rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh);
  130         if (!rn1 || rn1->rn_flags & RNF_ROOT)
  131                 return 0;
  132 
  133         /*
  134          * unlike other functions we have in this file, we have to check
  135          * all key/mask/gateway as rnh_lookup can match less specific entry.
  136          */
  137         rt1 = (struct rtentry *)rn1;
  138 
  139         /* compare key. */
  140         if (rt_key(rt1)->sa_len != rt_key(rt)->sa_len ||
  141             bcmp(rt_key(rt1), rt_key(rt), rt_key(rt1)->sa_len))
  142                 goto different;
  143 
  144         /* key was the same.  compare netmask.  hairy... */
  145         if (rt_mask(rt1) && netmask) {
  146                 skip = rnh->rnh_treetop->rn_off;
  147                 if (rt_mask(rt1)->sa_len > netmask->sa_len) {
  148                         /*
  149                          * as rt_mask(rt1) is made optimal by radix.c,
  150                          * there must be some 1-bits on rt_mask(rt1)
  151                          * after netmask->sa_len.  therefore, in
  152                          * this case, the entries are different.
  153                          */
  154                         if (rt_mask(rt1)->sa_len > skip)
  155                                 goto different;
  156                         else {
  157                                 /* no bits to compare, i.e. same*/
  158                                 goto maskmatched;
  159                         }
  160                 }
  161 
  162                 l = rt_mask(rt1)->sa_len;
  163                 if (skip > l) {
  164                         /* no bits to compare, i.e. same */
  165                         goto maskmatched;
  166                 }
  167                 p = (char *)rt_mask(rt1);
  168                 q = (char *)netmask;
  169                 if (bcmp(p + skip, q + skip, l - skip))
  170                         goto different;
  171                 /*
  172                  * need to go through all the bit, as netmask is not
  173                  * optimal and can contain trailing 0s
  174                  */
  175                 eq = (char *)netmask + netmask->sa_len;
  176                 q += l;
  177                 same = 1;
  178                 while (eq > q)
  179                         if (*q++) {
  180                                 same = 0;
  181                                 break;
  182                         }
  183                 if (!same)
  184                         goto different;
  185         } else if (!rt_mask(rt1) && !netmask)
  186                 ; /* no mask to compare, i.e. same */
  187         else {
  188                 /* one has mask and the other does not, different */
  189                 goto different;
  190         }
  191 
  192  maskmatched:
  193         if (!mpathok)
  194                 return EEXIST;
  195 
  196         /* key/mask were the same.  compare gateway for all multipaths */
  197         do {
  198                 rt1 = (struct rtentry *)rn1;
  199 
  200                 /* sanity: no use in comparing the same thing */
  201                 if (rn1 == rn)
  202                         continue;
  203 
  204                 if (rt1->rt_gateway->sa_len != rt->rt_gateway->sa_len ||
  205                     bcmp(rt1->rt_gateway, rt->rt_gateway,
  206                     rt1->rt_gateway->sa_len))
  207                         continue;
  208 
  209                 /* all key/mask/gateway are the same.  conflicting entry. */
  210                 return EEXIST;
  211         } while ((rn1 = rn_mpath_next(rn1)) != NULL);
  212 
  213  different:
  214         return 0;
  215 }
  216 
  217 /*
  218  * allocate a route, potentially using multipath to select the peer.
  219  */
  220 void
  221 rtalloc_mpath(struct route *ro, u_int32_t *srcaddrp, u_int tableid)
  222 {
  223 #if defined(INET) || defined(INET6)
  224         struct radix_node *rn;
  225         int hash, npaths, threshold;
  226 #endif
  227 
  228         /*
  229          * return a cached entry if it is still valid, otherwise we increase
  230          * the risk of disrupting local flows.
  231          */
  232         if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
  233                 return;
  234         ro->ro_rt = rtalloc1(&ro->ro_dst, 1, tableid);
  235 
  236         /* if the route does not exist or it is not multipath, don't care */
  237         if (!ro->ro_rt || !(ro->ro_rt->rt_flags & RTF_MPATH))
  238                 return;
  239 
  240         /* check if multipath routing is enabled for the specified protocol */
  241         if (!(0
  242 #ifdef INET
  243             || (ipmultipath && ro->ro_dst.sa_family == AF_INET)
  244 #endif
  245 #ifdef INET6
  246             || (ip6_multipath && ro->ro_dst.sa_family == AF_INET6)
  247 #endif
  248             ))
  249                 return;
  250 
  251 #if defined(INET) || defined(INET6)
  252         /* gw selection by Hash-Threshold (RFC 2992) */
  253         rn = (struct radix_node *)ro->ro_rt;
  254         npaths = rn_mpath_count(rn);
  255         hash = rn_mpath_hash(ro, srcaddrp) & 0xffff;
  256         threshold = 1 + (0xffff / npaths);
  257         while (hash > threshold && rn) {
  258                 /* stay within the multipath routes */
  259                 if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
  260                         break;
  261                 rn = rn->rn_dupedkey;
  262                 hash -= threshold;
  263         }
  264 
  265         /* XXX try filling rt_gwroute and avoid unreachable gw  */
  266 
  267         /* if gw selection fails, use the first match (default) */
  268         if (!rn)
  269                 return;
  270 
  271         rtfree(ro->ro_rt);
  272         ro->ro_rt = (struct rtentry *)rn;
  273         ro->ro_rt->rt_refcnt++;
  274 #endif
  275 }
  276 
  277 int
  278 rn_mpath_inithead(void **head, int off)
  279 {
  280         struct radix_node_head *rnh;
  281 
  282         while (hashjitter == 0)
  283                 hashjitter = arc4random();
  284         if (rn_inithead(head, off) == 1) {
  285                 rnh = (struct radix_node_head *)*head;
  286                 rnh->rnh_multipath = 1;
  287                 return 1;
  288         } else
  289                 return 0;
  290 }
  291 
  292 /*
  293  * hash function based on pf_hash in pf.c
  294  */
  295 #define mix(a,b,c) \
  296         do {                                    \
  297                 a -= b; a -= c; a ^= (c >> 13); \
  298                 b -= c; b -= a; b ^= (a << 8);  \
  299                 c -= a; c -= b; c ^= (b >> 13); \
  300                 a -= b; a -= c; a ^= (c >> 12); \
  301                 b -= c; b -= a; b ^= (a << 16); \
  302                 c -= a; c -= b; c ^= (b >> 5);  \
  303                 a -= b; a -= c; a ^= (c >> 3);  \
  304                 b -= c; b -= a; b ^= (a << 10); \
  305                 c -= a; c -= b; c ^= (b >> 15); \
  306         } while (0)
  307 
  308 u_int32_t
  309 rn_mpath_hash(struct route *ro, u_int32_t *srcaddrp)
  310 {
  311         u_int32_t a, b, c;
  312 
  313         a = b = 0x9e3779b9;
  314         c = hashjitter;
  315 
  316         switch (ro->ro_dst.sa_family) {
  317 #ifdef INET
  318         case AF_INET:
  319             {
  320                 struct sockaddr_in *sin_dst;
  321 
  322                 sin_dst = (struct sockaddr_in *)&ro->ro_dst;
  323                 a += sin_dst->sin_addr.s_addr;
  324                 b += srcaddrp ? srcaddrp[0] : 0;
  325                 mix(a, b, c);
  326                 break;
  327             }
  328 #endif /* INET */
  329 #ifdef INET6
  330         case AF_INET6:
  331             {
  332                 struct sockaddr_in6 *sin6_dst;
  333 
  334                 sin6_dst = (struct sockaddr_in6 *)&ro->ro_dst;
  335                 a += sin6_dst->sin6_addr.s6_addr32[0];
  336                 b += sin6_dst->sin6_addr.s6_addr32[2];
  337                 c += srcaddrp ? srcaddrp[0] : 0;
  338                 mix(a, b, c);
  339                 a += sin6_dst->sin6_addr.s6_addr32[1];
  340                 b += sin6_dst->sin6_addr.s6_addr32[3];
  341                 c += srcaddrp ? srcaddrp[1] : 0;
  342                 mix(a, b, c);
  343                 a += sin6_dst->sin6_addr.s6_addr32[2];
  344                 b += sin6_dst->sin6_addr.s6_addr32[1];
  345                 c += srcaddrp ? srcaddrp[2] : 0;
  346                 mix(a, b, c);
  347                 a += sin6_dst->sin6_addr.s6_addr32[3];
  348                 b += sin6_dst->sin6_addr.s6_addr32[0];
  349                 c += srcaddrp ? srcaddrp[3] : 0;
  350                 mix(a, b, c);
  351                 break;
  352             }
  353 #endif /* INET6 */
  354         }
  355 
  356         return c;
  357 }

/* [<][>][^][v][top][bottom][index][help] */