1 /* $OpenBSD: radix_mpath.c,v 1.7 2006/06/18 12:03:19 pascoe Exp $ */
2 /* $KAME: radix_mpath.c,v 1.13 2002/10/28 21:05:59 itojun Exp $ */
3
4 /*
5 * Copyright (C) 2001 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 * THE AUTHORS DO NOT GUARANTEE THAT THIS SOFTWARE DOES NOT INFRINGE
32 * ANY OTHERS' INTELLECTUAL PROPERTIES. IN NO EVENT SHALL THE AUTHORS
33 * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL
34 * PROPERTIES.
35 */
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/malloc.h>
40 #include <sys/socket.h>
41 #define M_DONTWAIT M_NOWAIT
42 #include <sys/domain.h>
43 #include <sys/syslog.h>
44 #include <net/radix.h>
45 #include <net/radix_mpath.h>
46 #include <net/route.h>
47 #include <dev/rndvar.h>
48
49 #include <netinet/in.h>
50
51 extern int ipmultipath;
52 extern int ip6_multipath;
53
54 u_int32_t rn_mpath_hash(struct route *, u_int32_t *);
55
56 /*
57 * give some jitter to hash, to avoid synchronization between routers
58 */
59 static u_int32_t hashjitter;
60
61 int
62 rn_mpath_capable(struct radix_node_head *rnh)
63 {
64 return rnh->rnh_multipath;
65 }
66
67 struct radix_node *
68 rn_mpath_next(struct radix_node *rn)
69 {
70 struct radix_node *next;
71
72 if (!rn->rn_dupedkey)
73 return NULL;
74 next = rn->rn_dupedkey;
75 if (rn->rn_mask == next->rn_mask)
76 return next;
77 else
78 return NULL;
79 }
80
81 int
82 rn_mpath_count(struct radix_node *rn)
83 {
84 int i;
85
86 i = 1;
87 while ((rn = rn_mpath_next(rn)) != NULL)
88 i++;
89 return i;
90 }
91
92 struct rtentry *
93 rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate)
94 {
95 struct radix_node *rn;
96
97 if (!rn_mpath_next((struct radix_node *)rt))
98 return rt;
99
100 if (!gate)
101 return NULL;
102 /* beyond here, we use rn as the master copy */
103 rn = (struct radix_node *)rt;
104 do {
105 rt = (struct rtentry *)rn;
106 if (rt->rt_gateway->sa_len == gate->sa_len &&
107 !memcmp(rt->rt_gateway, gate, gate->sa_len))
108 break;
109 } while ((rn = rn_mpath_next(rn)) != NULL);
110 if (!rn)
111 return NULL;
112
113 return (struct rtentry *)rn;
114 }
115
116 /*
117 * check if we have the same key/mask/gateway on the table already.
118 */
119 int
120 rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt,
121 struct sockaddr *netmask, int mpathok)
122 {
123 struct radix_node *rn, *rn1;
124 struct rtentry *rt1;
125 char *p, *q, *eq;
126 int same, l, skip;
127
128 rn = (struct radix_node *)rt;
129 rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh);
130 if (!rn1 || rn1->rn_flags & RNF_ROOT)
131 return 0;
132
133 /*
134 * unlike other functions we have in this file, we have to check
135 * all key/mask/gateway as rnh_lookup can match less specific entry.
136 */
137 rt1 = (struct rtentry *)rn1;
138
139 /* compare key. */
140 if (rt_key(rt1)->sa_len != rt_key(rt)->sa_len ||
141 bcmp(rt_key(rt1), rt_key(rt), rt_key(rt1)->sa_len))
142 goto different;
143
144 /* key was the same. compare netmask. hairy... */
145 if (rt_mask(rt1) && netmask) {
146 skip = rnh->rnh_treetop->rn_off;
147 if (rt_mask(rt1)->sa_len > netmask->sa_len) {
148 /*
149 * as rt_mask(rt1) is made optimal by radix.c,
150 * there must be some 1-bits on rt_mask(rt1)
151 * after netmask->sa_len. therefore, in
152 * this case, the entries are different.
153 */
154 if (rt_mask(rt1)->sa_len > skip)
155 goto different;
156 else {
157 /* no bits to compare, i.e. same*/
158 goto maskmatched;
159 }
160 }
161
162 l = rt_mask(rt1)->sa_len;
163 if (skip > l) {
164 /* no bits to compare, i.e. same */
165 goto maskmatched;
166 }
167 p = (char *)rt_mask(rt1);
168 q = (char *)netmask;
169 if (bcmp(p + skip, q + skip, l - skip))
170 goto different;
171 /*
172 * need to go through all the bit, as netmask is not
173 * optimal and can contain trailing 0s
174 */
175 eq = (char *)netmask + netmask->sa_len;
176 q += l;
177 same = 1;
178 while (eq > q)
179 if (*q++) {
180 same = 0;
181 break;
182 }
183 if (!same)
184 goto different;
185 } else if (!rt_mask(rt1) && !netmask)
186 ; /* no mask to compare, i.e. same */
187 else {
188 /* one has mask and the other does not, different */
189 goto different;
190 }
191
192 maskmatched:
193 if (!mpathok)
194 return EEXIST;
195
196 /* key/mask were the same. compare gateway for all multipaths */
197 do {
198 rt1 = (struct rtentry *)rn1;
199
200 /* sanity: no use in comparing the same thing */
201 if (rn1 == rn)
202 continue;
203
204 if (rt1->rt_gateway->sa_len != rt->rt_gateway->sa_len ||
205 bcmp(rt1->rt_gateway, rt->rt_gateway,
206 rt1->rt_gateway->sa_len))
207 continue;
208
209 /* all key/mask/gateway are the same. conflicting entry. */
210 return EEXIST;
211 } while ((rn1 = rn_mpath_next(rn1)) != NULL);
212
213 different:
214 return 0;
215 }
216
217 /*
218 * allocate a route, potentially using multipath to select the peer.
219 */
220 void
221 rtalloc_mpath(struct route *ro, u_int32_t *srcaddrp, u_int tableid)
222 {
223 #if defined(INET) || defined(INET6)
224 struct radix_node *rn;
225 int hash, npaths, threshold;
226 #endif
227
228 /*
229 * return a cached entry if it is still valid, otherwise we increase
230 * the risk of disrupting local flows.
231 */
232 if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
233 return;
234 ro->ro_rt = rtalloc1(&ro->ro_dst, 1, tableid);
235
236 /* if the route does not exist or it is not multipath, don't care */
237 if (!ro->ro_rt || !(ro->ro_rt->rt_flags & RTF_MPATH))
238 return;
239
240 /* check if multipath routing is enabled for the specified protocol */
241 if (!(0
242 #ifdef INET
243 || (ipmultipath && ro->ro_dst.sa_family == AF_INET)
244 #endif
245 #ifdef INET6
246 || (ip6_multipath && ro->ro_dst.sa_family == AF_INET6)
247 #endif
248 ))
249 return;
250
251 #if defined(INET) || defined(INET6)
252 /* gw selection by Hash-Threshold (RFC 2992) */
253 rn = (struct radix_node *)ro->ro_rt;
254 npaths = rn_mpath_count(rn);
255 hash = rn_mpath_hash(ro, srcaddrp) & 0xffff;
256 threshold = 1 + (0xffff / npaths);
257 while (hash > threshold && rn) {
258 /* stay within the multipath routes */
259 if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
260 break;
261 rn = rn->rn_dupedkey;
262 hash -= threshold;
263 }
264
265 /* XXX try filling rt_gwroute and avoid unreachable gw */
266
267 /* if gw selection fails, use the first match (default) */
268 if (!rn)
269 return;
270
271 rtfree(ro->ro_rt);
272 ro->ro_rt = (struct rtentry *)rn;
273 ro->ro_rt->rt_refcnt++;
274 #endif
275 }
276
277 int
278 rn_mpath_inithead(void **head, int off)
279 {
280 struct radix_node_head *rnh;
281
282 while (hashjitter == 0)
283 hashjitter = arc4random();
284 if (rn_inithead(head, off) == 1) {
285 rnh = (struct radix_node_head *)*head;
286 rnh->rnh_multipath = 1;
287 return 1;
288 } else
289 return 0;
290 }
291
292 /*
293 * hash function based on pf_hash in pf.c
294 */
295 #define mix(a,b,c) \
296 do { \
297 a -= b; a -= c; a ^= (c >> 13); \
298 b -= c; b -= a; b ^= (a << 8); \
299 c -= a; c -= b; c ^= (b >> 13); \
300 a -= b; a -= c; a ^= (c >> 12); \
301 b -= c; b -= a; b ^= (a << 16); \
302 c -= a; c -= b; c ^= (b >> 5); \
303 a -= b; a -= c; a ^= (c >> 3); \
304 b -= c; b -= a; b ^= (a << 10); \
305 c -= a; c -= b; c ^= (b >> 15); \
306 } while (0)
307
308 u_int32_t
309 rn_mpath_hash(struct route *ro, u_int32_t *srcaddrp)
310 {
311 u_int32_t a, b, c;
312
313 a = b = 0x9e3779b9;
314 c = hashjitter;
315
316 switch (ro->ro_dst.sa_family) {
317 #ifdef INET
318 case AF_INET:
319 {
320 struct sockaddr_in *sin_dst;
321
322 sin_dst = (struct sockaddr_in *)&ro->ro_dst;
323 a += sin_dst->sin_addr.s_addr;
324 b += srcaddrp ? srcaddrp[0] : 0;
325 mix(a, b, c);
326 break;
327 }
328 #endif /* INET */
329 #ifdef INET6
330 case AF_INET6:
331 {
332 struct sockaddr_in6 *sin6_dst;
333
334 sin6_dst = (struct sockaddr_in6 *)&ro->ro_dst;
335 a += sin6_dst->sin6_addr.s6_addr32[0];
336 b += sin6_dst->sin6_addr.s6_addr32[2];
337 c += srcaddrp ? srcaddrp[0] : 0;
338 mix(a, b, c);
339 a += sin6_dst->sin6_addr.s6_addr32[1];
340 b += sin6_dst->sin6_addr.s6_addr32[3];
341 c += srcaddrp ? srcaddrp[1] : 0;
342 mix(a, b, c);
343 a += sin6_dst->sin6_addr.s6_addr32[2];
344 b += sin6_dst->sin6_addr.s6_addr32[1];
345 c += srcaddrp ? srcaddrp[2] : 0;
346 mix(a, b, c);
347 a += sin6_dst->sin6_addr.s6_addr32[3];
348 b += sin6_dst->sin6_addr.s6_addr32[0];
349 c += srcaddrp ? srcaddrp[3] : 0;
350 mix(a, b, c);
351 break;
352 }
353 #endif /* INET6 */
354 }
355
356 return c;
357 }