1 /* $OpenBSD: ip6_output.c,v 1.99 2007/06/01 00:52:38 henning Exp $ */
2 /* $KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $ */
3
4 /*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1982, 1986, 1988, 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
62 */
63
64 #include "pf.h"
65
66 #include <sys/param.h>
67 #include <sys/malloc.h>
68 #include <sys/mbuf.h>
69 #include <sys/errno.h>
70 #include <sys/protosw.h>
71 #include <sys/socket.h>
72 #include <sys/socketvar.h>
73 #include <sys/systm.h>
74 #include <sys/proc.h>
75
76 #include <net/if.h>
77 #include <net/route.h>
78
79 #include <netinet/in.h>
80 #include <netinet/in_var.h>
81 #include <netinet/in_systm.h>
82 #include <netinet/ip.h>
83 #include <netinet/in_pcb.h>
84
85 #include <netinet/ip6.h>
86 #include <netinet/icmp6.h>
87 #include <netinet6/ip6_var.h>
88 #include <netinet6/nd6.h>
89 #include <netinet6/ip6protosw.h>
90
91 #if NPF > 0
92 #include <net/pfvar.h>
93 #endif
94
95 #ifdef IPSEC
96 #include <netinet/ip_ipsp.h>
97 #include <netinet/ip_ah.h>
98 #include <netinet/ip_esp.h>
99 #include <netinet/udp.h>
100 #include <netinet/tcp.h>
101 #include <net/pfkeyv2.h>
102
103 extern u_int8_t get_sa_require(struct inpcb *);
104
105 extern int ipsec_auth_default_level;
106 extern int ipsec_esp_trans_default_level;
107 extern int ipsec_esp_network_default_level;
108 extern int ipsec_ipcomp_default_level;
109 #endif /* IPSEC */
110
/*
 * Working set of mbufs used by ip6_output() while it assembles the
 * outgoing header chain.  Each member is NULL when the corresponding
 * header is absent.
 */
struct ip6_exthdrs {
	struct mbuf	*ip6e_ip6;	/* IPv6 header (set by ip6_splithdr) */
	struct mbuf	*ip6e_hbh;	/* hop-by-hop options header */
	struct mbuf	*ip6e_dest1;	/* destination options, 1st part */
	struct mbuf	*ip6e_rthdr;	/* routing header */
	struct mbuf	*ip6e_dest2;	/* destination options, 2nd part */
};
118
119 static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, int, int);
120 static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *);
121 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct mbuf **);
122 static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int, int,
123 int, int);
124 static int ip6_setmoptions(int, struct ip6_moptions **, struct mbuf *);
125 static int ip6_getmoptions(int, struct ip6_moptions *, struct mbuf **);
126 static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
127 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
128 struct ip6_frag **);
129 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
130 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
131 static int ip6_getpmtu(struct route_in6 *, struct route_in6 *,
132 struct ifnet *, struct in6_addr *, u_long *, int *);
133 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
134
135 /*
136 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
137 * header (with pri, len, nxt, hlim, src, dst).
138 * This function may modify ver and hlim only.
139 * The mbuf chain containing the packet will be freed.
140 * The mbuf opt, if present, will not be freed.
141 *
142 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
143 * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one,
144 * which is rt_rmx.rmx_mtu.
145 */
146 int
147 ip6_output(m0, opt, ro, flags, im6o, ifpp, inp)
148 struct mbuf *m0;
149 struct ip6_pktopts *opt;
150 struct route_in6 *ro;
151 int flags;
152 struct ip6_moptions *im6o;
153 struct ifnet **ifpp; /* XXX: just for statistics */
154 struct inpcb *inp;
155 {
156 struct ip6_hdr *ip6, *mhip6;
157 struct ifnet *ifp, *origifp = NULL;
158 struct mbuf *m = m0;
159 int hlen, tlen, len, off;
160 struct route_in6 ip6route;
161 struct rtentry *rt = NULL;
162 struct sockaddr_in6 *dst, dstsock;
163 int error = 0;
164 struct in6_ifaddr *ia = NULL;
165 u_long mtu;
166 int alwaysfrag, dontfrag;
167 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
168 struct ip6_exthdrs exthdrs;
169 struct in6_addr finaldst;
170 struct route_in6 *ro_pmtu = NULL;
171 int hdrsplit = 0;
172 u_int8_t sproto = 0;
173 #ifdef IPSEC
174 struct m_tag *mtag;
175 union sockaddr_union sdst;
176 struct tdb_ident *tdbi;
177 u_int32_t sspi;
178 struct tdb *tdb;
179 int s;
180 #endif /* IPSEC */
181
182 #ifdef IPSEC
183 if (inp && (inp->inp_flags & INP_IPV6) == 0)
184 panic("ip6_output: IPv4 pcb is passed");
185 #endif /* IPSEC */
186
187 ip6 = mtod(m, struct ip6_hdr *);
188 finaldst = ip6->ip6_dst;
189
190 #define MAKE_EXTHDR(hp, mp) \
191 do { \
192 if (hp) { \
193 struct ip6_ext *eh = (struct ip6_ext *)(hp); \
194 error = ip6_copyexthdr((mp), (caddr_t)(hp), \
195 ((eh)->ip6e_len + 1) << 3); \
196 if (error) \
197 goto freehdrs; \
198 } \
199 } while (0)
200
201 bzero(&exthdrs, sizeof(exthdrs));
202
203 if (opt) {
204 /* Hop-by-Hop options header */
205 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
206 /* Destination options header(1st part) */
207 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
208 /* Routing header */
209 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
210 /* Destination options header(2nd part) */
211 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
212 }
213
214 #ifdef IPSEC
215 if (!ipsec_in_use && !inp)
216 goto done_spd;
217
218 /*
219 * splnet is chosen over spltdb because we are not allowed to
220 * lower the level, and udp6_output calls us in splnet(). XXX check
221 */
222 s = splnet();
223
224 /*
225 * Check if there was an outgoing SA bound to the flow
226 * from a transport protocol.
227 */
228 ip6 = mtod(m, struct ip6_hdr *);
229
230 /* Do we have any pending SAs to apply ? */
231 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
232 if (mtag != NULL) {
233 #ifdef DIAGNOSTIC
234 if (mtag->m_tag_len != sizeof (struct tdb_ident))
235 panic("ip6_output: tag of length %d (should be %d",
236 mtag->m_tag_len, sizeof (struct tdb_ident));
237 #endif
238 tdbi = (struct tdb_ident *)(mtag + 1);
239 tdb = gettdb(tdbi->spi, &tdbi->dst, tdbi->proto);
240 if (tdb == NULL)
241 error = -EINVAL;
242 m_tag_delete(m, mtag);
243 } else
244 tdb = ipsp_spd_lookup(m, AF_INET6, sizeof(struct ip6_hdr),
245 &error, IPSP_DIRECTION_OUT, NULL, inp);
246
247 if (tdb == NULL) {
248 splx(s);
249
250 if (error == 0) {
251 /*
252 * No IPsec processing required, we'll just send the
253 * packet out.
254 */
255 sproto = 0;
256
257 /* Fall through to routing/multicast handling */
258 } else {
259 /*
260 * -EINVAL is used to indicate that the packet should
261 * be silently dropped, typically because we've asked
262 * key management for an SA.
263 */
264 if (error == -EINVAL) /* Should silently drop packet */
265 error = 0;
266
267 goto freehdrs;
268 }
269 } else {
270 /* Loop detection */
271 for (mtag = m_tag_first(m); mtag != NULL;
272 mtag = m_tag_next(m, mtag)) {
273 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
274 mtag->m_tag_id !=
275 PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
276 continue;
277 tdbi = (struct tdb_ident *)(mtag + 1);
278 if (tdbi->spi == tdb->tdb_spi &&
279 tdbi->proto == tdb->tdb_sproto &&
280 !bcmp(&tdbi->dst, &tdb->tdb_dst,
281 sizeof(union sockaddr_union))) {
282 splx(s);
283 sproto = 0; /* mark as no-IPsec-needed */
284 goto done_spd;
285 }
286 }
287
288 /* We need to do IPsec */
289 bcopy(&tdb->tdb_dst, &sdst, sizeof(sdst));
290 sspi = tdb->tdb_spi;
291 sproto = tdb->tdb_sproto;
292 splx(s);
293 }
294
295 /* Fall through to the routing/multicast handling code */
296 done_spd:
297 #endif /* IPSEC */
298
299 /*
300 * Calculate the total length of the extension header chain.
301 * Keep the length of the unfragmentable part for fragmentation.
302 */
303 optlen = 0;
304 if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
305 if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
306 if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
307 unfragpartlen = optlen + sizeof(struct ip6_hdr);
308 /* NOTE: we don't add AH/ESP length here. do that later. */
309 if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
310
311 /*
312 * If we need IPsec, or there is at least one extension header,
313 * separate IP6 header from the payload.
314 */
315 if ((sproto || optlen) && !hdrsplit) {
316 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
317 m = NULL;
318 goto freehdrs;
319 }
320 m = exthdrs.ip6e_ip6;
321 hdrsplit++;
322 }
323
324 /* adjust pointer */
325 ip6 = mtod(m, struct ip6_hdr *);
326
327 /* adjust mbuf packet header length */
328 m->m_pkthdr.len += optlen;
329 plen = m->m_pkthdr.len - sizeof(*ip6);
330
331 /* If this is a jumbo payload, insert a jumbo payload option. */
332 if (plen > IPV6_MAXPACKET) {
333 if (!hdrsplit) {
334 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
335 m = NULL;
336 goto freehdrs;
337 }
338 m = exthdrs.ip6e_ip6;
339 hdrsplit++;
340 }
341 /* adjust pointer */
342 ip6 = mtod(m, struct ip6_hdr *);
343 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
344 goto freehdrs;
345 ip6->ip6_plen = 0;
346 } else
347 ip6->ip6_plen = htons(plen);
348
349 /*
350 * Concatenate headers and fill in next header fields.
351 * Here we have, on "m"
352 * IPv6 payload
353 * and we insert headers accordingly. Finally, we should be getting:
354 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
355 *
356 * during the header composing process, "m" points to IPv6 header.
357 * "mprev" points to an extension header prior to esp.
358 */
359 {
360 u_char *nexthdrp = &ip6->ip6_nxt;
361 struct mbuf *mprev = m;
362
363 /*
364 * we treat dest2 specially. this makes IPsec processing
365 * much easier. the goal here is to make mprev point the
366 * mbuf prior to dest2.
367 *
368 * result: IPv6 dest2 payload
369 * m and mprev will point to IPv6 header.
370 */
371 if (exthdrs.ip6e_dest2) {
372 if (!hdrsplit)
373 panic("assumption failed: hdr not split");
374 exthdrs.ip6e_dest2->m_next = m->m_next;
375 m->m_next = exthdrs.ip6e_dest2;
376 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
377 ip6->ip6_nxt = IPPROTO_DSTOPTS;
378 }
379
380 #define MAKE_CHAIN(m, mp, p, i)\
381 do {\
382 if (m) {\
383 if (!hdrsplit) \
384 panic("assumption failed: hdr not split"); \
385 *mtod((m), u_char *) = *(p);\
386 *(p) = (i);\
387 p = mtod((m), u_char *);\
388 (m)->m_next = (mp)->m_next;\
389 (mp)->m_next = (m);\
390 (mp) = (m);\
391 }\
392 } while (0)
393 /*
394 * result: IPv6 hbh dest1 rthdr dest2 payload
395 * m will point to IPv6 header. mprev will point to the
396 * extension header prior to dest2 (rthdr in the above case).
397 */
398 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
399 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
400 IPPROTO_DSTOPTS);
401 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
402 IPPROTO_ROUTING);
403 }
404
405 /*
406 * If there is a routing header, replace the destination address field
407 * with the first hop of the routing header.
408 */
409 if (exthdrs.ip6e_rthdr) {
410 struct ip6_rthdr *rh;
411 struct ip6_rthdr0 *rh0;
412 struct in6_addr *addr;
413
414 rh = (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
415 struct ip6_rthdr *));
416 switch (rh->ip6r_type) {
417 case IPV6_RTHDR_TYPE_0:
418 rh0 = (struct ip6_rthdr0 *)rh;
419 addr = (struct in6_addr *)(rh0 + 1);
420 ip6->ip6_dst = addr[0];
421 bcopy(&addr[1], &addr[0],
422 sizeof(struct in6_addr) * (rh0->ip6r0_segleft - 1));
423 addr[rh0->ip6r0_segleft - 1] = finaldst;
424 break;
425 default: /* is it possible? */
426 error = EINVAL;
427 goto bad;
428 }
429 }
430
431 /* Source address validation */
432 if (!(flags & IPV6_UNSPECSRC) &&
433 IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
434 /*
435 * XXX: we can probably assume validation in the caller, but
436 * we explicitly check the address here for safety.
437 */
438 error = EOPNOTSUPP;
439 ip6stat.ip6s_badscope++;
440 goto bad;
441 }
442 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
443 error = EOPNOTSUPP;
444 ip6stat.ip6s_badscope++;
445 goto bad;
446 }
447
448 ip6stat.ip6s_localout++;
449
450 /*
451 * Route packet.
452 */
453 /* initialize cached route */
454 if (ro == 0) {
455 ro = &ip6route;
456 bzero((caddr_t)ro, sizeof(*ro));
457 }
458 ro_pmtu = ro;
459 if (opt && opt->ip6po_rthdr)
460 ro = &opt->ip6po_route;
461 dst = (struct sockaddr_in6 *)&ro->ro_dst;
462
463 /*
464 * if specified, try to fill in the traffic class field.
465 * do not override if a non-zero value is already set.
466 * we check the diffserv field and the ecn field separately.
467 */
468 if (opt && opt->ip6po_tclass >= 0) {
469 int mask = 0;
470
471 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
472 mask |= 0xfc;
473 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
474 mask |= 0x03;
475 if (mask != 0)
476 ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
477 }
478
479 /* fill in or override the hop limit field, if necessary. */
480 if (opt && opt->ip6po_hlim != -1)
481 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
482 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
483 if (im6o != NULL)
484 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
485 else
486 ip6->ip6_hlim = ip6_defmcasthlim;
487 }
488
489 #ifdef IPSEC
490 /*
491 * Check if the packet needs encapsulation.
492 * ipsp_process_packet will never come back to here.
493 */
494 if (sproto != 0) {
495 s = splnet();
496
497 /*
498 * XXX what should we do if ip6_hlim == 0 and the
499 * packet gets tunneled?
500 */
501
502 tdb = gettdb(sspi, &sdst, sproto);
503 if (tdb == NULL) {
504 splx(s);
505 error = EHOSTUNREACH;
506 m_freem(m);
507 goto done;
508 }
509
510 /* Latch to PCB */
511 if (inp)
512 tdb_add_inp(tdb, inp, 0);
513
514 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
515
516 /* Callee frees mbuf */
517 /*
518 * if we are source-routing, do not attempt to tunnel the
519 * packet just because ip6_dst is different from what tdb has.
520 * XXX
521 */
522 error = ipsp_process_packet(m, tdb, AF_INET6,
523 exthdrs.ip6e_rthdr ? 1 : 0);
524 splx(s);
525
526 return error; /* Nothing more to be done */
527 }
528 #endif /* IPSEC */
529
530 bzero(&dstsock, sizeof(dstsock));
531 dstsock.sin6_family = AF_INET6;
532 dstsock.sin6_addr = ip6->ip6_dst;
533 dstsock.sin6_len = sizeof(dstsock);
534 if ((error = in6_selectroute(&dstsock, opt, im6o, ro, &ifp,
535 &rt)) != 0) {
536 switch (error) {
537 case EHOSTUNREACH:
538 ip6stat.ip6s_noroute++;
539 break;
540 case EADDRNOTAVAIL:
541 default:
542 break; /* XXX statistics? */
543 }
544 if (ifp != NULL)
545 in6_ifstat_inc(ifp, ifs6_out_discard);
546 goto bad;
547 }
548 if (rt == NULL) {
549 /*
550 * If in6_selectroute() does not return a route entry,
551 * dst may not have been updated.
552 */
553 *dst = dstsock; /* XXX */
554 }
555
556 /*
557 * then rt (for unicast) and ifp must be non-NULL valid values.
558 */
559 if (rt) {
560 ia = (struct in6_ifaddr *)(rt->rt_ifa);
561 rt->rt_use++;
562 }
563
564 if ((flags & IPV6_FORWARDING) == 0) {
565 /* XXX: the FORWARDING flag can be set for mrouting. */
566 in6_ifstat_inc(ifp, ifs6_out_request);
567 }
568
569 /*
570 * The outgoing interface must be in the zone of source and
571 * destination addresses. We should use ia_ifp to support the
572 * case of sending packets to an address of our own.
573 */
574 if (ia != NULL && ia->ia_ifp)
575 origifp = ia->ia_ifp;
576 else
577 origifp = ifp;
578
579 if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
580 if (opt && opt->ip6po_nextroute.ro_rt) {
581 /*
582 * The nexthop is explicitly specified by the
583 * application. We assume the next hop is an IPv6
584 * address.
585 */
586 dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
587 } else if ((rt->rt_flags & RTF_GATEWAY))
588 dst = (struct sockaddr_in6 *)rt->rt_gateway;
589 }
590
591 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
592 /* Unicast */
593
594 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
595 } else {
596 /* Multicast */
597 struct in6_multi *in6m;
598
599 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
600
601 in6_ifstat_inc(ifp, ifs6_out_mcast);
602
603 /*
604 * Confirm that the outgoing interface supports multicast.
605 */
606 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
607 ip6stat.ip6s_noroute++;
608 in6_ifstat_inc(ifp, ifs6_out_discard);
609 error = ENETUNREACH;
610 goto bad;
611 }
612 IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
613 if (in6m != NULL &&
614 (im6o == NULL || im6o->im6o_multicast_loop)) {
615 /*
616 * If we belong to the destination multicast group
617 * on the outgoing interface, and the caller did not
618 * forbid loopback, loop back a copy.
619 */
620 ip6_mloopback(ifp, m, dst);
621 } else {
622 /*
623 * If we are acting as a multicast router, perform
624 * multicast forwarding as if the packet had just
625 * arrived on the interface to which we are about
626 * to send. The multicast forwarding function
627 * recursively calls this function, using the
628 * IPV6_FORWARDING flag to prevent infinite recursion.
629 *
630 * Multicasts that are looped back by ip6_mloopback(),
631 * above, will be forwarded by the ip6_input() routine,
632 * if necessary.
633 */
634 #ifdef MROUTING
635 if (ip6_mforwarding && ip6_mrouter &&
636 (flags & IPV6_FORWARDING) == 0) {
637 if (ip6_mforward(ip6, ifp, m) != 0) {
638 m_freem(m);
639 goto done;
640 }
641 }
642 #endif
643 }
644 /*
645 * Multicasts with a hoplimit of zero may be looped back,
646 * above, but must not be transmitted on a network.
647 * Also, multicasts addressed to the loopback interface
648 * are not sent -- the above call to ip6_mloopback() will
649 * loop back a copy if this host actually belongs to the
650 * destination group on the loopback interface.
651 */
652 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
653 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
654 m_freem(m);
655 goto done;
656 }
657 }
658
659 /*
660 * Fill the outgoing interface to tell the upper layer
661 * to increment per-interface statistics.
662 */
663 if (ifpp)
664 *ifpp = ifp;
665
666 /* Determine path MTU. */
667 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
668 &alwaysfrag)) != 0)
669 goto bad;
670
671 /*
672 * The caller of this function may specify to use the minimum MTU
673 * in some cases.
674 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
675 * setting. The logic is a bit complicated; by default, unicast
676 * packets will follow path MTU while multicast packets will be sent at
677 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
678 * including unicast ones will be sent at the minimum MTU. Multicast
679 * packets will always be sent at the minimum MTU unless
680 * IP6PO_MINMTU_DISABLE is explicitly specified.
681 * See RFC 3542 for more details.
682 */
683 if (mtu > IPV6_MMTU) {
684 if ((flags & IPV6_MINMTU))
685 mtu = IPV6_MMTU;
686 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
687 mtu = IPV6_MMTU;
688 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
689 (opt == NULL ||
690 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
691 mtu = IPV6_MMTU;
692 }
693 }
694
695 /* Fake scoped addresses */
696 if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
697 /*
698 * If source or destination address is a scoped address, and
699 * the packet is going to be sent to a loopback interface,
700 * we should keep the original interface.
701 */
702
703 /*
704 * XXX: this is a very experimental and temporary solution.
705 * We eventually have sockaddr_in6 and use the sin6_scope_id
706 * field of the structure here.
707 * We rely on the consistency between two scope zone ids
708 * of source add destination, which should already be assured
709 * Larger scopes than link will be supported in the near
710 * future.
711 */
712 origifp = NULL;
713 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src))
714 origifp = ifindex2ifnet[ntohs(ip6->ip6_src.s6_addr16[1])];
715 else if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst))
716 origifp = ifindex2ifnet[ntohs(ip6->ip6_dst.s6_addr16[1])];
717 /*
718 * XXX: origifp can be NULL even in those two cases above.
719 * For example, if we remove the (only) link-local address
720 * from the loopback interface, and try to send a link-local
721 * address without link-id information. Then the source
722 * address is ::1, and the destination address is the
723 * link-local address with its s6_addr16[1] being zero.
724 * What is worse, if the packet goes to the loopback interface
725 * by a default rejected route, the null pointer would be
726 * passed to looutput, and the kernel would hang.
727 * The following last resort would prevent such disaster.
728 */
729 if (origifp == NULL)
730 origifp = ifp;
731 } else
732 origifp = ifp;
733 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src))
734 ip6->ip6_src.s6_addr16[1] = 0;
735 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst))
736 ip6->ip6_dst.s6_addr16[1] = 0;
737
738 /*
739 * If the outgoing packet contains a hop-by-hop options header,
740 * it must be examined and processed even by the source node.
741 * (RFC 2460, section 4.)
742 */
743 if (exthdrs.ip6e_hbh) {
744 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
745 u_int32_t dummy1; /* XXX unused */
746 u_int32_t dummy2; /* XXX unused */
747
748 /*
749 * XXX: if we have to send an ICMPv6 error to the sender,
750 * we need the M_LOOP flag since icmp6_error() expects
751 * the IPv6 and the hop-by-hop options header are
752 * continuous unless the flag is set.
753 */
754 m->m_flags |= M_LOOP;
755 m->m_pkthdr.rcvif = ifp;
756 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
757 ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
758 &dummy1, &dummy2) < 0) {
759 /* m was already freed at this point */
760 error = EINVAL;/* better error? */
761 goto done;
762 }
763 m->m_flags &= ~M_LOOP; /* XXX */
764 m->m_pkthdr.rcvif = NULL;
765 }
766
767 #if NPF > 0
768 if (pf_test6(PF_OUT, ifp, &m, NULL) != PF_PASS) {
769 error = EHOSTUNREACH;
770 m_freem(m);
771 goto done;
772 }
773 if (m == NULL)
774 goto done;
775 ip6 = mtod(m, struct ip6_hdr *);
776 #endif
777
778 /*
779 * Send the packet to the outgoing interface.
780 * If necessary, do IPv6 fragmentation before sending.
781 *
782 * the logic here is rather complex:
783 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
784 * 1-a: send as is if tlen <= path mtu
785 * 1-b: fragment if tlen > path mtu
786 *
787 * 2: if user asks us not to fragment (dontfrag == 1)
788 * 2-a: send as is if tlen <= interface mtu
789 * 2-b: error if tlen > interface mtu
790 *
791 * 3: if we always need to attach fragment header (alwaysfrag == 1)
792 * always fragment
793 *
794 * 4: if dontfrag == 1 && alwaysfrag == 1
795 * error, as we cannot handle this conflicting request
796 */
797 tlen = m->m_pkthdr.len;
798
799 if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
800 dontfrag = 1;
801 else
802 dontfrag = 0;
803 if (dontfrag && alwaysfrag) { /* case 4 */
804 /* conflicting request - can't transmit */
805 error = EMSGSIZE;
806 goto bad;
807 }
808 if (dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */
809 /*
810 * Even if the DONTFRAG option is specified, we cannot send the
811 * packet when the data length is larger than the MTU of the
812 * outgoing interface.
813 * Notify the error by sending IPV6_PATHMTU ancillary data as
814 * well as returning an error code (the latter is not described
815 * in the API spec.)
816 */
817 #if 0
818 u_int32_t mtu32;
819 struct ip6ctlparam ip6cp;
820
821 mtu32 = (u_int32_t)mtu;
822 bzero(&ip6cp, sizeof(ip6cp));
823 ip6cp.ip6c_cmdarg = (void *)&mtu32;
824 pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
825 (void *)&ip6cp);
826 #endif
827
828 error = EMSGSIZE;
829 goto bad;
830 }
831
832 /*
833 * transmit packet without fragmentation
834 */
835 if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */
836 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
837 goto done;
838 }
839
840 /*
841 * try to fragment the packet. case 1-b and 3
842 */
843 if (mtu < IPV6_MMTU) {
844 /* path MTU cannot be less than IPV6_MMTU */
845 error = EMSGSIZE;
846 in6_ifstat_inc(ifp, ifs6_out_fragfail);
847 goto bad;
848 } else if (ip6->ip6_plen == 0) {
849 /* jumbo payload cannot be fragmented */
850 error = EMSGSIZE;
851 in6_ifstat_inc(ifp, ifs6_out_fragfail);
852 goto bad;
853 } else {
854 struct mbuf **mnext, *m_frgpart;
855 struct ip6_frag *ip6f;
856 u_int32_t id = htonl(ip6_randomid());
857 u_char nextproto;
858 #if 0
859 struct ip6ctlparam ip6cp;
860 u_int32_t mtu32;
861 #endif
862
863 /*
864 * Too large for the destination or interface;
865 * fragment if possible.
866 * Must be able to put at least 8 bytes per fragment.
867 */
868 hlen = unfragpartlen;
869 if (mtu > IPV6_MAXPACKET)
870 mtu = IPV6_MAXPACKET;
871
872 #if 0
873 /* Notify a proper path MTU to applications. */
874 mtu32 = (u_int32_t)mtu;
875 bzero(&ip6cp, sizeof(ip6cp));
876 ip6cp.ip6c_cmdarg = (void *)&mtu32;
877 pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
878 (void *)&ip6cp);
879 #endif
880
881 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
882 if (len < 8) {
883 error = EMSGSIZE;
884 in6_ifstat_inc(ifp, ifs6_out_fragfail);
885 goto bad;
886 }
887
888 mnext = &m->m_nextpkt;
889
890 /*
891 * Change the next header field of the last header in the
892 * unfragmentable part.
893 */
894 if (exthdrs.ip6e_rthdr) {
895 nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
896 *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
897 } else if (exthdrs.ip6e_dest1) {
898 nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
899 *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
900 } else if (exthdrs.ip6e_hbh) {
901 nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
902 *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
903 } else {
904 nextproto = ip6->ip6_nxt;
905 ip6->ip6_nxt = IPPROTO_FRAGMENT;
906 }
907
908 /*
909 * Loop through length of segment after first fragment,
910 * make new header and copy data of each part and link onto
911 * chain.
912 */
913 m0 = m;
914 for (off = hlen; off < tlen; off += len) {
915 struct mbuf *mlast;
916
917 MGETHDR(m, M_DONTWAIT, MT_HEADER);
918 if (!m) {
919 error = ENOBUFS;
920 ip6stat.ip6s_odropped++;
921 goto sendorfree;
922 }
923 m->m_pkthdr.rcvif = NULL;
924 m->m_flags = m0->m_flags & M_COPYFLAGS;
925 *mnext = m;
926 mnext = &m->m_nextpkt;
927 m->m_data += max_linkhdr;
928 mhip6 = mtod(m, struct ip6_hdr *);
929 *mhip6 = *ip6;
930 m->m_len = sizeof(*mhip6);
931 error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
932 if (error) {
933 ip6stat.ip6s_odropped++;
934 goto sendorfree;
935 }
936 ip6f->ip6f_offlg = htons((u_int16_t)((off - hlen) & ~7));
937 if (off + len >= tlen)
938 len = tlen - off;
939 else
940 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
941 mhip6->ip6_plen = htons((u_int16_t)(len + hlen +
942 sizeof(*ip6f) - sizeof(struct ip6_hdr)));
943 if ((m_frgpart = m_copy(m0, off, len)) == 0) {
944 error = ENOBUFS;
945 ip6stat.ip6s_odropped++;
946 goto sendorfree;
947 }
948 for (mlast = m; mlast->m_next; mlast = mlast->m_next)
949 ;
950 mlast->m_next = m_frgpart;
951 m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
952 m->m_pkthdr.rcvif = (struct ifnet *)0;
953 ip6f->ip6f_reserved = 0;
954 ip6f->ip6f_ident = id;
955 ip6f->ip6f_nxt = nextproto;
956 ip6stat.ip6s_ofragments++;
957 in6_ifstat_inc(ifp, ifs6_out_fragcreat);
958 }
959
960 in6_ifstat_inc(ifp, ifs6_out_fragok);
961 }
962
963 /*
964 * Remove leading garbages.
965 */
966 sendorfree:
967 m = m0->m_nextpkt;
968 m0->m_nextpkt = 0;
969 m_freem(m0);
970 for (m0 = m; m; m = m0) {
971 m0 = m->m_nextpkt;
972 m->m_nextpkt = 0;
973 if (error == 0) {
974 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
975 } else
976 m_freem(m);
977 }
978
979 if (error == 0)
980 ip6stat.ip6s_fragmented++;
981
982 done:
983 if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
984 RTFREE(ro->ro_rt);
985 } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
986 RTFREE(ro_pmtu->ro_rt);
987 }
988
989 return (error);
990
991 freehdrs:
992 m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */
993 m_freem(exthdrs.ip6e_dest1);
994 m_freem(exthdrs.ip6e_rthdr);
995 m_freem(exthdrs.ip6e_dest2);
996 /* FALLTHROUGH */
997 bad:
998 m_freem(m);
999 goto done;
1000 }
1001
1002 static int
1003 ip6_copyexthdr(mp, hdr, hlen)
1004 struct mbuf **mp;
1005 caddr_t hdr;
1006 int hlen;
1007 {
1008 struct mbuf *m;
1009
1010 if (hlen > MCLBYTES)
1011 return (ENOBUFS); /* XXX */
1012
1013 MGET(m, M_DONTWAIT, MT_DATA);
1014 if (!m)
1015 return (ENOBUFS);
1016
1017 if (hlen > MLEN) {
1018 MCLGET(m, M_DONTWAIT);
1019 if ((m->m_flags & M_EXT) == 0) {
1020 m_free(m);
1021 return (ENOBUFS);
1022 }
1023 }
1024 m->m_len = hlen;
1025 if (hdr)
1026 bcopy(hdr, mtod(m, caddr_t), hlen);
1027
1028 *mp = m;
1029 return (0);
1030 }
1031
1032 /*
1033 * Insert jumbo payload option.
1034 */
1035 static int
1036 ip6_insert_jumboopt(exthdrs, plen)
1037 struct ip6_exthdrs *exthdrs;
1038 u_int32_t plen;
1039 {
1040 struct mbuf *mopt;
1041 u_int8_t *optbuf;
1042 u_int32_t v;
1043
1044 #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */
1045
1046 /*
1047 * If there is no hop-by-hop options header, allocate new one.
1048 * If there is one but it doesn't have enough space to store the
1049 * jumbo payload option, allocate a cluster to store the whole options.
1050 * Otherwise, use it to store the options.
1051 */
1052 if (exthdrs->ip6e_hbh == 0) {
1053 MGET(mopt, M_DONTWAIT, MT_DATA);
1054 if (mopt == 0)
1055 return (ENOBUFS);
1056 mopt->m_len = JUMBOOPTLEN;
1057 optbuf = mtod(mopt, u_int8_t *);
1058 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
1059 exthdrs->ip6e_hbh = mopt;
1060 } else {
1061 struct ip6_hbh *hbh;
1062
1063 mopt = exthdrs->ip6e_hbh;
1064 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1065 /*
1066 * XXX assumption:
1067 * - exthdrs->ip6e_hbh is not referenced from places
1068 * other than exthdrs.
1069 * - exthdrs->ip6e_hbh is not an mbuf chain.
1070 */
1071 int oldoptlen = mopt->m_len;
1072 struct mbuf *n;
1073
1074 /*
1075 * XXX: give up if the whole (new) hbh header does
1076 * not fit even in an mbuf cluster.
1077 */
1078 if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1079 return (ENOBUFS);
1080
1081 /*
1082 * As a consequence, we must always prepare a cluster
1083 * at this point.
1084 */
1085 MGET(n, M_DONTWAIT, MT_DATA);
1086 if (n) {
1087 MCLGET(n, M_DONTWAIT);
1088 if ((n->m_flags & M_EXT) == 0) {
1089 m_freem(n);
1090 n = NULL;
1091 }
1092 }
1093 if (!n)
1094 return (ENOBUFS);
1095 n->m_len = oldoptlen + JUMBOOPTLEN;
1096 bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1097 oldoptlen);
1098 optbuf = mtod(n, u_int8_t *) + oldoptlen;
1099 m_freem(mopt);
1100 mopt = exthdrs->ip6e_hbh = n;
1101 } else {
1102 optbuf = mtod(mopt, u_int8_t *) + mopt->m_len;
1103 mopt->m_len += JUMBOOPTLEN;
1104 }
1105 optbuf[0] = IP6OPT_PADN;
1106 optbuf[1] = 0;
1107
1108 /*
1109 * Adjust the header length according to the pad and
1110 * the jumbo payload option.
1111 */
1112 hbh = mtod(mopt, struct ip6_hbh *);
1113 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1114 }
1115
1116 /* fill in the option. */
1117 optbuf[2] = IP6OPT_JUMBO;
1118 optbuf[3] = 4;
1119 v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1120 bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1121
1122 /* finally, adjust the packet header length */
1123 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1124
1125 return (0);
1126 #undef JUMBOOPTLEN
1127 }
1128
1129 /*
1130 * Insert fragment header and copy unfragmentable header portions.
1131 */
1132 static int
1133 ip6_insertfraghdr(m0, m, hlen, frghdrp)
1134 struct mbuf *m0, *m;
1135 int hlen;
1136 struct ip6_frag **frghdrp;
1137 {
1138 struct mbuf *n, *mlast;
1139
1140 if (hlen > sizeof(struct ip6_hdr)) {
1141 n = m_copym(m0, sizeof(struct ip6_hdr),
1142 hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1143 if (n == 0)
1144 return (ENOBUFS);
1145 m->m_next = n;
1146 } else
1147 n = m;
1148
1149 /* Search for the last mbuf of unfragmentable part. */
1150 for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1151 ;
1152
1153 if ((mlast->m_flags & M_EXT) == 0 &&
1154 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1155 /* use the trailing space of the last mbuf for the fragment hdr */
1156 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1157 mlast->m_len);
1158 mlast->m_len += sizeof(struct ip6_frag);
1159 m->m_pkthdr.len += sizeof(struct ip6_frag);
1160 } else {
1161 /* allocate a new mbuf for the fragment header */
1162 struct mbuf *mfrg;
1163
1164 MGET(mfrg, M_DONTWAIT, MT_DATA);
1165 if (mfrg == 0)
1166 return (ENOBUFS);
1167 mfrg->m_len = sizeof(struct ip6_frag);
1168 *frghdrp = mtod(mfrg, struct ip6_frag *);
1169 mlast->m_next = mfrg;
1170 }
1171
1172 return (0);
1173 }
1174
1175 static int
1176 ip6_getpmtu(ro_pmtu, ro, ifp, dst, mtup, alwaysfragp)
1177 struct route_in6 *ro_pmtu, *ro;
1178 struct ifnet *ifp;
1179 struct in6_addr *dst;
1180 u_long *mtup;
1181 int *alwaysfragp;
1182 {
1183 u_int32_t mtu = 0;
1184 int alwaysfrag = 0;
1185 int error = 0;
1186
1187 if (ro_pmtu != ro) {
1188 /* The first hop and the final destination may differ. */
1189 struct sockaddr_in6 *sa6_dst =
1190 (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1191 if (ro_pmtu->ro_rt &&
1192 ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1193 !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1194 RTFREE(ro_pmtu->ro_rt);
1195 ro_pmtu->ro_rt = (struct rtentry *)NULL;
1196 }
1197 if (ro_pmtu->ro_rt == 0) {
1198 bzero(sa6_dst, sizeof(*sa6_dst));
1199 sa6_dst->sin6_family = AF_INET6;
1200 sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1201 sa6_dst->sin6_addr = *dst;
1202
1203 rtalloc((struct route *)ro_pmtu);
1204 }
1205 }
1206 if (ro_pmtu->ro_rt) {
1207 u_int32_t ifmtu;
1208
1209 if (ifp == NULL)
1210 ifp = ro_pmtu->ro_rt->rt_ifp;
1211 ifmtu = IN6_LINKMTU(ifp);
1212 mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1213 if (mtu == 0)
1214 mtu = ifmtu;
1215 else if (mtu < IPV6_MMTU) {
1216 /*
1217 * RFC2460 section 5, last paragraph:
1218 * if we record ICMPv6 too big message with
1219 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1220 * or smaller, with fragment header attached.
1221 * (fragment header is needed regardless from the
1222 * packet size, for translators to identify packets)
1223 */
1224 alwaysfrag = 1;
1225 mtu = IPV6_MMTU;
1226 } else if (mtu > ifmtu) {
1227 /*
1228 * The MTU on the route is larger than the MTU on
1229 * the interface! This shouldn't happen, unless the
1230 * MTU of the interface has been changed after the
1231 * interface was brought up. Change the MTU in the
1232 * route to match the interface MTU (as long as the
1233 * field isn't locked).
1234 */
1235 mtu = ifmtu;
1236 if (!(ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU))
1237 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1238 }
1239 } else if (ifp) {
1240 mtu = IN6_LINKMTU(ifp);
1241 } else
1242 error = EHOSTUNREACH; /* XXX */
1243
1244 *mtup = mtu;
1245 if (alwaysfragp)
1246 *alwaysfragp = alwaysfrag;
1247 return (error);
1248 }
1249
1250 /*
1251 * IP6 socket option processing.
1252 */
1253 int
1254 ip6_ctloutput(op, so, level, optname, mp)
1255 int op;
1256 struct socket *so;
1257 int level, optname;
1258 struct mbuf **mp;
1259 {
1260 int privileged, optdatalen, uproto;
1261 void *optdata;
1262 struct inpcb *inp = sotoinpcb(so);
1263 struct mbuf *m = *mp;
1264 int error, optval;
1265 int optlen;
1266 #ifdef IPSEC
1267 struct proc *p = curproc; /* XXX */
1268 struct tdb *tdb;
1269 struct tdb_ident *tdbip, tdbi;
1270 int s;
1271 #endif
1272
1273 optlen = m ? m->m_len : 0;
1274 error = optval = 0;
1275
1276 privileged = (inp->inp_socket->so_state & SS_PRIV);
1277 uproto = (int)so->so_proto->pr_protocol;
1278
1279 if (level == IPPROTO_IPV6) {
1280 switch (op) {
1281 case PRCO_SETOPT:
1282 switch (optname) {
1283 case IPV6_2292PKTOPTIONS:
1284 {
1285 error = ip6_pcbopts(&inp->inp_outputopts6,
1286 m, so);
1287 break;
1288 }
1289
1290 /*
1291 * Use of some Hop-by-Hop options or some
1292 * Destination options, might require special
1293 * privilege. That is, normal applications
1294 * (without special privilege) might be forbidden
1295 * from setting certain options in outgoing packets,
1296 * and might never see certain options in received
1297 * packets. [RFC 2292 Section 6]
1298 * KAME specific note:
1299 * KAME prevents non-privileged users from sending or
1300 * receiving ANY hbh/dst options in order to avoid
1301 * overhead of parsing options in the kernel.
1302 */
1303 case IPV6_RECVHOPOPTS:
1304 case IPV6_RECVDSTOPTS:
1305 case IPV6_RECVRTHDRDSTOPTS:
1306 if (!privileged) {
1307 error = EPERM;
1308 break;
1309 }
1310 /* FALLTHROUGH */
1311 case IPV6_UNICAST_HOPS:
1312 case IPV6_HOPLIMIT:
1313 case IPV6_FAITH:
1314
1315 case IPV6_RECVPKTINFO:
1316 case IPV6_RECVHOPLIMIT:
1317 case IPV6_RECVRTHDR:
1318 case IPV6_RECVPATHMTU:
1319 case IPV6_RECVTCLASS:
1320 case IPV6_V6ONLY:
1321 case IPV6_AUTOFLOWLABEL:
1322 if (optlen != sizeof(int)) {
1323 error = EINVAL;
1324 break;
1325 }
1326 optval = *mtod(m, int *);
1327 switch (optname) {
1328
1329 case IPV6_UNICAST_HOPS:
1330 if (optval < -1 || optval >= 256)
1331 error = EINVAL;
1332 else {
1333 /* -1 = kernel default */
1334 inp->inp_hops = optval;
1335 }
1336 break;
1337 #define OPTSET(bit) \
1338 do { \
1339 if (optval) \
1340 inp->inp_flags |= (bit); \
1341 else \
1342 inp->inp_flags &= ~(bit); \
1343 } while (/*CONSTCOND*/ 0)
1344 #define OPTSET2292(bit) \
1345 do { \
1346 inp->inp_flags |= IN6P_RFC2292; \
1347 if (optval) \
1348 inp->inp_flags |= (bit); \
1349 else \
1350 inp->inp_flags &= ~(bit); \
1351 } while (/*CONSTCOND*/ 0)
1352 #define OPTBIT(bit) (inp->inp_flags & (bit) ? 1 : 0)
1353
1354 case IPV6_RECVPKTINFO:
1355 /* cannot mix with RFC2292 */
1356 if (OPTBIT(IN6P_RFC2292)) {
1357 error = EINVAL;
1358 break;
1359 }
1360 OPTSET(IN6P_PKTINFO);
1361 break;
1362
1363 case IPV6_HOPLIMIT:
1364 {
1365 struct ip6_pktopts **optp;
1366
1367 /* cannot mix with RFC2292 */
1368 if (OPTBIT(IN6P_RFC2292)) {
1369 error = EINVAL;
1370 break;
1371 }
1372 optp = &inp->inp_outputopts6;
1373 error = ip6_pcbopt(IPV6_HOPLIMIT,
1374 (u_char *)&optval,
1375 sizeof(optval),
1376 optp,
1377 privileged, uproto);
1378 break;
1379 }
1380
1381 case IPV6_RECVHOPLIMIT:
1382 /* cannot mix with RFC2292 */
1383 if (OPTBIT(IN6P_RFC2292)) {
1384 error = EINVAL;
1385 break;
1386 }
1387 OPTSET(IN6P_HOPLIMIT);
1388 break;
1389
1390 case IPV6_RECVHOPOPTS:
1391 /* cannot mix with RFC2292 */
1392 if (OPTBIT(IN6P_RFC2292)) {
1393 error = EINVAL;
1394 break;
1395 }
1396 OPTSET(IN6P_HOPOPTS);
1397 break;
1398
1399 case IPV6_RECVDSTOPTS:
1400 /* cannot mix with RFC2292 */
1401 if (OPTBIT(IN6P_RFC2292)) {
1402 error = EINVAL;
1403 break;
1404 }
1405 OPTSET(IN6P_DSTOPTS);
1406 break;
1407
1408 case IPV6_RECVRTHDRDSTOPTS:
1409 /* cannot mix with RFC2292 */
1410 if (OPTBIT(IN6P_RFC2292)) {
1411 error = EINVAL;
1412 break;
1413 }
1414 OPTSET(IN6P_RTHDRDSTOPTS);
1415 break;
1416
1417 case IPV6_RECVRTHDR:
1418 /* cannot mix with RFC2292 */
1419 if (OPTBIT(IN6P_RFC2292)) {
1420 error = EINVAL;
1421 break;
1422 }
1423 OPTSET(IN6P_RTHDR);
1424 break;
1425
1426 case IPV6_FAITH:
1427 OPTSET(IN6P_FAITH);
1428 break;
1429
1430 case IPV6_RECVPATHMTU:
1431 /*
1432 * We ignore this option for TCP
1433 * sockets.
1434 * (RFC3542 leaves this case
1435 * unspecified.)
1436 */
1437 if (uproto != IPPROTO_TCP)
1438 OPTSET(IN6P_MTU);
1439 break;
1440
1441 case IPV6_V6ONLY:
1442 /*
1443 * make setsockopt(IPV6_V6ONLY)
1444 * available only prior to bind(2).
1445 * see ipng mailing list, Jun 22 2001.
1446 */
1447 if (inp->inp_lport ||
1448 !IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) {
1449 error = EINVAL;
1450 break;
1451 }
1452 if ((ip6_v6only && optval) ||
1453 (!ip6_v6only && !optval))
1454 error = 0;
1455 else
1456 error = EINVAL;
1457 break;
1458 case IPV6_RECVTCLASS:
1459 /* cannot mix with RFC2292 XXX */
1460 if (OPTBIT(IN6P_RFC2292)) {
1461 error = EINVAL;
1462 break;
1463 }
1464 OPTSET(IN6P_TCLASS);
1465 break;
1466 case IPV6_AUTOFLOWLABEL:
1467 OPTSET(IN6P_AUTOFLOWLABEL);
1468 break;
1469
1470 }
1471 break;
1472
1473 case IPV6_TCLASS:
1474 case IPV6_DONTFRAG:
1475 case IPV6_USE_MIN_MTU:
1476 if (optlen != sizeof(optval)) {
1477 error = EINVAL;
1478 break;
1479 }
1480 optval = *mtod(m, int *);
1481 {
1482 struct ip6_pktopts **optp;
1483 optp = &inp->inp_outputopts6;
1484 error = ip6_pcbopt(optname,
1485 (u_char *)&optval,
1486 sizeof(optval),
1487 optp,
1488 privileged, uproto);
1489 break;
1490 }
1491
1492 case IPV6_2292PKTINFO:
1493 case IPV6_2292HOPLIMIT:
1494 case IPV6_2292HOPOPTS:
1495 case IPV6_2292DSTOPTS:
1496 case IPV6_2292RTHDR:
1497 /* RFC 2292 */
1498 if (optlen != sizeof(int)) {
1499 error = EINVAL;
1500 break;
1501 }
1502 optval = *mtod(m, int *);
1503 switch (optname) {
1504 case IPV6_2292PKTINFO:
1505 OPTSET2292(IN6P_PKTINFO);
1506 break;
1507 case IPV6_2292HOPLIMIT:
1508 OPTSET2292(IN6P_HOPLIMIT);
1509 break;
1510 case IPV6_2292HOPOPTS:
1511 /*
1512 * Check super-user privilege.
1513 * See comments for IPV6_RECVHOPOPTS.
1514 */
1515 if (!privileged)
1516 return (EPERM);
1517 OPTSET2292(IN6P_HOPOPTS);
1518 break;
1519 case IPV6_2292DSTOPTS:
1520 if (!privileged)
1521 return (EPERM);
1522 OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1523 break;
1524 case IPV6_2292RTHDR:
1525 OPTSET2292(IN6P_RTHDR);
1526 break;
1527 }
1528 break;
1529 case IPV6_PKTINFO:
1530 case IPV6_HOPOPTS:
1531 case IPV6_RTHDR:
1532 case IPV6_DSTOPTS:
1533 case IPV6_RTHDRDSTOPTS:
1534 case IPV6_NEXTHOP:
1535 {
1536 /* new advanced API (RFC3542) */
1537 u_char *optbuf;
1538 int optbuflen;
1539 struct ip6_pktopts **optp;
1540
1541 /* cannot mix with RFC2292 */
1542 if (OPTBIT(IN6P_RFC2292)) {
1543 error = EINVAL;
1544 break;
1545 }
1546
1547 if (m && m->m_next) {
1548 error = EINVAL; /* XXX */
1549 break;
1550 }
1551 if (m) {
1552 optbuf = mtod(m, u_char *);
1553 optbuflen = m->m_len;
1554 } else {
1555 optbuf = NULL;
1556 optbuflen = 0;
1557 }
1558 optp = &inp->inp_outputopts6;
1559 error = ip6_pcbopt(optname,
1560 optbuf, optbuflen,
1561 optp, privileged, uproto);
1562 break;
1563 }
1564 #undef OPTSET
1565
1566 case IPV6_MULTICAST_IF:
1567 case IPV6_MULTICAST_HOPS:
1568 case IPV6_MULTICAST_LOOP:
1569 case IPV6_JOIN_GROUP:
1570 case IPV6_LEAVE_GROUP:
1571 error = ip6_setmoptions(optname,
1572 &inp->inp_moptions6,
1573 m);
1574 break;
1575
1576 case IPV6_PORTRANGE:
1577 optval = *mtod(m, int *);
1578
1579 switch (optval) {
1580 case IPV6_PORTRANGE_DEFAULT:
1581 inp->inp_flags &= ~(IN6P_LOWPORT);
1582 inp->inp_flags &= ~(IN6P_HIGHPORT);
1583 break;
1584
1585 case IPV6_PORTRANGE_HIGH:
1586 inp->inp_flags &= ~(IN6P_LOWPORT);
1587 inp->inp_flags |= IN6P_HIGHPORT;
1588 break;
1589
1590 case IPV6_PORTRANGE_LOW:
1591 inp->inp_flags &= ~(IN6P_HIGHPORT);
1592 inp->inp_flags |= IN6P_LOWPORT;
1593 break;
1594
1595 default:
1596 error = EINVAL;
1597 break;
1598 }
1599 break;
1600
1601 case IPSEC6_OUTSA:
1602 #ifndef IPSEC
1603 error = EINVAL;
1604 #else
1605 s = spltdb();
1606 if (m == 0 || m->m_len != sizeof(struct tdb_ident)) {
1607 error = EINVAL;
1608 } else {
1609 tdbip = mtod(m, struct tdb_ident *);
1610 tdb = gettdb(tdbip->spi, &tdbip->dst,
1611 tdbip->proto);
1612 if (tdb == NULL)
1613 error = ESRCH;
1614 else
1615 tdb_add_inp(tdb, inp, 0);
1616 }
1617 splx(s);
1618 #endif
1619 break;
1620
1621 case IPV6_AUTH_LEVEL:
1622 case IPV6_ESP_TRANS_LEVEL:
1623 case IPV6_ESP_NETWORK_LEVEL:
1624 case IPV6_IPCOMP_LEVEL:
1625 #ifndef IPSEC
1626 error = EINVAL;
1627 #else
1628 if (m == 0 || m->m_len != sizeof(int)) {
1629 error = EINVAL;
1630 break;
1631 }
1632 optval = *mtod(m, int *);
1633
1634 if (optval < IPSEC_LEVEL_BYPASS ||
1635 optval > IPSEC_LEVEL_UNIQUE) {
1636 error = EINVAL;
1637 break;
1638 }
1639
1640 switch (optname) {
1641 case IPV6_AUTH_LEVEL:
1642 if (optval < ipsec_auth_default_level &&
1643 suser(p, 0)) {
1644 error = EACCES;
1645 break;
1646 }
1647 inp->inp_seclevel[SL_AUTH] = optval;
1648 break;
1649
1650 case IPV6_ESP_TRANS_LEVEL:
1651 if (optval < ipsec_esp_trans_default_level &&
1652 suser(p, 0)) {
1653 error = EACCES;
1654 break;
1655 }
1656 inp->inp_seclevel[SL_ESP_TRANS] = optval;
1657 break;
1658
1659 case IPV6_ESP_NETWORK_LEVEL:
1660 if (optval < ipsec_esp_network_default_level &&
1661 suser(p, 0)) {
1662 error = EACCES;
1663 break;
1664 }
1665 inp->inp_seclevel[SL_ESP_NETWORK] = optval;
1666 break;
1667
1668 case IPV6_IPCOMP_LEVEL:
1669 if (optval < ipsec_ipcomp_default_level &&
1670 suser(p, 0)) {
1671 error = EACCES;
1672 break;
1673 }
1674 inp->inp_seclevel[SL_IPCOMP] = optval;
1675 break;
1676 }
1677 if (!error)
1678 inp->inp_secrequire = get_sa_require(inp);
1679 #endif
1680 break;
1681
1682 default:
1683 error = ENOPROTOOPT;
1684 break;
1685 }
1686 if (m)
1687 (void)m_free(m);
1688 break;
1689
1690 case PRCO_GETOPT:
1691 switch (optname) {
1692
1693 case IPV6_2292PKTOPTIONS:
1694 /*
1695 * RFC3542 (effectively) deprecated the
1696 * semantics of the 2292-style pktoptions.
1697 * Since it was not reliable in nature (i.e.,
1698 * applications had to expect the lack of some
1699 * information after all), it would make sense
1700 * to simplify this part by always returning
1701 * empty data.
1702 */
1703 *mp = m_get(M_WAIT, MT_SOOPTS);
1704 (*mp)->m_len = 0;
1705 break;
1706
1707 case IPV6_RECVHOPOPTS:
1708 case IPV6_RECVDSTOPTS:
1709 case IPV6_RECVRTHDRDSTOPTS:
1710 case IPV6_UNICAST_HOPS:
1711 case IPV6_RECVPKTINFO:
1712 case IPV6_RECVHOPLIMIT:
1713 case IPV6_RECVRTHDR:
1714 case IPV6_RECVPATHMTU:
1715
1716 case IPV6_FAITH:
1717 case IPV6_V6ONLY:
1718 case IPV6_PORTRANGE:
1719 case IPV6_RECVTCLASS:
1720 case IPV6_AUTOFLOWLABEL:
1721 switch (optname) {
1722
1723 case IPV6_RECVHOPOPTS:
1724 optval = OPTBIT(IN6P_HOPOPTS);
1725 break;
1726
1727 case IPV6_RECVDSTOPTS:
1728 optval = OPTBIT(IN6P_DSTOPTS);
1729 break;
1730
1731 case IPV6_RECVRTHDRDSTOPTS:
1732 optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1733 break;
1734
1735 case IPV6_UNICAST_HOPS:
1736 optval = inp->inp_hops;
1737 break;
1738
1739 case IPV6_RECVPKTINFO:
1740 optval = OPTBIT(IN6P_PKTINFO);
1741 break;
1742
1743 case IPV6_RECVHOPLIMIT:
1744 optval = OPTBIT(IN6P_HOPLIMIT);
1745 break;
1746
1747 case IPV6_RECVRTHDR:
1748 optval = OPTBIT(IN6P_RTHDR);
1749 break;
1750
1751 case IPV6_RECVPATHMTU:
1752 optval = OPTBIT(IN6P_MTU);
1753 break;
1754
1755 case IPV6_FAITH:
1756 optval = OPTBIT(IN6P_FAITH);
1757 break;
1758
1759 case IPV6_V6ONLY:
1760 optval = (ip6_v6only != 0); /* XXX */
1761 break;
1762
1763 case IPV6_PORTRANGE:
1764 {
1765 int flags;
1766 flags = inp->inp_flags;
1767 if (flags & IN6P_HIGHPORT)
1768 optval = IPV6_PORTRANGE_HIGH;
1769 else if (flags & IN6P_LOWPORT)
1770 optval = IPV6_PORTRANGE_LOW;
1771 else
1772 optval = 0;
1773 break;
1774 }
1775 case IPV6_RECVTCLASS:
1776 optval = OPTBIT(IN6P_TCLASS);
1777 break;
1778
1779 case IPV6_AUTOFLOWLABEL:
1780 optval = OPTBIT(IN6P_AUTOFLOWLABEL);
1781 break;
1782 }
1783 if (error)
1784 break;
1785 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1786 m->m_len = sizeof(int);
1787 *mtod(m, int *) = optval;
1788 break;
1789
1790 case IPV6_PATHMTU:
1791 {
1792 u_long pmtu = 0;
1793 struct ip6_mtuinfo mtuinfo;
1794 struct route_in6 *ro = (struct route_in6 *)&inp->inp_route6;
1795
1796 if (!(so->so_state & SS_ISCONNECTED))
1797 return (ENOTCONN);
1798 /*
1799 * XXX: we dot not consider the case of source
1800 * routing, or optional information to specify
1801 * the outgoing interface.
1802 */
1803 error = ip6_getpmtu(ro, NULL, NULL,
1804 &inp->inp_faddr6, &pmtu, NULL);
1805 if (error)
1806 break;
1807 if (pmtu > IPV6_MAXPACKET)
1808 pmtu = IPV6_MAXPACKET;
1809
1810 bzero(&mtuinfo, sizeof(mtuinfo));
1811 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
1812 optdata = (void *)&mtuinfo;
1813 optdatalen = sizeof(mtuinfo);
1814 if (optdatalen > MCLBYTES)
1815 return (EMSGSIZE); /* XXX */
1816 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1817 if (optdatalen > MLEN)
1818 MCLGET(m, M_WAIT);
1819 m->m_len = optdatalen;
1820 bcopy(optdata, mtod(m, void *), optdatalen);
1821 break;
1822 }
1823
1824 case IPV6_2292PKTINFO:
1825 case IPV6_2292HOPLIMIT:
1826 case IPV6_2292HOPOPTS:
1827 case IPV6_2292RTHDR:
1828 case IPV6_2292DSTOPTS:
1829 switch (optname) {
1830 case IPV6_2292PKTINFO:
1831 optval = OPTBIT(IN6P_PKTINFO);
1832 break;
1833 case IPV6_2292HOPLIMIT:
1834 optval = OPTBIT(IN6P_HOPLIMIT);
1835 break;
1836 case IPV6_2292HOPOPTS:
1837 optval = OPTBIT(IN6P_HOPOPTS);
1838 break;
1839 case IPV6_2292RTHDR:
1840 optval = OPTBIT(IN6P_RTHDR);
1841 break;
1842 case IPV6_2292DSTOPTS:
1843 optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
1844 break;
1845 }
1846 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1847 m->m_len = sizeof(int);
1848 *mtod(m, int *) = optval;
1849 break;
1850 case IPV6_PKTINFO:
1851 case IPV6_HOPOPTS:
1852 case IPV6_RTHDR:
1853 case IPV6_DSTOPTS:
1854 case IPV6_RTHDRDSTOPTS:
1855 case IPV6_NEXTHOP:
1856 case IPV6_TCLASS:
1857 case IPV6_DONTFRAG:
1858 case IPV6_USE_MIN_MTU:
1859 error = ip6_getpcbopt(inp->inp_outputopts6,
1860 optname, mp);
1861 break;
1862
1863 case IPV6_MULTICAST_IF:
1864 case IPV6_MULTICAST_HOPS:
1865 case IPV6_MULTICAST_LOOP:
1866 case IPV6_JOIN_GROUP:
1867 case IPV6_LEAVE_GROUP:
1868 error = ip6_getmoptions(optname,
1869 inp->inp_moptions6, mp);
1870 break;
1871
1872 case IPSEC6_OUTSA:
1873 #ifndef IPSEC
1874 error = EINVAL;
1875 #else
1876 s = spltdb();
1877 if (inp->inp_tdb_out == NULL) {
1878 error = ENOENT;
1879 } else {
1880 tdbi.spi = inp->inp_tdb_out->tdb_spi;
1881 tdbi.dst = inp->inp_tdb_out->tdb_dst;
1882 tdbi.proto = inp->inp_tdb_out->tdb_sproto;
1883 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1884 m->m_len = sizeof(tdbi);
1885 bcopy((caddr_t)&tdbi, mtod(m, caddr_t),
1886 (unsigned)m->m_len);
1887 }
1888 splx(s);
1889 #endif
1890 break;
1891
1892 case IPV6_AUTH_LEVEL:
1893 case IPV6_ESP_TRANS_LEVEL:
1894 case IPV6_ESP_NETWORK_LEVEL:
1895 case IPV6_IPCOMP_LEVEL:
1896 #ifndef IPSEC
1897 m->m_len = sizeof(int);
1898 *mtod(m, int *) = IPSEC_LEVEL_NONE;
1899 #else
1900 m->m_len = sizeof(int);
1901 switch (optname) {
1902 case IPV6_AUTH_LEVEL:
1903 optval = inp->inp_seclevel[SL_AUTH];
1904 break;
1905
1906 case IPV6_ESP_TRANS_LEVEL:
1907 optval =
1908 inp->inp_seclevel[SL_ESP_TRANS];
1909 break;
1910
1911 case IPV6_ESP_NETWORK_LEVEL:
1912 optval =
1913 inp->inp_seclevel[SL_ESP_NETWORK];
1914 break;
1915
1916 case IPV6_IPCOMP_LEVEL:
1917 optval = inp->inp_seclevel[SL_IPCOMP];
1918 break;
1919 }
1920 *mtod(m, int *) = optval;
1921 #endif
1922 break;
1923
1924 default:
1925 error = ENOPROTOOPT;
1926 break;
1927 }
1928 break;
1929 }
1930 } else {
1931 error = EINVAL;
1932 if (op == PRCO_SETOPT && *mp)
1933 (void)m_free(*mp);
1934 }
1935 return (error);
1936 }
1937
1938 int
1939 ip6_raw_ctloutput(op, so, level, optname, mp)
1940 int op;
1941 struct socket *so;
1942 int level, optname;
1943 struct mbuf **mp;
1944 {
1945 int error = 0, optval, optlen;
1946 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
1947 struct inpcb *inp = sotoinpcb(so);
1948 struct mbuf *m = *mp;
1949
1950 optlen = m ? m->m_len : 0;
1951
1952 if (level != IPPROTO_IPV6) {
1953 if (op == PRCO_SETOPT && *mp)
1954 (void)m_free(*mp);
1955 return (EINVAL);
1956 }
1957
1958 switch (optname) {
1959 case IPV6_CHECKSUM:
1960 /*
1961 * For ICMPv6 sockets, no modification allowed for checksum
1962 * offset, permit "no change" values to help existing apps.
1963 *
1964 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
1965 * for an ICMPv6 socket will fail."
1966 * The current behavior does not meet RFC3542.
1967 */
1968 switch (op) {
1969 case PRCO_SETOPT:
1970 if (optlen != sizeof(int)) {
1971 error = EINVAL;
1972 break;
1973 }
1974 optval = *mtod(m, int *);
1975 if ((optval % 2) != 0) {
1976 /* the API assumes even offset values */
1977 error = EINVAL;
1978 } else if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
1979 if (optval != icmp6off)
1980 error = EINVAL;
1981 } else
1982 inp->in6p_cksum = optval;
1983 break;
1984
1985 case PRCO_GETOPT:
1986 if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
1987 optval = icmp6off;
1988 else
1989 optval = inp->in6p_cksum;
1990
1991 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1992 m->m_len = sizeof(int);
1993 *mtod(m, int *) = optval;
1994 break;
1995
1996 default:
1997 error = EINVAL;
1998 break;
1999 }
2000 break;
2001
2002 default:
2003 error = ENOPROTOOPT;
2004 break;
2005 }
2006
2007 if (op == PRCO_SETOPT && m)
2008 (void)m_free(m);
2009
2010 return (error);
2011 }
2012
2013 /*
2014 * Set up IP6 options in pcb for insertion in output packets.
2015 * Store in mbuf with pointer in pcbopt, adding pseudo-option
2016 * with destination address if source routed.
2017 */
2018 static int
2019 ip6_pcbopts(pktopt, m, so)
2020 struct ip6_pktopts **pktopt;
2021 struct mbuf *m;
2022 struct socket *so;
2023 {
2024 struct ip6_pktopts *opt = *pktopt;
2025 int error = 0;
2026 struct proc *p = curproc; /* XXX */
2027 int priv = 0;
2028
2029 /* turn off any old options. */
2030 if (opt)
2031 ip6_clearpktopts(opt, -1);
2032 else
2033 opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2034 *pktopt = 0;
2035
2036 if (!m || m->m_len == 0) {
2037 /*
2038 * Only turning off any previous options, regardless of
2039 * whether the opt is just created or given.
2040 */
2041 free(opt, M_IP6OPT);
2042 return (0);
2043 }
2044
2045 /* set options specified by user. */
2046 if (p && !suser(p, 0))
2047 priv = 1;
2048 if ((error = ip6_setpktopts(m, opt, NULL, priv,
2049 so->so_proto->pr_protocol)) != 0) {
2050 ip6_clearpktopts(opt, -1); /* XXX discard all options */
2051 free(opt, M_IP6OPT);
2052 return (error);
2053 }
2054 *pktopt = opt;
2055 return (0);
2056 }
2057
2058 /*
2059 * initialize ip6_pktopts. beware that there are non-zero default values in
2060 * the struct.
2061 */
2062 void
2063 ip6_initpktopts(opt)
2064 struct ip6_pktopts *opt;
2065 {
2066
2067 bzero(opt, sizeof(*opt));
2068 opt->ip6po_hlim = -1; /* -1 means default hop limit */
2069 opt->ip6po_tclass = -1; /* -1 means default traffic class */
2070 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2071 }
2072
2073 #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) /* XXX */
2074 static int
2075 ip6_pcbopt(optname, buf, len, pktopt, priv, uproto)
2076 int optname, len, priv;
2077 u_char *buf;
2078 struct ip6_pktopts **pktopt;
2079 int uproto;
2080 {
2081 struct ip6_pktopts *opt;
2082
2083 if (*pktopt == NULL) {
2084 *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2085 M_WAITOK);
2086 ip6_initpktopts(*pktopt);
2087 }
2088 opt = *pktopt;
2089
2090 return (ip6_setpktopt(optname, buf, len, opt, priv, 1, 0, uproto));
2091 }
2092
2093 static int
2094 ip6_getpcbopt(pktopt, optname, mp)
2095 struct ip6_pktopts *pktopt;
2096 int optname;
2097 struct mbuf **mp;
2098 {
2099 void *optdata = NULL;
2100 int optdatalen = 0;
2101 struct ip6_ext *ip6e;
2102 int error = 0;
2103 struct in6_pktinfo null_pktinfo;
2104 int deftclass = 0, on;
2105 int defminmtu = IP6PO_MINMTU_MCASTONLY;
2106 struct mbuf *m;
2107
2108 switch (optname) {
2109 case IPV6_PKTINFO:
2110 if (pktopt && pktopt->ip6po_pktinfo)
2111 optdata = (void *)pktopt->ip6po_pktinfo;
2112 else {
2113 /* XXX: we don't have to do this every time... */
2114 bzero(&null_pktinfo, sizeof(null_pktinfo));
2115 optdata = (void *)&null_pktinfo;
2116 }
2117 optdatalen = sizeof(struct in6_pktinfo);
2118 break;
2119 case IPV6_TCLASS:
2120 if (pktopt && pktopt->ip6po_tclass >= 0)
2121 optdata = (void *)&pktopt->ip6po_tclass;
2122 else
2123 optdata = (void *)&deftclass;
2124 optdatalen = sizeof(int);
2125 break;
2126 case IPV6_HOPOPTS:
2127 if (pktopt && pktopt->ip6po_hbh) {
2128 optdata = (void *)pktopt->ip6po_hbh;
2129 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2130 optdatalen = (ip6e->ip6e_len + 1) << 3;
2131 }
2132 break;
2133 case IPV6_RTHDR:
2134 if (pktopt && pktopt->ip6po_rthdr) {
2135 optdata = (void *)pktopt->ip6po_rthdr;
2136 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2137 optdatalen = (ip6e->ip6e_len + 1) << 3;
2138 }
2139 break;
2140 case IPV6_RTHDRDSTOPTS:
2141 if (pktopt && pktopt->ip6po_dest1) {
2142 optdata = (void *)pktopt->ip6po_dest1;
2143 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2144 optdatalen = (ip6e->ip6e_len + 1) << 3;
2145 }
2146 break;
2147 case IPV6_DSTOPTS:
2148 if (pktopt && pktopt->ip6po_dest2) {
2149 optdata = (void *)pktopt->ip6po_dest2;
2150 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2151 optdatalen = (ip6e->ip6e_len + 1) << 3;
2152 }
2153 break;
2154 case IPV6_NEXTHOP:
2155 if (pktopt && pktopt->ip6po_nexthop) {
2156 optdata = (void *)pktopt->ip6po_nexthop;
2157 optdatalen = pktopt->ip6po_nexthop->sa_len;
2158 }
2159 break;
2160 case IPV6_USE_MIN_MTU:
2161 if (pktopt)
2162 optdata = (void *)&pktopt->ip6po_minmtu;
2163 else
2164 optdata = (void *)&defminmtu;
2165 optdatalen = sizeof(int);
2166 break;
2167 case IPV6_DONTFRAG:
2168 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2169 on = 1;
2170 else
2171 on = 0;
2172 optdata = (void *)&on;
2173 optdatalen = sizeof(on);
2174 break;
2175 default: /* should not happen */
2176 #ifdef DIAGNOSTIC
2177 panic("ip6_getpcbopt: unexpected option\n");
2178 #endif
2179 return (ENOPROTOOPT);
2180 }
2181
2182 if (optdatalen > MCLBYTES)
2183 return (EMSGSIZE); /* XXX */
2184 *mp = m = m_get(M_WAIT, MT_SOOPTS);
2185 if (optdatalen > MLEN)
2186 MCLGET(m, M_WAIT);
2187 m->m_len = optdatalen;
2188 if (optdatalen)
2189 bcopy(optdata, mtod(m, void *), optdatalen);
2190
2191 return (error);
2192 }
2193
2194 void
2195 ip6_clearpktopts(pktopt, optname)
2196 struct ip6_pktopts *pktopt;
2197 int optname;
2198 {
2199 if (optname == -1 || optname == IPV6_PKTINFO) {
2200 if (pktopt->ip6po_pktinfo)
2201 free(pktopt->ip6po_pktinfo, M_IP6OPT);
2202 pktopt->ip6po_pktinfo = NULL;
2203 }
2204 if (optname == -1 || optname == IPV6_HOPLIMIT)
2205 pktopt->ip6po_hlim = -1;
2206 if (optname == -1 || optname == IPV6_TCLASS)
2207 pktopt->ip6po_tclass = -1;
2208 if (optname == -1 || optname == IPV6_NEXTHOP) {
2209 if (pktopt->ip6po_nextroute.ro_rt) {
2210 RTFREE(pktopt->ip6po_nextroute.ro_rt);
2211 pktopt->ip6po_nextroute.ro_rt = NULL;
2212 }
2213 if (pktopt->ip6po_nexthop)
2214 free(pktopt->ip6po_nexthop, M_IP6OPT);
2215 pktopt->ip6po_nexthop = NULL;
2216 }
2217 if (optname == -1 || optname == IPV6_HOPOPTS) {
2218 if (pktopt->ip6po_hbh)
2219 free(pktopt->ip6po_hbh, M_IP6OPT);
2220 pktopt->ip6po_hbh = NULL;
2221 }
2222 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2223 if (pktopt->ip6po_dest1)
2224 free(pktopt->ip6po_dest1, M_IP6OPT);
2225 pktopt->ip6po_dest1 = NULL;
2226 }
2227 if (optname == -1 || optname == IPV6_RTHDR) {
2228 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2229 free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2230 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2231 if (pktopt->ip6po_route.ro_rt) {
2232 RTFREE(pktopt->ip6po_route.ro_rt);
2233 pktopt->ip6po_route.ro_rt = NULL;
2234 }
2235 }
2236 if (optname == -1 || optname == IPV6_DSTOPTS) {
2237 if (pktopt->ip6po_dest2)
2238 free(pktopt->ip6po_dest2, M_IP6OPT);
2239 pktopt->ip6po_dest2 = NULL;
2240 }
2241 }
2242
2243 #define PKTOPT_EXTHDRCPY(type) \
2244 do {\
2245 if (src->type) {\
2246 int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2247 dst->type = malloc(hlen, M_IP6OPT, canwait);\
2248 if (dst->type == NULL && canwait == M_NOWAIT)\
2249 goto bad;\
2250 bcopy(src->type, dst->type, hlen);\
2251 }\
2252 } while (/*CONSTCOND*/ 0)
2253
2254 static int
2255 copypktopts(dst, src, canwait)
2256 struct ip6_pktopts *dst, *src;
2257 int canwait;
2258 {
2259 dst->ip6po_hlim = src->ip6po_hlim;
2260 dst->ip6po_tclass = src->ip6po_tclass;
2261 dst->ip6po_flags = src->ip6po_flags;
2262 if (src->ip6po_pktinfo) {
2263 dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2264 M_IP6OPT, canwait);
2265 if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
2266 goto bad;
2267 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2268 }
2269 if (src->ip6po_nexthop) {
2270 dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2271 M_IP6OPT, canwait);
2272 if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT)
2273 goto bad;
2274 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2275 src->ip6po_nexthop->sa_len);
2276 }
2277 PKTOPT_EXTHDRCPY(ip6po_hbh);
2278 PKTOPT_EXTHDRCPY(ip6po_dest1);
2279 PKTOPT_EXTHDRCPY(ip6po_dest2);
2280 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2281 return (0);
2282
2283 bad:
2284 if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
2285 if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
2286 if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
2287 if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
2288 if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
2289 if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
2290
2291 return (ENOBUFS);
2292 }
2293 #undef PKTOPT_EXTHDRCPY
2294
2295 struct ip6_pktopts *
2296 ip6_copypktopts(src, canwait)
2297 struct ip6_pktopts *src;
2298 int canwait;
2299 {
2300 int error;
2301 struct ip6_pktopts *dst;
2302
2303 dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2304 if (dst == NULL && canwait == M_NOWAIT)
2305 return (NULL);
2306 ip6_initpktopts(dst);
2307
2308 if ((error = copypktopts(dst, src, canwait)) != 0) {
2309 free(dst, M_IP6OPT);
2310 return (NULL);
2311 }
2312
2313 return (dst);
2314 }
2315
2316 void
2317 ip6_freepcbopts(pktopt)
2318 struct ip6_pktopts *pktopt;
2319 {
2320 if (pktopt == NULL)
2321 return;
2322
2323 ip6_clearpktopts(pktopt, -1);
2324
2325 free(pktopt, M_IP6OPT);
2326 }
2327
2328 /*
2329 * Set the IP6 multicast options in response to user setsockopt().
2330 */
2331 static int
2332 ip6_setmoptions(optname, im6op, m)
2333 int optname;
2334 struct ip6_moptions **im6op;
2335 struct mbuf *m;
2336 {
2337 int error = 0;
2338 u_int loop, ifindex;
2339 struct ipv6_mreq *mreq;
2340 struct ifnet *ifp;
2341 struct ip6_moptions *im6o = *im6op;
2342 struct route_in6 ro;
2343 struct sockaddr_in6 *dst;
2344 struct in6_multi_mship *imm;
2345 struct proc *p = curproc; /* XXX */
2346
2347 if (im6o == NULL) {
2348 /*
2349 * No multicast option buffer attached to the pcb;
2350 * allocate one and initialize to default values.
2351 */
2352 im6o = (struct ip6_moptions *)
2353 malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
2354
2355 if (im6o == NULL)
2356 return (ENOBUFS);
2357 *im6op = im6o;
2358 im6o->im6o_multicast_ifp = NULL;
2359 im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2360 im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2361 LIST_INIT(&im6o->im6o_memberships);
2362 }
2363
2364 switch (optname) {
2365
2366 case IPV6_MULTICAST_IF:
2367 /*
2368 * Select the interface for outgoing multicast packets.
2369 */
2370 if (m == NULL || m->m_len != sizeof(u_int)) {
2371 error = EINVAL;
2372 break;
2373 }
2374 bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
2375 if (ifindex == 0)
2376 ifp = NULL;
2377 else {
2378 if (ifindex < 0 || if_indexlim <= ifindex ||
2379 !ifindex2ifnet[ifindex]) {
2380 error = ENXIO; /* XXX EINVAL? */
2381 break;
2382 }
2383 ifp = ifindex2ifnet[ifindex];
2384 if (ifp == NULL ||
2385 (ifp->if_flags & IFF_MULTICAST) == 0) {
2386 error = EADDRNOTAVAIL;
2387 break;
2388 }
2389 }
2390 im6o->im6o_multicast_ifp = ifp;
2391 break;
2392
2393 case IPV6_MULTICAST_HOPS:
2394 {
2395 /*
2396 * Set the IP6 hoplimit for outgoing multicast packets.
2397 */
2398 int optval;
2399 if (m == NULL || m->m_len != sizeof(int)) {
2400 error = EINVAL;
2401 break;
2402 }
2403 bcopy(mtod(m, u_int *), &optval, sizeof(optval));
2404 if (optval < -1 || optval >= 256)
2405 error = EINVAL;
2406 else if (optval == -1)
2407 im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2408 else
2409 im6o->im6o_multicast_hlim = optval;
2410 break;
2411 }
2412
2413 case IPV6_MULTICAST_LOOP:
2414 /*
2415 * Set the loopback flag for outgoing multicast packets.
2416 * Must be zero or one.
2417 */
2418 if (m == NULL || m->m_len != sizeof(u_int)) {
2419 error = EINVAL;
2420 break;
2421 }
2422 bcopy(mtod(m, u_int *), &loop, sizeof(loop));
2423 if (loop > 1) {
2424 error = EINVAL;
2425 break;
2426 }
2427 im6o->im6o_multicast_loop = loop;
2428 break;
2429
2430 case IPV6_JOIN_GROUP:
2431 /*
2432 * Add a multicast group membership.
2433 * Group must be a valid IP6 multicast address.
2434 */
2435 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2436 error = EINVAL;
2437 break;
2438 }
2439 mreq = mtod(m, struct ipv6_mreq *);
2440 if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2441 /*
2442 * We use the unspecified address to specify to accept
2443 * all multicast addresses. Only super user is allowed
2444 * to do this.
2445 */
2446 if (suser(p, 0))
2447 {
2448 error = EACCES;
2449 break;
2450 }
2451 } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2452 error = EINVAL;
2453 break;
2454 }
2455
2456 /*
2457 * If no interface was explicitly specified, choose an
2458 * appropriate one according to the given multicast address.
2459 */
2460 if (mreq->ipv6mr_interface == 0) {
2461 /*
2462 * Look up the routing table for the
2463 * address, and choose the outgoing interface.
2464 * XXX: is it a good approach?
2465 */
2466 ro.ro_rt = NULL;
2467 dst = (struct sockaddr_in6 *)&ro.ro_dst;
2468 bzero(dst, sizeof(*dst));
2469 dst->sin6_len = sizeof(struct sockaddr_in6);
2470 dst->sin6_family = AF_INET6;
2471 dst->sin6_addr = mreq->ipv6mr_multiaddr;
2472 rtalloc((struct route *)&ro);
2473 if (ro.ro_rt == NULL) {
2474 error = EADDRNOTAVAIL;
2475 break;
2476 }
2477 ifp = ro.ro_rt->rt_ifp;
2478 rtfree(ro.ro_rt);
2479 } else {
2480 /*
2481 * If the interface is specified, validate it.
2482 */
2483 if (mreq->ipv6mr_interface < 0 ||
2484 if_indexlim <= mreq->ipv6mr_interface ||
2485 !ifindex2ifnet[mreq->ipv6mr_interface]) {
2486 error = ENXIO; /* XXX EINVAL? */
2487 break;
2488 }
2489 ifp = ifindex2ifnet[mreq->ipv6mr_interface];
2490 }
2491
2492 /*
2493 * See if we found an interface, and confirm that it
2494 * supports multicast
2495 */
2496 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2497 error = EADDRNOTAVAIL;
2498 break;
2499 }
2500 /*
2501 * Put interface index into the multicast address,
2502 * if the address has link/interface-local scope.
2503 */
2504 if (IN6_IS_SCOPE_EMBED(&mreq->ipv6mr_multiaddr)) {
2505 mreq->ipv6mr_multiaddr.s6_addr16[1] =
2506 htons(ifp->if_index);
2507 }
2508 /*
2509 * See if the membership already exists.
2510 */
2511 LIST_FOREACH(imm, &im6o->im6o_memberships, i6mm_chain)
2512 if (imm->i6mm_maddr->in6m_ifp == ifp &&
2513 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2514 &mreq->ipv6mr_multiaddr))
2515 break;
2516 if (imm != NULL) {
2517 error = EADDRINUSE;
2518 break;
2519 }
2520 /*
2521 * Everything looks good; add a new record to the multicast
2522 * address list for the given interface.
2523 */
2524 imm = in6_joingroup(ifp, &mreq->ipv6mr_multiaddr, &error);
2525 if (!imm)
2526 break;
2527 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2528 break;
2529
2530 case IPV6_LEAVE_GROUP:
2531 /*
2532 * Drop a multicast group membership.
2533 * Group must be a valid IP6 multicast address.
2534 */
2535 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2536 error = EINVAL;
2537 break;
2538 }
2539 mreq = mtod(m, struct ipv6_mreq *);
2540 if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2541 if (suser(p, 0))
2542 {
2543 error = EACCES;
2544 break;
2545 }
2546 } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2547 error = EINVAL;
2548 break;
2549 }
2550 /*
2551 * If an interface address was specified, get a pointer
2552 * to its ifnet structure.
2553 */
2554 if (mreq->ipv6mr_interface == 0)
2555 ifp = NULL;
2556 else {
2557 if (mreq->ipv6mr_interface < 0 ||
2558 if_indexlim <= mreq->ipv6mr_interface ||
2559 !ifindex2ifnet[mreq->ipv6mr_interface]) {
2560 error = ENXIO; /* XXX EINVAL? */
2561 break;
2562 }
2563 ifp = ifindex2ifnet[mreq->ipv6mr_interface];
2564 }
2565
2566 /*
2567 * Put interface index into the multicast address,
2568 * if the address has link-local scope.
2569 */
2570 if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2571 mreq->ipv6mr_multiaddr.s6_addr16[1] =
2572 htons(mreq->ipv6mr_interface);
2573 }
2574 /*
2575 * Find the membership in the membership list.
2576 */
2577 LIST_FOREACH(imm, &im6o->im6o_memberships, i6mm_chain) {
2578 if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2579 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2580 &mreq->ipv6mr_multiaddr))
2581 break;
2582 }
2583 if (imm == NULL) {
2584 /* Unable to resolve interface */
2585 error = EADDRNOTAVAIL;
2586 break;
2587 }
2588 /*
2589 * Give up the multicast address record to which the
2590 * membership points.
2591 */
2592 LIST_REMOVE(imm, i6mm_chain);
2593 in6_leavegroup(imm);
2594 break;
2595
2596 default:
2597 error = EOPNOTSUPP;
2598 break;
2599 }
2600
2601 /*
2602 * If all options have default values, no need to keep the option
2603 * structure.
2604 */
2605 if (im6o->im6o_multicast_ifp == NULL &&
2606 im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2607 im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2608 LIST_EMPTY(&im6o->im6o_memberships)) {
2609 free(*im6op, M_IPMOPTS);
2610 *im6op = NULL;
2611 }
2612
2613 return (error);
2614 }
2615
2616 /*
2617 * Return the IP6 multicast options in response to user getsockopt().
2618 */
2619 static int
2620 ip6_getmoptions(optname, im6o, mp)
2621 int optname;
2622 struct ip6_moptions *im6o;
2623 struct mbuf **mp;
2624 {
2625 u_int *hlim, *loop, *ifindex;
2626
2627 *mp = m_get(M_WAIT, MT_SOOPTS);
2628
2629 switch (optname) {
2630
2631 case IPV6_MULTICAST_IF:
2632 ifindex = mtod(*mp, u_int *);
2633 (*mp)->m_len = sizeof(u_int);
2634 if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2635 *ifindex = 0;
2636 else
2637 *ifindex = im6o->im6o_multicast_ifp->if_index;
2638 return (0);
2639
2640 case IPV6_MULTICAST_HOPS:
2641 hlim = mtod(*mp, u_int *);
2642 (*mp)->m_len = sizeof(u_int);
2643 if (im6o == NULL)
2644 *hlim = ip6_defmcasthlim;
2645 else
2646 *hlim = im6o->im6o_multicast_hlim;
2647 return (0);
2648
2649 case IPV6_MULTICAST_LOOP:
2650 loop = mtod(*mp, u_int *);
2651 (*mp)->m_len = sizeof(u_int);
2652 if (im6o == NULL)
2653 *loop = ip6_defmcasthlim;
2654 else
2655 *loop = im6o->im6o_multicast_loop;
2656 return (0);
2657
2658 default:
2659 return (EOPNOTSUPP);
2660 }
2661 }
2662
2663 /*
2664 * Discard the IP6 multicast options.
2665 */
2666 void
2667 ip6_freemoptions(im6o)
2668 struct ip6_moptions *im6o;
2669 {
2670 struct in6_multi_mship *imm;
2671
2672 if (im6o == NULL)
2673 return;
2674
2675 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2676 imm = LIST_FIRST(&im6o->im6o_memberships);
2677 LIST_REMOVE(imm, i6mm_chain);
2678 in6_leavegroup(imm);
2679 }
2680 free(im6o, M_IPMOPTS);
2681 }
2682
2683 /*
2684 * Set IPv6 outgoing packet options based on advanced API.
2685 */
2686 int
2687 ip6_setpktopts(control, opt, stickyopt, priv, uproto)
2688 struct mbuf *control;
2689 struct ip6_pktopts *opt, *stickyopt;
2690 int priv, uproto;
2691 {
2692 struct cmsghdr *cm = 0;
2693
2694 if (control == NULL || opt == NULL)
2695 return (EINVAL);
2696
2697 ip6_initpktopts(opt);
2698 if (stickyopt) {
2699 int error;
2700
2701 /*
2702 * If stickyopt is provided, make a local copy of the options
2703 * for this particular packet, then override them by ancillary
2704 * objects.
2705 * XXX: copypktopts() does not copy the cached route to a next
2706 * hop (if any). This is not very good in terms of efficiency,
2707 * but we can allow this since this option should be rarely
2708 * used.
2709 */
2710 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2711 return (error);
2712 }
2713
2714 /*
2715 * XXX: Currently, we assume all the optional information is stored
2716 * in a single mbuf.
2717 */
2718 if (control->m_next)
2719 return (EINVAL);
2720
2721 for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2722 control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2723 int error;
2724
2725 if (control->m_len < CMSG_LEN(0))
2726 return (EINVAL);
2727
2728 cm = mtod(control, struct cmsghdr *);
2729 if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2730 return (EINVAL);
2731 if (cm->cmsg_level != IPPROTO_IPV6)
2732 continue;
2733
2734 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2735 cm->cmsg_len - CMSG_LEN(0), opt, priv, 0, 1, uproto);
2736 if (error)
2737 return (error);
2738 }
2739
2740 return (0);
2741 }
2742
2743 /*
2744 * Set a particular packet option, as a sticky option or an ancillary data
2745 * item. "len" can be 0 only when it's a sticky option.
2746 * We have 4 cases of combination of "sticky" and "cmsg":
2747 * "sticky=0, cmsg=0": impossible
2748 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2749 * "sticky=1, cmsg=0": RFC3542 socket option
2750 * "sticky=1, cmsg=1": RFC2292 socket option
2751 */
2752 static int
2753 ip6_setpktopt(optname, buf, len, opt, priv, sticky, cmsg, uproto)
2754 int optname, len, priv, sticky, cmsg, uproto;
2755 u_char *buf;
2756 struct ip6_pktopts *opt;
2757 {
2758 int minmtupolicy;
2759
2760 if (!sticky && !cmsg) {
2761 #ifdef DIAGNOSTIC
2762 printf("ip6_setpktopt: impossible case\n");
2763 #endif
2764 return (EINVAL);
2765 }
2766
2767 /*
2768 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2769 * not be specified in the context of RFC3542. Conversely,
2770 * RFC3542 types should not be specified in the context of RFC2292.
2771 */
2772 if (!cmsg) {
2773 switch (optname) {
2774 case IPV6_2292PKTINFO:
2775 case IPV6_2292HOPLIMIT:
2776 case IPV6_2292NEXTHOP:
2777 case IPV6_2292HOPOPTS:
2778 case IPV6_2292DSTOPTS:
2779 case IPV6_2292RTHDR:
2780 case IPV6_2292PKTOPTIONS:
2781 return (ENOPROTOOPT);
2782 }
2783 }
2784 if (sticky && cmsg) {
2785 switch (optname) {
2786 case IPV6_PKTINFO:
2787 case IPV6_HOPLIMIT:
2788 case IPV6_NEXTHOP:
2789 case IPV6_HOPOPTS:
2790 case IPV6_DSTOPTS:
2791 case IPV6_RTHDRDSTOPTS:
2792 case IPV6_RTHDR:
2793 case IPV6_USE_MIN_MTU:
2794 case IPV6_DONTFRAG:
2795 case IPV6_TCLASS:
2796 return (ENOPROTOOPT);
2797 }
2798 }
2799
2800 switch (optname) {
2801 case IPV6_2292PKTINFO:
2802 case IPV6_PKTINFO:
2803 {
2804 struct ifnet *ifp = NULL;
2805 struct in6_pktinfo *pktinfo;
2806
2807 if (len != sizeof(struct in6_pktinfo))
2808 return (EINVAL);
2809
2810 pktinfo = (struct in6_pktinfo *)buf;
2811
2812 /*
2813 * An application can clear any sticky IPV6_PKTINFO option by
2814 * doing a "regular" setsockopt with ipi6_addr being
2815 * in6addr_any and ipi6_ifindex being zero.
2816 * [RFC 3542, Section 6]
2817 */
2818 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2819 pktinfo->ipi6_ifindex == 0 &&
2820 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2821 ip6_clearpktopts(opt, optname);
2822 break;
2823 }
2824
2825 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2826 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2827 return (EINVAL);
2828 }
2829
2830 /* validate the interface index if specified. */
2831 if (pktinfo->ipi6_ifindex >= if_indexlim ||
2832 pktinfo->ipi6_ifindex < 0) {
2833 return (ENXIO);
2834 }
2835 if (pktinfo->ipi6_ifindex) {
2836 ifp = ifindex2ifnet[pktinfo->ipi6_ifindex];
2837 if (ifp == NULL)
2838 return (ENXIO);
2839 }
2840
2841 /*
2842 * We store the address anyway, and let in6_selectsrc()
2843 * validate the specified address. This is because ipi6_addr
2844 * may not have enough information about its scope zone, and
2845 * we may need additional information (such as outgoing
2846 * interface or the scope zone of a destination address) to
2847 * disambiguate the scope.
2848 * XXX: the delay of the validation may confuse the
2849 * application when it is used as a sticky option.
2850 */
2851 if (opt->ip6po_pktinfo == NULL) {
2852 opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
2853 M_IP6OPT, M_NOWAIT);
2854 if (opt->ip6po_pktinfo == NULL)
2855 return (ENOBUFS);
2856 }
2857 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
2858 break;
2859 }
2860
2861 case IPV6_2292HOPLIMIT:
2862 case IPV6_HOPLIMIT:
2863 {
2864 int *hlimp;
2865
2866 /*
2867 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
2868 * to simplify the ordering among hoplimit options.
2869 */
2870 if (optname == IPV6_HOPLIMIT && sticky)
2871 return (ENOPROTOOPT);
2872
2873 if (len != sizeof(int))
2874 return (EINVAL);
2875 hlimp = (int *)buf;
2876 if (*hlimp < -1 || *hlimp > 255)
2877 return (EINVAL);
2878
2879 opt->ip6po_hlim = *hlimp;
2880 break;
2881 }
2882
2883 case IPV6_TCLASS:
2884 {
2885 int tclass;
2886
2887 if (len != sizeof(int))
2888 return (EINVAL);
2889 tclass = *(int *)buf;
2890 if (tclass < -1 || tclass > 255)
2891 return (EINVAL);
2892
2893 opt->ip6po_tclass = tclass;
2894 break;
2895 }
2896
2897 case IPV6_2292NEXTHOP:
2898 case IPV6_NEXTHOP:
2899 if (!priv)
2900 return (EPERM);
2901
2902 if (len == 0) { /* just remove the option */
2903 ip6_clearpktopts(opt, IPV6_NEXTHOP);
2904 break;
2905 }
2906
2907 /* check if cmsg_len is large enough for sa_len */
2908 if (len < sizeof(struct sockaddr) || len < *buf)
2909 return (EINVAL);
2910
2911 switch (((struct sockaddr *)buf)->sa_family) {
2912 case AF_INET6:
2913 {
2914 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
2915
2916 if (sa6->sin6_len != sizeof(struct sockaddr_in6))
2917 return (EINVAL);
2918
2919 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
2920 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
2921 return (EINVAL);
2922 }
2923 if (IN6_IS_SCOPE_EMBED(&sa6->sin6_addr)) {
2924 if (sa6->sin6_scope_id < 0 ||
2925 if_indexlim <= sa6->sin6_scope_id ||
2926 !ifindex2ifnet[sa6->sin6_scope_id])
2927 return (EINVAL);
2928 sa6->sin6_addr.s6_addr16[1] =
2929 htonl(sa6->sin6_scope_id);
2930 } else if (sa6->sin6_scope_id)
2931 return (EINVAL);
2932 break;
2933 }
2934 case AF_LINK: /* eventually be supported? */
2935 default:
2936 return (EAFNOSUPPORT);
2937 }
2938
2939 /* turn off the previous option, then set the new option. */
2940 ip6_clearpktopts(opt, IPV6_NEXTHOP);
2941 opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
2942 if (opt->ip6po_nexthop == NULL)
2943 return (ENOBUFS);
2944 bcopy(buf, opt->ip6po_nexthop, *buf);
2945 break;
2946
2947 case IPV6_2292HOPOPTS:
2948 case IPV6_HOPOPTS:
2949 {
2950 struct ip6_hbh *hbh;
2951 int hbhlen;
2952
2953 /*
2954 * XXX: We don't allow a non-privileged user to set ANY HbH
2955 * options, since per-option restriction has too much
2956 * overhead.
2957 */
2958 if (!priv)
2959 return (EPERM);
2960
2961 if (len == 0) {
2962 ip6_clearpktopts(opt, IPV6_HOPOPTS);
2963 break; /* just remove the option */
2964 }
2965
2966 /* message length validation */
2967 if (len < sizeof(struct ip6_hbh))
2968 return (EINVAL);
2969 hbh = (struct ip6_hbh *)buf;
2970 hbhlen = (hbh->ip6h_len + 1) << 3;
2971 if (len != hbhlen)
2972 return (EINVAL);
2973
2974 /* turn off the previous option, then set the new option. */
2975 ip6_clearpktopts(opt, IPV6_HOPOPTS);
2976 opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
2977 if (opt->ip6po_hbh == NULL)
2978 return (ENOBUFS);
2979 bcopy(hbh, opt->ip6po_hbh, hbhlen);
2980
2981 break;
2982 }
2983
2984 case IPV6_2292DSTOPTS:
2985 case IPV6_DSTOPTS:
2986 case IPV6_RTHDRDSTOPTS:
2987 {
2988 struct ip6_dest *dest, **newdest = NULL;
2989 int destlen;
2990
2991 if (!priv) /* XXX: see the comment for IPV6_HOPOPTS */
2992 return (EPERM);
2993
2994 if (len == 0) {
2995 ip6_clearpktopts(opt, optname);
2996 break; /* just remove the option */
2997 }
2998
2999 /* message length validation */
3000 if (len < sizeof(struct ip6_dest))
3001 return (EINVAL);
3002 dest = (struct ip6_dest *)buf;
3003 destlen = (dest->ip6d_len + 1) << 3;
3004 if (len != destlen)
3005 return (EINVAL);
3006 /*
3007 * Determine the position that the destination options header
3008 * should be inserted; before or after the routing header.
3009 */
3010 switch (optname) {
3011 case IPV6_2292DSTOPTS:
3012 /*
3013 * The old advanced API is ambiguous on this point.
3014 * Our approach is to determine the position based
3015 * according to the existence of a routing header.
3016 * Note, however, that this depends on the order of the
3017 * extension headers in the ancillary data; the 1st
3018 * part of the destination options header must appear
3019 * before the routing header in the ancillary data,
3020 * too.
3021 * RFC3542 solved the ambiguity by introducing
3022 * separate ancillary data or option types.
3023 */
3024 if (opt->ip6po_rthdr == NULL)
3025 newdest = &opt->ip6po_dest1;
3026 else
3027 newdest = &opt->ip6po_dest2;
3028 break;
3029 case IPV6_RTHDRDSTOPTS:
3030 newdest = &opt->ip6po_dest1;
3031 break;
3032 case IPV6_DSTOPTS:
3033 newdest = &opt->ip6po_dest2;
3034 break;
3035 }
3036
3037 /* turn off the previous option, then set the new option. */
3038 ip6_clearpktopts(opt, optname);
3039 *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
3040 if (*newdest == NULL)
3041 return (ENOBUFS);
3042 bcopy(dest, *newdest, destlen);
3043
3044 break;
3045 }
3046
3047 case IPV6_2292RTHDR:
3048 case IPV6_RTHDR:
3049 {
3050 struct ip6_rthdr *rth;
3051 int rthlen;
3052
3053 if (len == 0) {
3054 ip6_clearpktopts(opt, IPV6_RTHDR);
3055 break; /* just remove the option */
3056 }
3057
3058 /* message length validation */
3059 if (len < sizeof(struct ip6_rthdr))
3060 return (EINVAL);
3061 rth = (struct ip6_rthdr *)buf;
3062 rthlen = (rth->ip6r_len + 1) << 3;
3063 if (len != rthlen)
3064 return (EINVAL);
3065
3066 switch (rth->ip6r_type) {
3067 case IPV6_RTHDR_TYPE_0:
3068 if (rth->ip6r_len == 0) /* must contain one addr */
3069 return (EINVAL);
3070 if (rth->ip6r_len % 2) /* length must be even */
3071 return (EINVAL);
3072 if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3073 return (EINVAL);
3074 break;
3075 default:
3076 return (EINVAL); /* not supported */
3077 }
3078 /* turn off the previous option */
3079 ip6_clearpktopts(opt, IPV6_RTHDR);
3080 opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
3081 if (opt->ip6po_rthdr == NULL)
3082 return (ENOBUFS);
3083 bcopy(rth, opt->ip6po_rthdr, rthlen);
3084 break;
3085 }
3086
3087 case IPV6_USE_MIN_MTU:
3088 if (len != sizeof(int))
3089 return (EINVAL);
3090 minmtupolicy = *(int *)buf;
3091 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3092 minmtupolicy != IP6PO_MINMTU_DISABLE &&
3093 minmtupolicy != IP6PO_MINMTU_ALL) {
3094 return (EINVAL);
3095 }
3096 opt->ip6po_minmtu = minmtupolicy;
3097 break;
3098
3099 case IPV6_DONTFRAG:
3100 if (len != sizeof(int))
3101 return (EINVAL);
3102
3103 if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3104 /*
3105 * we ignore this option for TCP sockets.
3106 * (RFC3542 leaves this case unspecified.)
3107 */
3108 opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3109 } else
3110 opt->ip6po_flags |= IP6PO_DONTFRAG;
3111 break;
3112
3113 default:
3114 return (ENOPROTOOPT);
3115 } /* end of switch */
3116
3117 return (0);
3118 }
3119
3120 /*
3121 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3122 * packet to the input queue of a specified interface. Note that this
3123 * calls the output routine of the loopback "driver", but with an interface
3124 * pointer that might NOT be lo0ifp -- easier than replicating that code here.
3125 */
3126 void
3127 ip6_mloopback(ifp, m, dst)
3128 struct ifnet *ifp;
3129 struct mbuf *m;
3130 struct sockaddr_in6 *dst;
3131 {
3132 struct mbuf *copym;
3133 struct ip6_hdr *ip6;
3134
3135 /*
3136 * Duplicate the packet.
3137 */
3138 copym = m_copy(m, 0, M_COPYALL);
3139 if (copym == NULL)
3140 return;
3141
3142 /*
3143 * Make sure to deep-copy IPv6 header portion in case the data
3144 * is in an mbuf cluster, so that we can safely override the IPv6
3145 * header portion later.
3146 */
3147 if ((copym->m_flags & M_EXT) != 0 ||
3148 copym->m_len < sizeof(struct ip6_hdr)) {
3149 copym = m_pullup(copym, sizeof(struct ip6_hdr));
3150 if (copym == NULL)
3151 return;
3152 }
3153
3154 #ifdef DIAGNOSTIC
3155 if (copym->m_len < sizeof(*ip6)) {
3156 m_freem(copym);
3157 return;
3158 }
3159 #endif
3160
3161 ip6 = mtod(copym, struct ip6_hdr *);
3162 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src))
3163 ip6->ip6_src.s6_addr16[1] = 0;
3164 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst))
3165 ip6->ip6_dst.s6_addr16[1] = 0;
3166
3167 (void)looutput(ifp, copym, (struct sockaddr *)dst, NULL);
3168 }
3169
3170 /*
3171 * Chop IPv6 header off from the payload.
3172 */
3173 static int
3174 ip6_splithdr(m, exthdrs)
3175 struct mbuf *m;
3176 struct ip6_exthdrs *exthdrs;
3177 {
3178 struct mbuf *mh;
3179 struct ip6_hdr *ip6;
3180
3181 ip6 = mtod(m, struct ip6_hdr *);
3182 if (m->m_len > sizeof(*ip6)) {
3183 MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3184 if (mh == 0) {
3185 m_freem(m);
3186 return ENOBUFS;
3187 }
3188 M_MOVE_PKTHDR(mh, m);
3189 MH_ALIGN(mh, sizeof(*ip6));
3190 m->m_len -= sizeof(*ip6);
3191 m->m_data += sizeof(*ip6);
3192 mh->m_next = m;
3193 m = mh;
3194 m->m_len = sizeof(*ip6);
3195 bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3196 }
3197 exthdrs->ip6e_ip6 = m;
3198 return 0;
3199 }
3200
3201 /*
3202 * Compute IPv6 extension header length.
3203 */
3204 int
3205 ip6_optlen(inp)
3206 struct inpcb *inp;
3207 {
3208 int len;
3209
3210 if (!inp->inp_outputopts6)
3211 return 0;
3212
3213 len = 0;
3214 #define elen(x) \
3215 (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3216
3217 len += elen(inp->inp_outputopts6->ip6po_hbh);
3218 len += elen(inp->inp_outputopts6->ip6po_dest1);
3219 len += elen(inp->inp_outputopts6->ip6po_rthdr);
3220 len += elen(inp->inp_outputopts6->ip6po_dest2);
3221 return len;
3222 #undef elen
3223 }