1 /* $OpenBSD: tcp_usrreq.c,v 1.91 2007/06/25 12:17:43 markus Exp $ */
2 /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */
3
4 /*
5 * Copyright (c) 1982, 1986, 1988, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995
33 *
34 * NRL grants permission for redistribution and use in source and binary
35 * forms, with or without modification, of the software and documentation
36 * created at NRL provided that the following conditions are met:
37 *
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgements:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * This product includes software developed at the Information
48 * Technology Division, US Naval Research Laboratory.
49 * 4. Neither the name of the NRL nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64 *
65 * The views and conclusions contained in the software and documentation
66 * are those of the authors and should not be interpreted as representing
67 * official policies, either expressed or implied, of the US Naval
68 * Research Laboratory (NRL).
69 */
70
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/mbuf.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/protosw.h>
77 #include <sys/stat.h>
78 #include <sys/sysctl.h>
79 #include <sys/domain.h>
80 #include <sys/kernel.h>
81
82 #include <dev/rndvar.h>
83
84 #include <net/if.h>
85 #include <net/route.h>
86
87 #include <netinet/in.h>
88 #include <netinet/in_systm.h>
89 #include <netinet/in_var.h>
90 #include <netinet/ip.h>
91 #include <netinet/in_pcb.h>
92 #include <netinet/ip_var.h>
93 #include <netinet/tcp.h>
94 #include <netinet/tcp_fsm.h>
95 #include <netinet/tcp_seq.h>
96 #include <netinet/tcp_timer.h>
97 #include <netinet/tcp_var.h>
98 #include <netinet/tcpip.h>
99 #include <netinet/tcp_debug.h>
100
101 /*
102 * TCP protocol interface to socket abstraction.
103 */
104 extern char *tcpstates[];
105 extern int tcptv_keep_init;
106
107 extern int tcp_rst_ppslim;
108
109 /* from in_pcb.c */
110 extern struct baddynamicports baddynamicports;
111
112 #ifndef TCP_SENDSPACE
113 #define TCP_SENDSPACE 1024*16
114 #endif
115 u_int tcp_sendspace = TCP_SENDSPACE;
116 #ifndef TCP_RECVSPACE
117 #define TCP_RECVSPACE 1024*16
118 #endif
119 u_int tcp_recvspace = TCP_RECVSPACE;
120
121 int *tcpctl_vars[TCPCTL_MAXID] = TCPCTL_VARS;
122
123 struct inpcbtable tcbtable;
124
125 int tcp_ident(void *, size_t *, void *, size_t, int);
126
#ifdef INET6
/*
 * IPv6 entry point for TCP user requests.  The request is handed to
 * tcp_usrreq() unchanged; the process pointer `p' is accepted but not
 * forwarded.
 */
int
tcp6_usrreq(so, req, m, nam, control, p)
	struct socket *so;
	int req;
	struct mbuf *m;
	struct mbuf *nam;
	struct mbuf *control;
	struct proc *p;
{
	int rv;

	rv = tcp_usrreq(so, req, m, nam, control);
	return (rv);
}
#endif
139
140 /*
141 * Process a TCP user request for TCP tb. If this is a send request
142 * then m is the mbuf chain of send data. If this is a timer expiration
143 * (called from the software clock routine), then timertype tells which timer.
144 */
145 /*ARGSUSED*/
146 int
147 tcp_usrreq(so, req, m, nam, control)
148 struct socket *so;
149 int req;
150 struct mbuf *m, *nam, *control;
151 {
152 struct sockaddr_in *sin;
153 struct inpcb *inp;
154 struct tcpcb *tp = NULL;
155 int s;
156 int error = 0;
157 short ostate;
158
159 if (req == PRU_CONTROL) {
160 #ifdef INET6
161 if (sotopf(so) == PF_INET6)
162 return in6_control(so, (u_long)m, (caddr_t)nam,
163 (struct ifnet *)control, 0);
164 else
165 #endif /* INET6 */
166 return (in_control(so, (u_long)m, (caddr_t)nam,
167 (struct ifnet *)control));
168 }
169 if (control && control->m_len) {
170 m_freem(control);
171 if (m)
172 m_freem(m);
173 return (EINVAL);
174 }
175
176 s = splsoftnet();
177 inp = sotoinpcb(so);
178 /*
179 * When a TCP is attached to a socket, then there will be
180 * a (struct inpcb) pointed at by the socket, and this
181 * structure will point at a subsidiary (struct tcpcb).
182 */
183 if (inp == 0 && req != PRU_ATTACH) {
184 splx(s);
185 /*
186 * The following corrects an mbuf leak under rare
187 * circumstances
188 */
189 if (m && (req == PRU_SEND || req == PRU_SENDOOB))
190 m_freem(m);
191 return (EINVAL); /* XXX */
192 }
193 if (inp) {
194 tp = intotcpcb(inp);
195 /* WHAT IF TP IS 0? */
196 #ifdef KPROF
197 tcp_acounts[tp->t_state][req]++;
198 #endif
199 ostate = tp->t_state;
200 } else
201 ostate = 0;
202 switch (req) {
203
204 /*
205 * TCP attaches to socket via PRU_ATTACH, reserving space,
206 * and an internet control block.
207 */
208 case PRU_ATTACH:
209 if (inp) {
210 error = EISCONN;
211 break;
212 }
213 error = tcp_attach(so);
214 if (error)
215 break;
216 if ((so->so_options & SO_LINGER) && so->so_linger == 0)
217 so->so_linger = TCP_LINGERTIME;
218 tp = sototcpcb(so);
219 break;
220
221 /*
222 * PRU_DETACH detaches the TCP protocol from the socket.
223 * If the protocol state is non-embryonic, then can't
224 * do this directly: have to initiate a PRU_DISCONNECT,
225 * which may finish later; embryonic TCB's can just
226 * be discarded here.
227 */
228 case PRU_DETACH:
229 tp = tcp_disconnect(tp);
230 break;
231
232 /*
233 * Give the socket an address.
234 */
235 case PRU_BIND:
236 #ifdef INET6
237 if (inp->inp_flags & INP_IPV6)
238 error = in6_pcbbind(inp, nam);
239 else
240 #endif
241 error = in_pcbbind(inp, nam);
242 if (error)
243 break;
244 break;
245
246 /*
247 * Prepare to accept connections.
248 */
249 case PRU_LISTEN:
250 if (inp->inp_lport == 0) {
251 #ifdef INET6
252 if (inp->inp_flags & INP_IPV6)
253 error = in6_pcbbind(inp, NULL);
254 else
255 #endif
256 error = in_pcbbind(inp, NULL);
257 }
258 /* If the in_pcbbind() above is called, the tp->pf
259 should still be whatever it was before. */
260 if (error == 0)
261 tp->t_state = TCPS_LISTEN;
262 break;
263
264 /*
265 * Initiate connection to peer.
266 * Create a template for use in transmissions on this connection.
267 * Enter SYN_SENT state, and mark socket as connecting.
268 * Start keep-alive timer, and seed output sequence space.
269 * Send initial segment on connection.
270 */
271 case PRU_CONNECT:
272 sin = mtod(nam, struct sockaddr_in *);
273
274 #ifdef INET6
275 if (sin->sin_family == AF_INET6) {
276 struct in6_addr *in6_addr = &mtod(nam,
277 struct sockaddr_in6 *)->sin6_addr;
278
279 if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) ||
280 IN6_IS_ADDR_MULTICAST(in6_addr) ||
281 (IN6_IS_ADDR_V4MAPPED(in6_addr) &&
282 ((in6_addr->s6_addr32[3] == INADDR_ANY) ||
283 IN_MULTICAST(in6_addr->s6_addr32[3]) ||
284 in_broadcast(sin->sin_addr, NULL)))) {
285 error = EINVAL;
286 break;
287 }
288
289 if (inp->inp_lport == 0) {
290 error = in6_pcbbind(inp, NULL);
291 if (error)
292 break;
293 }
294 error = in6_pcbconnect(inp, nam);
295 } else if (sin->sin_family == AF_INET)
296 #endif /* INET6 */
297 {
298 if ((sin->sin_addr.s_addr == INADDR_ANY) ||
299 IN_MULTICAST(sin->sin_addr.s_addr) ||
300 in_broadcast(sin->sin_addr, NULL)) {
301 error = EINVAL;
302 break;
303 }
304
305 if (inp->inp_lport == 0) {
306 error = in_pcbbind(inp, NULL);
307 if (error)
308 break;
309 }
310 error = in_pcbconnect(inp, nam);
311 }
312
313 if (error)
314 break;
315
316 tp->t_template = tcp_template(tp);
317 if (tp->t_template == 0) {
318 in_pcbdisconnect(inp);
319 error = ENOBUFS;
320 break;
321 }
322
323 so->so_state |= SS_CONNECTOUT;
324
325 /* Compute window scaling to request. */
326 tcp_rscale(tp, so->so_rcv.sb_hiwat);
327
328 soisconnecting(so);
329 tcpstat.tcps_connattempt++;
330 tp->t_state = TCPS_SYN_SENT;
331 TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
332 #ifdef TCP_COMPAT_42
333 tp->iss = tcp_iss;
334 tcp_iss += TCP_ISSINCR/2;
335 #else /* TCP_COMPAT_42 */
336 tcp_set_iss_tsm(tp);
337 #endif /* !TCP_COMPAT_42 */
338 tcp_sendseqinit(tp);
339 #if defined(TCP_SACK)
340 tp->snd_last = tp->snd_una;
341 #endif
342 #if defined(TCP_SACK) && defined(TCP_FACK)
343 tp->snd_fack = tp->snd_una;
344 tp->retran_data = 0;
345 tp->snd_awnd = 0;
346 #endif
347 error = tcp_output(tp);
348 break;
349
350 /*
351 * Create a TCP connection between two sockets.
352 */
353 case PRU_CONNECT2:
354 error = EOPNOTSUPP;
355 break;
356
357 /*
358 * Initiate disconnect from peer.
359 * If connection never passed embryonic stage, just drop;
360 * else if don't need to let data drain, then can just drop anyways,
361 * else have to begin TCP shutdown process: mark socket disconnecting,
362 * drain unread data, state switch to reflect user close, and
363 * send segment (e.g. FIN) to peer. Socket will be really disconnected
364 * when peer sends FIN and acks ours.
365 *
366 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
367 */
368 case PRU_DISCONNECT:
369 tp = tcp_disconnect(tp);
370 break;
371
372 /*
373 * Accept a connection. Essentially all the work is
374 * done at higher levels; just return the address
375 * of the peer, storing through addr.
376 */
377 case PRU_ACCEPT:
378 #ifdef INET6
379 if (inp->inp_flags & INP_IPV6)
380 in6_setpeeraddr(inp, nam);
381 else
382 #endif
383 in_setpeeraddr(inp, nam);
384 break;
385
386 /*
387 * Mark the connection as being incapable of further output.
388 */
389 case PRU_SHUTDOWN:
390 if (so->so_state & SS_CANTSENDMORE)
391 break;
392 socantsendmore(so);
393 tp = tcp_usrclosed(tp);
394 if (tp)
395 error = tcp_output(tp);
396 break;
397
398 /*
399 * After a receive, possibly send window update to peer.
400 */
401 case PRU_RCVD:
402 /*
403 * soreceive() calls this function when a user receives
404 * ancillary data on a listening socket. We don't call
405 * tcp_output in such a case, since there is no header
406 * template for a listening socket and hence the kernel
407 * will panic.
408 */
409 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0)
410 (void) tcp_output(tp);
411 break;
412
413 /*
414 * Do a send by putting data in output queue and updating urgent
415 * marker if URG set. Possibly send more data.
416 */
417 case PRU_SEND:
418 sbappendstream(&so->so_snd, m);
419 error = tcp_output(tp);
420 break;
421
422 /*
423 * Abort the TCP.
424 */
425 case PRU_ABORT:
426 tp = tcp_drop(tp, ECONNABORTED);
427 break;
428
429 case PRU_SENSE:
430 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
431 splx(s);
432 return (0);
433
434 case PRU_RCVOOB:
435 if ((so->so_oobmark == 0 &&
436 (so->so_state & SS_RCVATMARK) == 0) ||
437 so->so_options & SO_OOBINLINE ||
438 tp->t_oobflags & TCPOOB_HADDATA) {
439 error = EINVAL;
440 break;
441 }
442 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
443 error = EWOULDBLOCK;
444 break;
445 }
446 m->m_len = 1;
447 *mtod(m, caddr_t) = tp->t_iobc;
448 if (((long)nam & MSG_PEEK) == 0)
449 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
450 break;
451
452 case PRU_SENDOOB:
453 if (sbspace(&so->so_snd) < -512) {
454 m_freem(m);
455 error = ENOBUFS;
456 break;
457 }
458 /*
459 * According to RFC961 (Assigned Protocols),
460 * the urgent pointer points to the last octet
461 * of urgent data. We continue, however,
462 * to consider it to indicate the first octet
463 * of data past the urgent section.
464 * Otherwise, snd_up should be one lower.
465 */
466 sbappendstream(&so->so_snd, m);
467 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
468 tp->t_force = 1;
469 error = tcp_output(tp);
470 tp->t_force = 0;
471 break;
472
473 case PRU_SOCKADDR:
474 #ifdef INET6
475 if (inp->inp_flags & INP_IPV6)
476 in6_setsockaddr(inp, nam);
477 else
478 #endif
479 in_setsockaddr(inp, nam);
480 break;
481
482 case PRU_PEERADDR:
483 #ifdef INET6
484 if (inp->inp_flags & INP_IPV6)
485 in6_setpeeraddr(inp, nam);
486 else
487 #endif
488 in_setpeeraddr(inp, nam);
489 break;
490
491 default:
492 panic("tcp_usrreq");
493 }
494 if (tp && (so->so_options & SO_DEBUG))
495 tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0);
496 splx(s);
497 return (error);
498 }
499
500 int
501 tcp_ctloutput(op, so, level, optname, mp)
502 int op;
503 struct socket *so;
504 int level, optname;
505 struct mbuf **mp;
506 {
507 int error = 0, s;
508 struct inpcb *inp;
509 struct tcpcb *tp;
510 struct mbuf *m;
511 int i;
512
513 s = splsoftnet();
514 inp = sotoinpcb(so);
515 if (inp == NULL) {
516 splx(s);
517 if (op == PRCO_SETOPT && *mp)
518 (void) m_free(*mp);
519 return (ECONNRESET);
520 }
521 #ifdef INET6
522 tp = intotcpcb(inp);
523 #endif /* INET6 */
524 if (level != IPPROTO_TCP) {
525 switch (so->so_proto->pr_domain->dom_family) {
526 #ifdef INET6
527 case PF_INET6:
528 error = ip6_ctloutput(op, so, level, optname, mp);
529 break;
530 #endif /* INET6 */
531 case PF_INET:
532 error = ip_ctloutput(op, so, level, optname, mp);
533 break;
534 default:
535 error = EAFNOSUPPORT; /*?*/
536 break;
537 }
538 splx(s);
539 return (error);
540 }
541 #ifndef INET6
542 tp = intotcpcb(inp);
543 #endif /* !INET6 */
544
545 switch (op) {
546
547 case PRCO_SETOPT:
548 m = *mp;
549 switch (optname) {
550
551 case TCP_NODELAY:
552 if (m == NULL || m->m_len < sizeof (int))
553 error = EINVAL;
554 else if (*mtod(m, int *))
555 tp->t_flags |= TF_NODELAY;
556 else
557 tp->t_flags &= ~TF_NODELAY;
558 break;
559
560 case TCP_MAXSEG:
561 if (m == NULL || m->m_len < sizeof (int)) {
562 error = EINVAL;
563 break;
564 }
565
566 i = *mtod(m, int *);
567 if (i > 0 && i <= tp->t_maxseg)
568 tp->t_maxseg = i;
569 else
570 error = EINVAL;
571 break;
572
573 #ifdef TCP_SACK
574 case TCP_SACK_ENABLE:
575 if (m == NULL || m->m_len < sizeof (int)) {
576 error = EINVAL;
577 break;
578 }
579
580 if (TCPS_HAVEESTABLISHED(tp->t_state)) {
581 error = EPERM;
582 break;
583 }
584
585 if (tp->t_flags & TF_SIGNATURE) {
586 error = EPERM;
587 break;
588 }
589
590 if (*mtod(m, int *))
591 tp->sack_enable = 1;
592 else
593 tp->sack_enable = 0;
594 break;
595 #endif
596 #ifdef TCP_SIGNATURE
597 case TCP_MD5SIG:
598 if (m == NULL || m->m_len < sizeof (int)) {
599 error = EINVAL;
600 break;
601 }
602
603 if (TCPS_HAVEESTABLISHED(tp->t_state)) {
604 error = EPERM;
605 break;
606 }
607
608 if (*mtod(m, int *)) {
609 tp->t_flags |= TF_SIGNATURE;
610 #ifdef TCP_SACK
611 tp->sack_enable = 0;
612 #endif /* TCP_SACK */
613 } else
614 tp->t_flags &= ~TF_SIGNATURE;
615 break;
616 #endif /* TCP_SIGNATURE */
617 default:
618 error = ENOPROTOOPT;
619 break;
620 }
621 if (m)
622 (void) m_free(m);
623 break;
624
625 case PRCO_GETOPT:
626 *mp = m = m_get(M_WAIT, MT_SOOPTS);
627 m->m_len = sizeof(int);
628
629 switch (optname) {
630 case TCP_NODELAY:
631 *mtod(m, int *) = tp->t_flags & TF_NODELAY;
632 break;
633 case TCP_MAXSEG:
634 *mtod(m, int *) = tp->t_maxseg;
635 break;
636 #ifdef TCP_SACK
637 case TCP_SACK_ENABLE:
638 *mtod(m, int *) = tp->sack_enable;
639 break;
640 #endif
641 #ifdef TCP_SIGNATURE
642 case TCP_MD5SIG:
643 *mtod(m, int *) = tp->t_flags & TF_SIGNATURE;
644 break;
645 #endif
646 default:
647 error = ENOPROTOOPT;
648 break;
649 }
650 break;
651 }
652 splx(s);
653 return (error);
654 }
655
656 /*
657 * Attach TCP protocol to socket, allocating
658 * internet protocol control block, tcp control block,
659 * bufer space, and entering LISTEN state if to accept connections.
660 */
661 int
662 tcp_attach(so)
663 struct socket *so;
664 {
665 struct tcpcb *tp;
666 struct inpcb *inp;
667 int error;
668
669 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
670 error = soreserve(so, tcp_sendspace, tcp_recvspace);
671 if (error)
672 return (error);
673 }
674 error = in_pcballoc(so, &tcbtable);
675 if (error)
676 return (error);
677 inp = sotoinpcb(so);
678 tp = tcp_newtcpcb(inp);
679 if (tp == NULL) {
680 int nofd = so->so_state & SS_NOFDREF; /* XXX */
681
682 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
683 in_pcbdetach(inp);
684 so->so_state |= nofd;
685 return (ENOBUFS);
686 }
687 tp->t_state = TCPS_CLOSED;
688 #ifdef INET6
689 /* we disallow IPv4 mapped address completely. */
690 if (inp->inp_flags & INP_IPV6)
691 tp->pf = PF_INET6;
692 else
693 tp->pf = PF_INET;
694 #else
695 tp->pf = PF_INET;
696 #endif
697 return (0);
698 }
699
700 /*
701 * Initiate (or continue) disconnect.
702 * If embryonic state, just send reset (once).
703 * If in ``let data drain'' option and linger null, just drop.
704 * Otherwise (hard), mark socket disconnecting and drop
705 * current input data; switch states based on user close, and
706 * send segment to peer (with FIN).
707 */
708 struct tcpcb *
709 tcp_disconnect(tp)
710 struct tcpcb *tp;
711 {
712 struct socket *so = tp->t_inpcb->inp_socket;
713
714 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
715 tp = tcp_close(tp);
716 else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
717 tp = tcp_drop(tp, 0);
718 else {
719 soisdisconnecting(so);
720 sbflush(&so->so_rcv);
721 tp = tcp_usrclosed(tp);
722 if (tp)
723 (void) tcp_output(tp);
724 }
725 return (tp);
726 }
727
728 /*
729 * User issued close, and wish to trail through shutdown states:
730 * if never received SYN, just forget it. If got a SYN from peer,
731 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
732 * If already got a FIN from peer, then almost done; go to LAST_ACK
733 * state. In all other cases, have already sent FIN to peer (e.g.
734 * after PRU_SHUTDOWN), and just have to play tedious game waiting
735 * for peer to send FIN or not respond to keep-alives, etc.
736 * We can let the user exit from the close as soon as the FIN is acked.
737 */
738 struct tcpcb *
739 tcp_usrclosed(tp)
740 struct tcpcb *tp;
741 {
742
743 switch (tp->t_state) {
744
745 case TCPS_CLOSED:
746 case TCPS_LISTEN:
747 case TCPS_SYN_SENT:
748 tp->t_state = TCPS_CLOSED;
749 tp = tcp_close(tp);
750 break;
751
752 case TCPS_SYN_RECEIVED:
753 case TCPS_ESTABLISHED:
754 tp->t_state = TCPS_FIN_WAIT_1;
755 break;
756
757 case TCPS_CLOSE_WAIT:
758 tp->t_state = TCPS_LAST_ACK;
759 break;
760 }
761 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
762 soisdisconnected(tp->t_inpcb->inp_socket);
763 /*
764 * If we are in FIN_WAIT_2, we arrived here because the
765 * application did a shutdown of the send side. Like the
766 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after
767 * a full close, we start a timer to make sure sockets are
768 * not left in FIN_WAIT_2 forever.
769 */
770 if (tp->t_state == TCPS_FIN_WAIT_2)
771 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle);
772 }
773 return (tp);
774 }
775
776 /*
777 * Look up a socket for ident or tcpdrop, ...
778 */
779 int
780 tcp_ident(oldp, oldlenp, newp, newlen, dodrop)
781 void *oldp;
782 size_t *oldlenp;
783 void *newp;
784 size_t newlen;
785 int dodrop;
786 {
787 int error = 0, s;
788 struct tcp_ident_mapping tir;
789 struct inpcb *inp;
790 struct tcpcb *tp = NULL;
791 struct sockaddr_in *fin, *lin;
792 #ifdef INET6
793 struct sockaddr_in6 *fin6, *lin6;
794 struct in6_addr f6, l6;
795 #endif
796 if (dodrop) {
797 if (oldp != NULL || *oldlenp != 0)
798 return (EINVAL);
799 if (newp == NULL)
800 return (EPERM);
801 if (newlen < sizeof(tir))
802 return (ENOMEM);
803 if ((error = copyin(newp, &tir, sizeof (tir))) != 0 )
804 return (error);
805 } else {
806 if (oldp == NULL)
807 return (EINVAL);
808 if (*oldlenp < sizeof(tir))
809 return (ENOMEM);
810 if (newp != NULL || newlen != 0)
811 return (EINVAL);
812 if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 )
813 return (error);
814 }
815 switch (tir.faddr.ss_family) {
816 #ifdef INET6
817 case AF_INET6:
818 fin6 = (struct sockaddr_in6 *)&tir.faddr;
819 error = in6_embedscope(&f6, fin6, NULL, NULL);
820 if (error)
821 return EINVAL; /*?*/
822 lin6 = (struct sockaddr_in6 *)&tir.laddr;
823 error = in6_embedscope(&l6, lin6, NULL, NULL);
824 if (error)
825 return EINVAL; /*?*/
826 break;
827 #endif
828 case AF_INET:
829 fin = (struct sockaddr_in *)&tir.faddr;
830 lin = (struct sockaddr_in *)&tir.laddr;
831 break;
832 default:
833 return (EINVAL);
834 }
835
836 s = splsoftnet();
837 switch (tir.faddr.ss_family) {
838 #ifdef INET6
839 case AF_INET6:
840 inp = in6_pcbhashlookup(&tcbtable, &f6,
841 fin6->sin6_port, &l6, lin6->sin6_port);
842 break;
843 #endif
844 case AF_INET:
845 inp = in_pcbhashlookup(&tcbtable, fin->sin_addr,
846 fin->sin_port, lin->sin_addr, lin->sin_port);
847 break;
848 }
849
850 if (dodrop) {
851 if (inp && (tp = intotcpcb(inp)) &&
852 ((inp->inp_socket->so_options & SO_ACCEPTCONN) == 0))
853 tp = tcp_drop(tp, ECONNABORTED);
854 else
855 error = ESRCH;
856 splx(s);
857 return (error);
858 }
859
860 if (inp == NULL) {
861 ++tcpstat.tcps_pcbhashmiss;
862 switch (tir.faddr.ss_family) {
863 #ifdef INET6
864 case AF_INET6:
865 inp = in6_pcblookup_listen(&tcbtable,
866 &l6, lin6->sin6_port, 0);
867 break;
868 #endif
869 case AF_INET:
870 inp = in_pcblookup_listen(&tcbtable,
871 lin->sin_addr, lin->sin_port, 0);
872 break;
873 }
874 }
875
876 if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) {
877 tir.ruid = inp->inp_socket->so_ruid;
878 tir.euid = inp->inp_socket->so_euid;
879 } else {
880 tir.ruid = -1;
881 tir.euid = -1;
882 }
883 splx(s);
884
885 *oldlenp = sizeof (tir);
886 error = copyout((void *)&tir, oldp, sizeof (tir));
887 return (error);
888 }
889
890 /*
891 * Sysctl for tcp variables.
892 */
893 int
894 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
895 int *name;
896 u_int namelen;
897 void *oldp;
898 size_t *oldlenp;
899 void *newp;
900 size_t newlen;
901 {
902 int error, nval;
903
904 /* All sysctl names at this level are terminal. */
905 if (namelen != 1)
906 return (ENOTDIR);
907
908 switch (name[0]) {
909 #ifdef TCP_SACK
910 case TCPCTL_SACK:
911 return (sysctl_int(oldp, oldlenp, newp, newlen,
912 &tcp_do_sack));
913 #endif
914 case TCPCTL_SLOWHZ:
915 return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ));
916
917 case TCPCTL_BADDYNAMIC:
918 return (sysctl_struct(oldp, oldlenp, newp, newlen,
919 baddynamicports.tcp, sizeof(baddynamicports.tcp)));
920
921 case TCPCTL_IDENT:
922 return (tcp_ident(oldp, oldlenp, newp, newlen, 0));
923
924 case TCPCTL_DROP:
925 return (tcp_ident(oldp, oldlenp, newp, newlen, 1));
926
927 #ifdef TCP_ECN
928 case TCPCTL_ECN:
929 return (sysctl_int(oldp, oldlenp, newp, newlen,
930 &tcp_do_ecn));
931 #endif
932 case TCPCTL_REASS_LIMIT:
933 nval = tcp_reass_limit;
934 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
935 if (error)
936 return (error);
937 if (nval != tcp_reass_limit) {
938 error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0);
939 if (error)
940 return (error);
941 tcp_reass_limit = nval;
942 }
943 return (0);
944 #ifdef TCP_SACK
945 case TCPCTL_SACKHOLE_LIMIT:
946 nval = tcp_sackhole_limit;
947 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
948 if (error)
949 return (error);
950 if (nval != tcp_sackhole_limit) {
951 error = pool_sethardlimit(&sackhl_pool, nval, NULL, 0);
952 if (error)
953 return (error);
954 tcp_sackhole_limit = nval;
955 }
956 return (0);
957 #endif
958 default:
959 if (name[0] < TCPCTL_MAXID)
960 return (sysctl_int_arr(tcpctl_vars, name, namelen,
961 oldp, oldlenp, newp, newlen));
962 return (ENOPROTOOPT);
963 }
964 /* NOTREACHED */
965 }