root/net/pf_norm.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. TAILQ_HEAD
  2. pf_normalize_init
  3. pf_frag_compare
  4. pf_purge_expired_fragments
  5. pf_flush_fragments
  6. pf_free_fragment
  7. pf_ip2key
  8. pf_find_fragment
  9. pf_remove_fragment
  10. pf_reassemble
  11. pf_fragcache
  12. pf_normalize_ip
  13. pf_normalize_ip6
  14. pf_normalize_tcp
  15. pf_normalize_tcp_init
  16. pf_normalize_tcp_cleanup
  17. pf_normalize_tcp_stateful
  18. pf_normalize_tcpopt

    1 /*      $OpenBSD: pf_norm.c,v 1.109 2007/05/28 17:16:39 henning Exp $ */
    2 
    3 /*
    4  * Copyright 2001 Niels Provos <provos@citi.umich.edu>
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   26  */
   27 
   28 #include "pflog.h"
   29 
   30 #include <sys/param.h>
   31 #include <sys/systm.h>
   32 #include <sys/mbuf.h>
   33 #include <sys/filio.h>
   34 #include <sys/fcntl.h>
   35 #include <sys/socket.h>
   36 #include <sys/kernel.h>
   37 #include <sys/time.h>
   38 #include <sys/pool.h>
   39 
   40 #include <dev/rndvar.h>
   41 #include <net/if.h>
   42 #include <net/if_types.h>
   43 #include <net/bpf.h>
   44 #include <net/route.h>
   45 #include <net/if_pflog.h>
   46 
   47 #include <netinet/in.h>
   48 #include <netinet/in_var.h>
   49 #include <netinet/in_systm.h>
   50 #include <netinet/ip.h>
   51 #include <netinet/ip_var.h>
   52 #include <netinet/tcp.h>
   53 #include <netinet/tcp_seq.h>
   54 #include <netinet/udp.h>
   55 #include <netinet/ip_icmp.h>
   56 
   57 #ifdef INET6
   58 #include <netinet/ip6.h>
   59 #endif /* INET6 */
   60 
   61 #include <net/pfvar.h>
   62 
/* One buffered fragment awaiting reassembly (buffering mode). */
struct pf_frent {
        LIST_ENTRY(pf_frent) fr_next;   /* queue link, kept sorted by offset */
        struct ip *fr_ip;               /* IP header of this fragment */
        struct mbuf *fr_m;              /* mbuf chain holding the fragment */
};
   68 
/* One byte range already seen (non-buffering fragment-cache mode). */
struct pf_frcache {
        LIST_ENTRY(pf_frcache) fr_next; /* list link, kept sorted by offset */
        uint16_t        fr_off;         /* start of range, in bytes */
        uint16_t        fr_end;         /* end of range, in bytes */
};
   74 
   75 #define PFFRAG_SEENLAST 0x0001          /* Seen the last fragment for this */
   76 #define PFFRAG_NOBUFFER 0x0002          /* Non-buffering fragment cache */
   77 #define PFFRAG_DROP     0x0004          /* Drop all fragments */
   78 #define BUFFER_FRAGMENTS(fr)    (!((fr)->fr_flags & PFFRAG_NOBUFFER))
   79 
/*
 * Per-datagram reassembly state, keyed by (src, dst, proto, id).
 * Each entry lives in an RB tree for lookup plus a TAILQ used as an
 * LRU (most recently used entries are kept at the head).
 */
struct pf_fragment {
        RB_ENTRY(pf_fragment) fr_entry;         /* lookup tree linkage */
        TAILQ_ENTRY(pf_fragment) frag_next;     /* LRU queue linkage */
        struct in_addr  fr_src;         /* key: source address */
        struct in_addr  fr_dst;         /* key: destination address */
        u_int8_t        fr_p;           /* protocol of this fragment */
        u_int8_t        fr_flags;       /* status flags */
        u_int16_t       fr_id;          /* fragment id for reassemble */
        u_int16_t       fr_max;         /* fragment data max */
        u_int32_t       fr_timeout;     /* last time this entry was touched */
#define fr_queue        fr_u.fru_queue
#define fr_cache        fr_u.fru_cache
        union {
                LIST_HEAD(pf_fragq, pf_frent) fru_queue;        /* buffering */
                LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;     /* non-buf */
        } fr_u;
};
   97 
   98 TAILQ_HEAD(pf_fragqueue, pf_fragment)   pf_fragqueue;
   99 TAILQ_HEAD(pf_cachequeue, pf_fragment)  pf_cachequeue;
  100 
  101 static __inline int      pf_frag_compare(struct pf_fragment *,
  102                             struct pf_fragment *);
  103 RB_HEAD(pf_frag_tree, pf_fragment)      pf_frag_tree, pf_cache_tree;
  104 RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
  105 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
  106 
  107 /* Private prototypes */
  108 void                     pf_ip2key(struct pf_fragment *, struct ip *);
  109 void                     pf_remove_fragment(struct pf_fragment *);
  110 void                     pf_flush_fragments(void);
  111 void                     pf_free_fragment(struct pf_fragment *);
  112 struct pf_fragment      *pf_find_fragment(struct ip *, struct pf_frag_tree *);
  113 struct mbuf             *pf_reassemble(struct mbuf **, struct pf_fragment **,
  114                             struct pf_frent *, int);
  115 struct mbuf             *pf_fragcache(struct mbuf **, struct ip*,
  116                             struct pf_fragment **, int, int, int *);
  117 int                      pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
  118                             struct tcphdr *, int);
  119 
  120 #define DPFPRINTF(x) do {                               \
  121         if (pf_status.debug >= PF_DEBUG_MISC) {         \
  122                 printf("%s: ", __func__);               \
  123                 printf x ;                              \
  124         }                                               \
  125 } while(0)
  126 
  127 /* Globals */
  128 struct pool              pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
  129 struct pool              pf_state_scrub_pl;
  130 int                      pf_nfrents, pf_ncache;
  131 
/*
 * One-time initialization of the normalizer: create the memory pools
 * for fragment state and TCP scrub state, cap their growth, and set
 * up the two LRU queues.  Must run before any packet is normalized.
 */
void
pf_normalize_init(void)
{
        pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
            NULL);
        pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
            NULL);
        pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
            "pffrcache", NULL);
        pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
            NULL);
        pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
            "pfstscr", NULL);

        /* Bound pool growth so fragment floods cannot exhaust memory. */
        pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
        pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
        pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
        pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);

        TAILQ_INIT(&pf_fragqueue);
        TAILQ_INIT(&pf_cachequeue);
}
  154 
  155 static __inline int
  156 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
  157 {
  158         int     diff;
  159 
  160         if ((diff = a->fr_id - b->fr_id))
  161                 return (diff);
  162         else if ((diff = a->fr_p - b->fr_p))
  163                 return (diff);
  164         else if (a->fr_src.s_addr < b->fr_src.s_addr)
  165                 return (-1);
  166         else if (a->fr_src.s_addr > b->fr_src.s_addr)
  167                 return (1);
  168         else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
  169                 return (-1);
  170         else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
  171                 return (1);
  172         return (0);
  173 }
  174 
/*
 * Expire fragment state older than the PFTM_FRAG timeout.  Both LRU
 * queues keep the most recently used entry at the head, so walking
 * from the tail visits entries oldest-first and the scan can stop at
 * the first entry that is still fresh.
 */
void
pf_purge_expired_fragments(void)
{
        struct pf_fragment      *frag;
        u_int32_t                expire = time_second -
                                    pf_default_rule.timeout[PFTM_FRAG];

        /* Buffering reassembly state. */
        while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
                KASSERT(BUFFER_FRAGMENTS(frag));
                if (frag->fr_timeout > expire)
                        break;

                DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
                pf_free_fragment(frag);
        }

        /* Non-buffering fragment-cache state. */
        while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
                KASSERT(!BUFFER_FRAGMENTS(frag));
                if (frag->fr_timeout > expire)
                        break;

                DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
                pf_free_fragment(frag);
                /* pf_free_fragment() must have unlinked the entry. */
                KASSERT(TAILQ_EMPTY(&pf_cachequeue) ||
                    TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
        }
}
  202 
  203 /*
  204  * Try to flush old fragments to make space for new ones
  205  */
  206 
  207 void
  208 pf_flush_fragments(void)
  209 {
  210         struct pf_fragment      *frag;
  211         int                      goal;
  212 
  213         goal = pf_nfrents * 9 / 10;
  214         DPFPRINTF(("trying to free > %d frents\n",
  215             pf_nfrents - goal));
  216         while (goal < pf_nfrents) {
  217                 frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
  218                 if (frag == NULL)
  219                         break;
  220                 pf_free_fragment(frag);
  221         }
  222 
  223 
  224         goal = pf_ncache * 9 / 10;
  225         DPFPRINTF(("trying to free > %d cache entries\n",
  226             pf_ncache - goal));
  227         while (goal < pf_ncache) {
  228                 frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
  229                 if (frag == NULL)
  230                         break;
  231                 pf_free_fragment(frag);
  232         }
  233 }
  234 
/* Frees the fragments and all associated entries */

/*
 * Release every per-fragment entry attached to "frag" (mbufs and pool
 * entries in buffering mode, cached ranges otherwise), then unlink and
 * free the descriptor itself via pf_remove_fragment().  "frag" must
 * not be used by the caller afterwards.
 */
void
pf_free_fragment(struct pf_fragment *frag)
{
        struct pf_frent         *frent;
        struct pf_frcache       *frcache;

        /* Free all fragments */
        if (BUFFER_FRAGMENTS(frag)) {
                for (frent = LIST_FIRST(&frag->fr_queue); frent;
                    frent = LIST_FIRST(&frag->fr_queue)) {
                        LIST_REMOVE(frent, fr_next);

                        m_freem(frent->fr_m);
                        pool_put(&pf_frent_pl, frent);
                        pf_nfrents--;
                }
        } else {
                for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
                    frcache = LIST_FIRST(&frag->fr_cache)) {
                        LIST_REMOVE(frcache, fr_next);

                        /* Cached ranges must stay sorted and disjoint. */
                        KASSERT(LIST_EMPTY(&frag->fr_cache) ||
                            LIST_FIRST(&frag->fr_cache)->fr_off >
                            frcache->fr_end);

                        pool_put(&pf_cent_pl, frcache);
                        pf_ncache--;
                }
        }

        pf_remove_fragment(frag);
}
  269 
  270 void
  271 pf_ip2key(struct pf_fragment *key, struct ip *ip)
  272 {
  273         key->fr_p = ip->ip_p;
  274         key->fr_id = ip->ip_id;
  275         key->fr_src.s_addr = ip->ip_src.s_addr;
  276         key->fr_dst.s_addr = ip->ip_dst.s_addr;
  277 }
  278 
  279 struct pf_fragment *
  280 pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
  281 {
  282         struct pf_fragment       key;
  283         struct pf_fragment      *frag;
  284 
  285         pf_ip2key(&key, ip);
  286 
  287         frag = RB_FIND(pf_frag_tree, tree, &key);
  288         if (frag != NULL) {
  289                 /* XXX Are we sure we want to update the timeout? */
  290                 frag->fr_timeout = time_second;
  291                 if (BUFFER_FRAGMENTS(frag)) {
  292                         TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
  293                         TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
  294                 } else {
  295                         TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
  296                         TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
  297                 }
  298         }
  299 
  300         return (frag);
  301 }
  302 
/* Removes a fragment from the fragment queue and frees the fragment */

/*
 * Unlink "frag" from its RB tree and LRU queue and return it to its
 * pool.  The per-fragment entries must already have been freed (see
 * pf_free_fragment()); this only disposes of the descriptor.
 */
void
pf_remove_fragment(struct pf_fragment *frag)
{
        if (BUFFER_FRAGMENTS(frag)) {
                RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
                TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
                pool_put(&pf_frag_pl, frag);
        } else {
                RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
                TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
                pool_put(&pf_cache_pl, frag);
        }
}
  318 
  319 #define FR_IP_OFF(fr)   ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
/*
 * Buffering reassembly: insert "frent" into the sorted fragment queue
 * of "*frag" (creating the state on the first fragment seen), trimming
 * any byte ranges that overlap already-queued fragments.  "mff" is the
 * IP "more fragments" flag of this fragment.  Returns the completely
 * reassembled packet once all data up to the last fragment is present,
 * otherwise NULL (the mbuf is then held in the queue).  On failure the
 * fragment is dropped and NULL returned; in all cases the caller's
 * mbuf is consumed.
 */
struct mbuf *
pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
        struct mbuf     *m = *m0, *m2;
        struct pf_frent *frea, *next;
        struct pf_frent *frep = NULL;
        struct ip       *ip = frent->fr_ip;
        int              hlen = ip->ip_hl << 2;
        u_int16_t        off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
        u_int16_t        ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
        u_int16_t        max = ip_len + off;

        KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag));

        /* Strip off ip header */
        m->m_data += hlen;
        m->m_len -= hlen;

        /* Create a new reassembly queue for this packet */
        if (*frag == NULL) {
                *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
                if (*frag == NULL) {
                        /* Evict old state and retry the allocation once. */
                        pf_flush_fragments();
                        *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
                        if (*frag == NULL)
                                goto drop_fragment;
                }

                (*frag)->fr_flags = 0;
                (*frag)->fr_max = 0;
                (*frag)->fr_src = frent->fr_ip->ip_src;
                (*frag)->fr_dst = frent->fr_ip->ip_dst;
                (*frag)->fr_p = frent->fr_ip->ip_p;
                (*frag)->fr_id = frent->fr_ip->ip_id;
                (*frag)->fr_timeout = time_second;
                LIST_INIT(&(*frag)->fr_queue);

                RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
                TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

                /* We do not have a previous fragment */
                frep = NULL;
                goto insert;
        }

        /*
         * Find a fragment after the current one:
         *  - off contains the real shifted offset.
         */
        LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
                if (FR_IP_OFF(frea) > off)
                        break;
                frep = frea;
        }

        KASSERT(frep != NULL || frea != NULL);

        /* Trim the front of this fragment where it overlaps "frep". */
        if (frep != NULL &&
            FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
            4 > off)
        {
                u_int16_t       precut;

                precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
                    frep->fr_ip->ip_hl * 4 - off;
                if (precut >= ip_len)
                        goto drop_fragment;
                m_adj(frent->fr_m, precut);
                DPFPRINTF(("overlap -%d\n", precut));
                /* Enforce 8 byte boundaries */
                ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
                off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
                ip_len -= precut;
                ip->ip_len = htons(ip_len);
        }

        /* Trim or discard queued fragments that this one overlaps. */
        for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
            frea = next)
        {
                u_int16_t       aftercut;

                aftercut = ip_len + off - FR_IP_OFF(frea);
                DPFPRINTF(("adjust overlap %d\n", aftercut));
                if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
                    * 4)
                {
                        /* Partial overlap: chop the front of "frea". */
                        frea->fr_ip->ip_len =
                            htons(ntohs(frea->fr_ip->ip_len) - aftercut);
                        frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
                            (aftercut >> 3));
                        m_adj(frea->fr_m, aftercut);
                        break;
                }

                /* This fragment is completely overlapped, lose it */
                next = LIST_NEXT(frea, fr_next);
                m_freem(frea->fr_m);
                LIST_REMOVE(frea, fr_next);
                pool_put(&pf_frent_pl, frea);
                pf_nfrents--;
        }

 insert:
        /* Update maximum data size */
        if ((*frag)->fr_max < max)
                (*frag)->fr_max = max;
        /* This is the last segment */
        if (!mff)
                (*frag)->fr_flags |= PFFRAG_SEENLAST;

        if (frep == NULL)
                LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
        else
                LIST_INSERT_AFTER(frep, frent, fr_next);

        /* Check if we are completely reassembled */
        if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
                return (NULL);

        /* Check if we have all the data */
        off = 0;
        for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
                next = LIST_NEXT(frep, fr_next);

                off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
                if (off < (*frag)->fr_max &&
                    (next == NULL || FR_IP_OFF(next) != off))
                {
                        /* A hole remains; keep waiting for fragments. */
                        DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
                            off, next == NULL ? -1 : FR_IP_OFF(next),
                            (*frag)->fr_max));
                        return (NULL);
                }
        }
        DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
        if (off < (*frag)->fr_max)
                return (NULL);

        /* We have all the data */
        frent = LIST_FIRST(&(*frag)->fr_queue);
        KASSERT(frent != NULL);
        if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
                DPFPRINTF(("drop: too big: %d\n", off));
                pf_free_fragment(*frag);
                *frag = NULL;
                return (NULL);
        }
        next = LIST_NEXT(frent, fr_next);

        /* Magic from ip_input */
        ip = frent->fr_ip;
        m = frent->fr_m;
        m2 = m->m_next;
        m->m_next = NULL;
        m_cat(m, m2);
        pool_put(&pf_frent_pl, frent);
        pf_nfrents--;
        /* Concatenate the data of the remaining fragments in order. */
        for (frent = next; frent != NULL; frent = next) {
                next = LIST_NEXT(frent, fr_next);

                m2 = frent->fr_m;
                pool_put(&pf_frent_pl, frent);
                pf_nfrents--;
                m_cat(m, m2);
        }

        ip->ip_src = (*frag)->fr_src;
        ip->ip_dst = (*frag)->fr_dst;

        /* Remove from fragment queue */
        pf_remove_fragment(*frag);
        *frag = NULL;

        /* Re-expose the first fragment's IP header as the packet header. */
        hlen = ip->ip_hl << 2;
        ip->ip_len = htons(off + hlen);
        m->m_len += hlen;
        m->m_data -= hlen;

        /* some debugging cruft by sklower, below, will go away soon */
        /* XXX this should be done elsewhere */
        if (m->m_flags & M_PKTHDR) {
                int plen = 0;
                for (m2 = m; m2; m2 = m2->m_next)
                        plen += m2->m_len;
                m->m_pkthdr.len = plen;
        }

        DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
        return (m);

 drop_fragment:
        /* Oops - fail safe - drop packet */
        pool_put(&pf_frent_pl, frent);
        pf_nfrents--;
        m_freem(m);
        return (NULL);
}
  518 
  519 struct mbuf *
  520 pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
  521     int drop, int *nomem)
  522 {
  523         struct mbuf             *m = *m0;
  524         struct pf_frcache       *frp, *fra, *cur = NULL;
  525         int                      ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
  526         u_int16_t                off = ntohs(h->ip_off) << 3;
  527         u_int16_t                max = ip_len + off;
  528         int                      hosed = 0;
  529 
  530         KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
  531 
  532         /* Create a new range queue for this packet */
  533         if (*frag == NULL) {
  534                 *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
  535                 if (*frag == NULL) {
  536                         pf_flush_fragments();
  537                         *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
  538                         if (*frag == NULL)
  539                                 goto no_mem;
  540                 }
  541 
  542                 /* Get an entry for the queue */
  543                 cur = pool_get(&pf_cent_pl, PR_NOWAIT);
  544                 if (cur == NULL) {
  545                         pool_put(&pf_cache_pl, *frag);
  546                         *frag = NULL;
  547                         goto no_mem;
  548                 }
  549                 pf_ncache++;
  550 
  551                 (*frag)->fr_flags = PFFRAG_NOBUFFER;
  552                 (*frag)->fr_max = 0;
  553                 (*frag)->fr_src = h->ip_src;
  554                 (*frag)->fr_dst = h->ip_dst;
  555                 (*frag)->fr_p = h->ip_p;
  556                 (*frag)->fr_id = h->ip_id;
  557                 (*frag)->fr_timeout = time_second;
  558 
  559                 cur->fr_off = off;
  560                 cur->fr_end = max;
  561                 LIST_INIT(&(*frag)->fr_cache);
  562                 LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
  563 
  564                 RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
  565                 TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
  566 
  567                 DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max));
  568 
  569                 goto pass;
  570         }
  571 
  572         /*
  573          * Find a fragment after the current one:
  574          *  - off contains the real shifted offset.
  575          */
  576         frp = NULL;
  577         LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
  578                 if (fra->fr_off > off)
  579                         break;
  580                 frp = fra;
  581         }
  582 
  583         KASSERT(frp != NULL || fra != NULL);
  584 
  585         if (frp != NULL) {
  586                 int     precut;
  587 
  588                 precut = frp->fr_end - off;
  589                 if (precut >= ip_len) {
  590                         /* Fragment is entirely a duplicate */
  591                         DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
  592                             h->ip_id, frp->fr_off, frp->fr_end, off, max));
  593                         goto drop_fragment;
  594                 }
  595                 if (precut == 0) {
  596                         /* They are adjacent.  Fixup cache entry */
  597                         DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
  598                             h->ip_id, frp->fr_off, frp->fr_end, off, max));
  599                         frp->fr_end = max;
  600                 } else if (precut > 0) {
  601                         /* The first part of this payload overlaps with a
  602                          * fragment that has already been passed.
  603                          * Need to trim off the first part of the payload.
  604                          * But to do so easily, we need to create another
  605                          * mbuf to throw the original header into.
  606                          */
  607 
  608                         DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
  609                             h->ip_id, precut, frp->fr_off, frp->fr_end, off,
  610                             max));
  611 
  612                         off += precut;
  613                         max -= precut;
  614                         /* Update the previous frag to encompass this one */
  615                         frp->fr_end = max;
  616 
  617                         if (!drop) {
  618                                 /* XXX Optimization opportunity
  619                                  * This is a very heavy way to trim the payload.
  620                                  * we could do it much faster by diddling mbuf
  621                                  * internals but that would be even less legible
  622                                  * than this mbuf magic.  For my next trick,
  623                                  * I'll pull a rabbit out of my laptop.
  624                                  */
  625                                 *m0 = m_copym2(m, 0, h->ip_hl << 2, M_NOWAIT);
  626                                 if (*m0 == NULL)
  627                                         goto no_mem;
  628                                 KASSERT((*m0)->m_next == NULL);
  629                                 m_adj(m, precut + (h->ip_hl << 2));
  630                                 m_cat(*m0, m);
  631                                 m = *m0;
  632                                 if (m->m_flags & M_PKTHDR) {
  633                                         int plen = 0;
  634                                         struct mbuf *t;
  635                                         for (t = m; t; t = t->m_next)
  636                                                 plen += t->m_len;
  637                                         m->m_pkthdr.len = plen;
  638                                 }
  639 
  640 
  641                                 h = mtod(m, struct ip *);
  642 
  643 
  644                                 KASSERT((int)m->m_len ==
  645                                     ntohs(h->ip_len) - precut);
  646                                 h->ip_off = htons(ntohs(h->ip_off) +
  647                                     (precut >> 3));
  648                                 h->ip_len = htons(ntohs(h->ip_len) - precut);
  649                         } else {
  650                                 hosed++;
  651                         }
  652                 } else {
  653                         /* There is a gap between fragments */
  654 
  655                         DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
  656                             h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
  657                             max));
  658 
  659                         cur = pool_get(&pf_cent_pl, PR_NOWAIT);
  660                         if (cur == NULL)
  661                                 goto no_mem;
  662                         pf_ncache++;
  663 
  664                         cur->fr_off = off;
  665                         cur->fr_end = max;
  666                         LIST_INSERT_AFTER(frp, cur, fr_next);
  667                 }
  668         }
  669 
  670         if (fra != NULL) {
  671                 int     aftercut;
  672                 int     merge = 0;
  673 
  674                 aftercut = max - fra->fr_off;
  675                 if (aftercut == 0) {
  676                         /* Adjacent fragments */
  677                         DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
  678                             h->ip_id, off, max, fra->fr_off, fra->fr_end));
  679                         fra->fr_off = off;
  680                         merge = 1;
  681                 } else if (aftercut > 0) {
  682                         /* Need to chop off the tail of this fragment */
  683                         DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
  684                             h->ip_id, aftercut, off, max, fra->fr_off,
  685                             fra->fr_end));
  686                         fra->fr_off = off;
  687                         max -= aftercut;
  688 
  689                         merge = 1;
  690 
  691                         if (!drop) {
  692                                 m_adj(m, -aftercut);
  693                                 if (m->m_flags & M_PKTHDR) {
  694                                         int plen = 0;
  695                                         struct mbuf *t;
  696                                         for (t = m; t; t = t->m_next)
  697                                                 plen += t->m_len;
  698                                         m->m_pkthdr.len = plen;
  699                                 }
  700                                 h = mtod(m, struct ip *);
  701                                 KASSERT((int)m->m_len ==
  702                                     ntohs(h->ip_len) - aftercut);
  703                                 h->ip_len = htons(ntohs(h->ip_len) - aftercut);
  704                         } else {
  705                                 hosed++;
  706                         }
  707                 } else if (frp == NULL) {
  708                         /* There is a gap between fragments */
  709                         DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
  710                             h->ip_id, -aftercut, off, max, fra->fr_off,
  711                             fra->fr_end));
  712 
  713                         cur = pool_get(&pf_cent_pl, PR_NOWAIT);
  714                         if (cur == NULL)
  715                                 goto no_mem;
  716                         pf_ncache++;
  717 
  718                         cur->fr_off = off;
  719                         cur->fr_end = max;
  720                         LIST_INSERT_BEFORE(fra, cur, fr_next);
  721                 }
  722 
  723 
  724                 /* Need to glue together two separate fragment descriptors */
  725                 if (merge) {
  726                         if (cur && fra->fr_off <= cur->fr_end) {
  727                                 /* Need to merge in a previous 'cur' */
  728                                 DPFPRINTF(("fragcache[%d]: adjacent(merge "
  729                                     "%d-%d) %d-%d (%d-%d)\n",
  730                                     h->ip_id, cur->fr_off, cur->fr_end, off,
  731                                     max, fra->fr_off, fra->fr_end));
  732                                 fra->fr_off = cur->fr_off;
  733                                 LIST_REMOVE(cur, fr_next);
  734                                 pool_put(&pf_cent_pl, cur);
  735                                 pf_ncache--;
  736                                 cur = NULL;
  737 
  738                         } else if (frp && fra->fr_off <= frp->fr_end) {
  739                                 /* Need to merge in a modified 'frp' */
  740                                 KASSERT(cur == NULL);
  741                                 DPFPRINTF(("fragcache[%d]: adjacent(merge "
  742                                     "%d-%d) %d-%d (%d-%d)\n",
  743                                     h->ip_id, frp->fr_off, frp->fr_end, off,
  744                                     max, fra->fr_off, fra->fr_end));
  745                                 fra->fr_off = frp->fr_off;
  746                                 LIST_REMOVE(frp, fr_next);
  747                                 pool_put(&pf_cent_pl, frp);
  748                                 pf_ncache--;
  749                                 frp = NULL;
  750 
  751                         }
  752                 }
  753         }
  754 
  755         if (hosed) {
  756                 /*
  757                  * We must keep tracking the overall fragment even when
  758                  * we're going to drop it anyway so that we know when to
  759                  * free the overall descriptor.  Thus we drop the frag late.
  760                  */
  761                 goto drop_fragment;
  762         }
  763 
  764 
  765  pass:
  766         /* Update maximum data size */
  767         if ((*frag)->fr_max < max)
  768                 (*frag)->fr_max = max;
  769 
  770         /* This is the last segment */
  771         if (!mff)
  772                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
  773 
  774         /* Check if we are completely reassembled */
  775         if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
  776             LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
  777             LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
  778                 /* Remove from fragment queue */
  779                 DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
  780                     (*frag)->fr_max));
  781                 pf_free_fragment(*frag);
  782                 *frag = NULL;
  783         }
  784 
  785         return (m);
  786 
  787  no_mem:
  788         *nomem = 1;
  789 
  790         /* Still need to pay attention to !IP_MF */
  791         if (!mff && *frag != NULL)
  792                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
  793 
  794         m_freem(m);
  795         return (NULL);
  796 
  797  drop_fragment:
  798 
  799         /* Still need to pay attention to !IP_MF */
  800         if (!mff && *frag != NULL)
  801                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
  802 
  803         if (drop) {
  804                 /* This fragment has been deemed bad.  Don't reass */
  805                 if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
  806                         DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
  807                             h->ip_id));
  808                 (*frag)->fr_flags |= PFFRAG_DROP;
  809         }
  810 
  811         m_freem(m);
  812         return (NULL);
  813 }
  814 
/*
 * Normalize/scrub an IPv4 packet against the first matching scrub rule.
 * Non-fragments get reserved ip_off bits cleared and min-ttl/random-id
 * enforced.  Fragments are either fully reassembled (the default; *m0
 * may be replaced by the reassembled chain or consumed entirely) or run
 * through the non-buffering fragment cache (fragment crop/drop rules).
 * Returns PF_PASS or PF_DROP; *reason is set on every drop path.
 */
int
pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
    struct pf_pdesc *pd)
{
	struct mbuf		*m = *m0;
	struct pf_rule		*r;
	struct pf_frent		*frent;
	struct pf_fragment	*frag = NULL;
	struct ip		*h = mtod(m, struct ip *);
	int			 mff = (ntohs(h->ip_off) & IP_MF);
	int			 hlen = h->ip_hl << 2;
	u_int16_t		 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t		 max;
	int			 ip_len;
	int			 ip_off;

	/*
	 * Walk the active scrub ruleset using the skip-step pointers;
	 * stop at the first rule that matches this packet.
	 */
	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != h->ip_p)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else
			break;
	}

	if (r == NULL || r->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets */
	if (hlen < (int)sizeof(struct ip))
		goto drop;

	/* Header claims to be longer than the whole datagram. */
	if (hlen > ntohs(h->ip_len))
		goto drop;

	/* Clear IP_DF if the rule uses the no-df option */
	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
		u_int16_t ip_off = h->ip_off;

		h->ip_off &= htons(~IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	/* We will need other tests here */
	if (!fragoff && !mff)
		goto no_fragment;

	/* We're dealing with a fragment now. Don't allow fragments
	 * with IP_DF to enter the cache. If the flag was cleared by
	 * no-df above, fine. Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(("IP_DF\n"));
		goto bad;
	}

	ip_len = ntohs(h->ip_len) - hlen;	/* payload length only */
	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

	/* All fragments are 8 byte aligned */
	if (mff && (ip_len & 0x7)) {
		DPFPRINTF(("mff and %d\n", ip_len));
		goto bad;
	}

	/* Respect maximum length */
	if (fragoff + ip_len > IP_MAXPACKET) {
		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
		goto bad;
	}
	max = fragoff + ip_len;

	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
		/* Fully buffer all of the fragments */

		frag = pf_find_fragment(h, &pf_frag_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    max > frag->fr_max)
			goto bad;

		/* Get an entry for the fragment queue */
		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}
		pf_nfrents++;
		frent->fr_ip = h;
		frent->fr_m = m;

		/* Might return a completely reassembled mbuf, or NULL */
		DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
		*m0 = m = pf_reassemble(m0, &frag, frent, mff);

		/* Fragment consumed (buffered or freed) by pf_reassemble. */
		if (m == NULL)
			return (PF_DROP);

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;

		/* Reassembly may have moved the header; re-fetch it. */
		h = mtod(m, struct ip *);
	} else {
		/* non-buffering fragment cache (drops or masks overlaps) */
		int	nomem = 0;

		if (dir == PF_OUT && m->m_pkthdr.pf.flags & PF_TAG_FRAGCACHE) {
			/*
			 * Already passed the fragment cache in the
			 * input direction.  If we continued, it would
			 * appear to be a dup and would be dropped.
			 */
			goto fragment_pass;
		}

		frag = pf_find_fragment(h, &pf_cache_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    max > frag->fr_max) {
			if (r->rule_flag & PFRULE_FRAGDROP)
				frag->fr_flags |= PFFRAG_DROP;
			goto bad;
		}

		*m0 = m = pf_fragcache(m0, h, &frag, mff,
		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
		if (m == NULL) {
			if (nomem)
				goto no_mem;
			goto drop;
		}

		/* Tag so the outbound pass (above) skips the cache. */
		if (dir == PF_IN)
			m->m_pkthdr.pf.flags |= PF_TAG_FRAGCACHE;

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;
		goto fragment_pass;
	}

 no_fragment:
	/* At this point, only IP_DF is allowed in ip_off */
	if (h->ip_off & ~htons(IP_DF)) {
		u_int16_t ip_off = h->ip_off;

		h->ip_off &= htons(IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}

	if (r->rule_flag & PFRULE_RANDOMID) {
		u_int16_t ip_id = h->ip_id;

		h->ip_id = ip_randomid();
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
	}
	/* Full reassembly active: later stages see whole datagrams. */
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;

	return (PF_PASS);

 fragment_pass:
	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;
	return (PF_PASS);

 no_mem:
	REASON_SET(reason, PFRES_MEMORY);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);

 drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);

 bad:
	DPFPRINTF(("dropping bad fragment\n"));

	/* Free associated fragments */
	if (frag != NULL)
		pf_free_fragment(frag);

	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);

	return (PF_DROP);
}
 1040 
#ifdef INET6
/*
 * Normalize/scrub an IPv6 packet against the first matching scrub rule.
 * Walks the extension-header chain validating lengths (including the
 * hop-by-hop jumbo payload option), enforces min-ttl on the hop limit,
 * and sanity-checks any fragment header.  Fragment reassembly itself is
 * not implemented here (see the "do something about it" note below).
 * Returns PF_PASS or PF_DROP; *reason is set on every drop path.
 */
int
pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
    u_short *reason, struct pf_pdesc *pd)
{
	struct mbuf		*m = *m0;
	struct pf_rule		*r;
	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);
	int			 off;
	struct ip6_ext		 ext;
	struct ip6_opt		 opt;
	struct ip6_opt_jumbo	 jumbo;
	struct ip6_frag		 frag;
	u_int32_t		 jumbolen = 0, plen;
	u_int16_t		 fragoff = 0;
	int			 optend;
	int			 ooff;
	u_int8_t		 proto;
	int			 terminal;

	/*
	 * Walk the active scrub ruleset using the skip-step pointers;
	 * stop at the first rule that matches this packet.
	 */
	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET6)
			r = r->skip[PF_SKIP_AF].ptr;
#if 0 /* header chain! */
		else if (r->proto && r->proto != h->ip6_nxt)
			r = r->skip[PF_SKIP_PROTO].ptr;
#endif
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip6_src, AF_INET6,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip6_dst, AF_INET6,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else
			break;
	}

	if (r == NULL || r->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets */
	if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
		goto drop;

	/* Walk the extension-header chain until an upper-layer protocol. */
	off = sizeof(struct ip6_hdr);
	proto = h->ip6_nxt;
	terminal = 0;
	do {
		switch (proto) {
		case IPPROTO_FRAGMENT:
			goto fragment;
			break;
		case IPPROTO_AH:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			/*
			 * AH's length field counts 32-bit words (minus 2);
			 * the other extension headers count 8-byte units
			 * (minus 1).
			 */
			if (proto == IPPROTO_AH)
				off += (ext.ip6e_len + 2) * 4;
			else
				off += (ext.ip6e_len + 1) * 8;
			proto = ext.ip6e_nxt;
			break;
		case IPPROTO_HOPOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			optend = off + (ext.ip6e_len + 1) * 8;
			ooff = off + sizeof(ext);
			/* Scan the TLV options inside the hop-by-hop header. */
			do {
				if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
				    sizeof(opt.ip6o_type), NULL, NULL,
				    AF_INET6))
					goto shortpkt;
				if (opt.ip6o_type == IP6OPT_PAD1) {
					/* Pad1 is a lone byte, no length. */
					ooff++;
					continue;
				}
				if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
				    NULL, NULL, AF_INET6))
					goto shortpkt;
				if (ooff + sizeof(opt) + opt.ip6o_len > optend)
					goto drop;
				switch (opt.ip6o_type) {
				case IP6OPT_JUMBO:
					/* Jumbo requires ip6_plen == 0. */
					if (h->ip6_plen != 0)
						goto drop;
					if (!pf_pull_hdr(m, ooff, &jumbo,
					    sizeof(jumbo), NULL, NULL,
					    AF_INET6))
						goto shortpkt;
					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
					    sizeof(jumbolen));
					jumbolen = ntohl(jumbolen);
					/* Jumbograms must exceed 64k ... */
					if (jumbolen <= IPV6_MAXPACKET)
						goto drop;
					/* ... and match the mbuf length. */
					if (sizeof(struct ip6_hdr) + jumbolen !=
					    m->m_pkthdr.len)
						goto drop;
					break;
				default:
					break;
				}
				ooff += sizeof(opt) + opt.ip6o_len;
			} while (ooff < optend);

			off = optend;
			proto = ext.ip6e_nxt;
			break;
		default:
			terminal = 1;
			break;
		}
	} while (!terminal);

	/* jumbo payload option must be present, or plen > 0 */
	if (ntohs(h->ip6_plen) == 0)
		plen = jumbolen;
	else
		plen = ntohs(h->ip6_plen);
	if (plen == 0)
		goto drop;
	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
		goto shortpkt;

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip6_hlim < r->min_ttl)
		h->ip6_hlim = r->min_ttl;

	return (PF_PASS);

 fragment:
	/* Fragments may not carry a jumbo payload (and need plen > 0). */
	if (ntohs(h->ip6_plen) == 0 || jumbolen)
		goto drop;
	plen = ntohs(h->ip6_plen);

	if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
		goto shortpkt;
	/*
	 * The offset field occupies the upper 13 bits in 8-octet units,
	 * so masking with IP6F_OFF_MASK yields the byte offset directly.
	 */
	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
	/* Reassembled size must not exceed the maximum packet size. */
	if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
		goto badfrag;

	/* do something about it */
	/* remember to set pd->flags |= PFDESC_IP_REAS */
	return (PF_PASS);

 shortpkt:
	REASON_SET(reason, PFRES_SHORT);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);

 drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);

 badfrag:
	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);
}
#endif /* INET6 */
 1219 
 1220 int
 1221 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
 1222     int off, void *h, struct pf_pdesc *pd)
 1223 {
 1224         struct pf_rule  *r, *rm = NULL;
 1225         struct tcphdr   *th = pd->hdr.tcp;
 1226         int              rewrite = 0;
 1227         u_short          reason;
 1228         u_int8_t         flags;
 1229         sa_family_t      af = pd->af;
 1230 
 1231         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
 1232         while (r != NULL) {
 1233                 r->evaluations++;
 1234                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
 1235                         r = r->skip[PF_SKIP_IFP].ptr;
 1236                 else if (r->direction && r->direction != dir)
 1237                         r = r->skip[PF_SKIP_DIR].ptr;
 1238                 else if (r->af && r->af != af)
 1239                         r = r->skip[PF_SKIP_AF].ptr;
 1240                 else if (r->proto && r->proto != pd->proto)
 1241                         r = r->skip[PF_SKIP_PROTO].ptr;
 1242                 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
 1243                     r->src.neg, kif))
 1244                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
 1245                 else if (r->src.port_op && !pf_match_port(r->src.port_op,
 1246                             r->src.port[0], r->src.port[1], th->th_sport))
 1247                         r = r->skip[PF_SKIP_SRC_PORT].ptr;
 1248                 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
 1249                     r->dst.neg, NULL))
 1250                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
 1251                 else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
 1252                             r->dst.port[0], r->dst.port[1], th->th_dport))
 1253                         r = r->skip[PF_SKIP_DST_PORT].ptr;
 1254                 else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
 1255                             pf_osfp_fingerprint(pd, m, off, th),
 1256                             r->os_fingerprint))
 1257                         r = TAILQ_NEXT(r, entries);
 1258                 else {
 1259                         rm = r;
 1260                         break;
 1261                 }
 1262         }
 1263 
 1264         if (rm == NULL || rm->action == PF_NOSCRUB)
 1265                 return (PF_PASS);
 1266         else {
 1267                 r->packets[dir == PF_OUT]++;
 1268                 r->bytes[dir == PF_OUT] += pd->tot_len;
 1269         }
 1270 
 1271         if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
 1272                 pd->flags |= PFDESC_TCP_NORM;
 1273 
 1274         flags = th->th_flags;
 1275         if (flags & TH_SYN) {
 1276                 /* Illegal packet */
 1277                 if (flags & TH_RST)
 1278                         goto tcp_drop;
 1279 
 1280                 if (flags & TH_FIN)
 1281                         flags &= ~TH_FIN;
 1282         } else {
 1283                 /* Illegal packet */
 1284                 if (!(flags & (TH_ACK|TH_RST)))
 1285                         goto tcp_drop;
 1286         }
 1287 
 1288         if (!(flags & TH_ACK)) {
 1289                 /* These flags are only valid if ACK is set */
 1290                 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
 1291                         goto tcp_drop;
 1292         }
 1293 
 1294         /* Check for illegal header length */
 1295         if (th->th_off < (sizeof(struct tcphdr) >> 2))
 1296                 goto tcp_drop;
 1297 
 1298         /* If flags changed, or reserved data set, then adjust */
 1299         if (flags != th->th_flags || th->th_x2 != 0) {
 1300                 u_int16_t       ov, nv;
 1301 
 1302                 ov = *(u_int16_t *)(&th->th_ack + 1);
 1303                 th->th_flags = flags;
 1304                 th->th_x2 = 0;
 1305                 nv = *(u_int16_t *)(&th->th_ack + 1);
 1306 
 1307                 th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
 1308                 rewrite = 1;
 1309         }
 1310 
 1311         /* Remove urgent pointer, if TH_URG is not set */
 1312         if (!(flags & TH_URG) && th->th_urp) {
 1313                 th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
 1314                 th->th_urp = 0;
 1315                 rewrite = 1;
 1316         }
 1317 
 1318         /* Process options */
 1319         if (r->max_mss && pf_normalize_tcpopt(r, m, th, off))
 1320                 rewrite = 1;
 1321 
 1322         /* copy back packet headers if we sanitized */
 1323         if (rewrite)
 1324                 m_copyback(m, off, sizeof(*th), th);
 1325 
 1326         return (PF_PASS);
 1327 
 1328  tcp_drop:
 1329         REASON_SET(&reason, PFRES_NORM);
 1330         if (rm != NULL && r->log)
 1331                 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd);
 1332         return (PF_DROP);
 1333 }
 1334 
/*
 * Allocate and initialize the scrub state for one TCP peer.  Records
 * the initial IP TTL / hop limit, and — only for SYN segments — scans
 * the TCP options for a timestamp to enable timestamp modulation
 * (PFSS_TIMESTAMP) and seed the PAWS bookkeeping.  The 'dst' peer is
 * accepted for interface symmetry but not used here.
 * Returns 0 on success, 1 if the scrub descriptor cannot be allocated.
 */
int
pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
{
	u_int32_t tsval, tsecr;
	u_int8_t hdr[60];	/* max TCP header: th_off is 4 bits, 15*4 */
	u_int8_t *opt;

	KASSERT(src->scrub == NULL);

	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
	if (src->scrub == NULL)
		return (1);
	bzero(src->scrub, sizeof(*src->scrub));

	/* Remember the TTL/hop limit the first segment arrived with. */
	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		struct ip *h = mtod(m, struct ip *);
		src->scrub->pfss_ttl = h->ip_ttl;
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
		src->scrub->pfss_ttl = h->ip6_hlim;
		break;
	}
#endif /* INET6 */
	}


	/*
	 * All normalizations below are only begun if we see the start of
	 * the connections.  They must all set an enabled bit in pfss_flags
	 */
	if ((th->th_flags & TH_SYN) == 0)
		return (0);


	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				/* Single-byte options: advance one byte.
				 * Note EOL is treated like NOP here, so
				 * scanning continues past it. */
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					src->scrub->pfss_flags |=
					    PFSS_TIMESTAMP;
					/* Random per-peer modulation value. */
					src->scrub->pfss_ts_mod =
					    htonl(arc4random());

					/* note PFSS_PAWS not set yet */
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					src->scrub->pfss_tsval0 = ntohl(tsval);
					src->scrub->pfss_tsval = ntohl(tsval);
					src->scrub->pfss_tsecr = ntohl(tsecr);
					getmicrouptime(&src->scrub->pfss_last);
				}
				/* FALLTHROUGH */
			default:
				/* Skip option by its length byte; the
				 * MAX(...,2) guards against opt[1] < 2
				 * causing an infinite loop. */
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
	}

	return (0);
}
 1417 
 1418 void
 1419 pf_normalize_tcp_cleanup(struct pf_state *state)
 1420 {
 1421         if (state->src.scrub)
 1422                 pool_put(&pf_state_scrub_pl, state->src.scrub);
 1423         if (state->dst.scrub)
 1424                 pool_put(&pf_state_scrub_pl, state->dst.scrub);
 1425 
 1426         /* Someday... flush the TCP segment reassembly descriptors. */
 1427 }
 1428 
/*
 * Stateful TCP scrub: enforce the minimum TTL/hoplimit observed on this
 * connection, modulate RFC1323 timestamps (hiding host uptime/reboots),
 * and apply PAWS-derived sanity checks on the timestamp values so a
 * blind attacker cannot insert segments into the connection.
 *
 * Returns 0 when the segment is acceptable, or PF_DROP with *reason set
 * (PFRES_TS) when it must be discarded.  Sets *writeback when the TCP
 * option area was rewritten and the caller must copy the header back.
 */
int
pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
    u_short *reason, struct tcphdr *th, struct pf_state *state,
    struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
{
	struct timeval uptime;
	u_int32_t tsval, tsecr;
	u_int tsval_from_last;
	u_int8_t hdr[60];	/* 60 == max TCP header (th_off max 15 << 2) */
	u_int8_t *opt;
	int copyback = 0;
	int got_ts = 0;

	/* Caller guarantees at least one side carries scrub state. */
	KASSERT(src->scrub || dst->scrub);

	/*
	 * Enforce the minimum TTL seen for this connection.  Negate a common
	 * technique to evade an intrusion detection system and confuse
	 * firewall state code.
	 */
	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		if (src->scrub) {
			struct ip *h = mtod(m, struct ip *);
			/* Track the highest TTL seen, then raise this
			 * packet's TTL to it. */
			if (h->ip_ttl > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip_ttl;
			h->ip_ttl = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		if (src->scrub) {
			struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
			if (h->ip6_hlim > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip6_hlim;
			h->ip6_hlim = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET6 */
	}

	/*
	 * Only walk the options when the header actually carries some and
	 * at least one side is modulating timestamps, and the full header
	 * can be pulled into the local hdr[] buffer.
	 */
	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		/* Stop once fewer bytes remain than a timestamp option. */
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				/* Modulate the timestamps.  Can be used for
				 * NAT detection, OS uptime determination or
				 * reboot detection.
				 */

				if (got_ts) {
					/* Huh?  Multiple timestamps!? */
					if (pf_status.debug >= PF_DEBUG_MISC) {
						DPFPRINTF(("multiple TS??"));
						pf_print_state(state);
						printf("\n");
					}
					REASON_SET(reason, PFRES_TS);
					return (PF_DROP);
				}
				/* Ignore a truncated timestamp option;
				 * memcpy avoids unaligned 32bit loads. */
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					if (tsval && src->scrub &&
					    (src->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsval = ntohl(tsval);
						/* Add src's secret modulator;
						 * fixes th_sum in place. */
						pf_change_a(&opt[2],
						    &th->th_sum,
						    htonl(tsval +
						    src->scrub->pfss_ts_mod),
						    0);
						copyback = 1;
					}

					/* Modulate TS reply iff valid (!0) */
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					if (tsecr && dst->scrub &&
					    (dst->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						/* Undo dst's modulator so the
						 * peer sees its own clock. */
						tsecr = ntohl(tsecr)
						    - dst->scrub->pfss_ts_mod;
						pf_change_a(&opt[6],
						    &th->th_sum, htonl(tsecr),
						    0);
						copyback = 1;
					}
					got_ts = 1;
				}
				/* FALLTHROUGH */
			default:
				/* Skip the option; MAX(,2) guards against a
				 * bogus zero/one length stalling the loop. */
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
		if (copyback) {
			/* Copyback the options, caller copys back header */
			*writeback = 1;
			m_copyback(m, off + sizeof(struct tcphdr),
			    (th->th_off << 2) - sizeof(struct tcphdr), hdr +
			    sizeof(struct tcphdr));
		}
	}


	/*
	 * Must invalidate PAWS checks on connections idle for too long.
	 * The fastest allowed timestamp clock is 1ms.  That turns out to
	 * be about 24 days before it wraps.  XXX Right now our lowerbound
	 * TS echo check only works for the first 12 days of a connection
	 * when the TS has exhausted half its 32bit space
	 */
#define TS_MAX_IDLE	(24*24*60*60)
#define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */

	getmicrouptime(&uptime);
	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
	    time_second - state->creation > TS_MAX_CONN))  {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("src idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		/* Permanently demote PAWS to "idled" for this peer. */
		src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}
	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("dst idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}

	if (got_ts && src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Validate that the timestamps are "in-window".
		 * RFC1323 describes TCP Timestamp options that allow
		 * measurement of RTT (round trip time) and PAWS
		 * (protection against wrapped sequence numbers).  PAWS
		 * gives us a set of rules for rejecting packets on
		 * long fat pipes (packets that were somehow delayed
		 * in transit longer than the time it took to send the
		 * full TCP sequence space of 4Gb).  We can use these
		 * rules and infer a few others that will let us treat
		 * the 32bit timestamp and the 32bit echoed timestamp
		 * as sequence numbers to prevent a blind attacker from
		 * inserting packets into a connection.
		 *
		 * RFC1323 tells us:
		 *  - The timestamp on this packet must be greater than
		 *    or equal to the last value echoed by the other
		 *    endpoint.  The RFC says those will be discarded
		 *    since it is a dup that has already been acked.
		 *    This gives us a lowerbound on the timestamp.
		 *        timestamp >= other last echoed timestamp
		 *  - The timestamp will be less than or equal to
		 *    the last timestamp plus the time between the
		 *    last packet and now.  The RFC defines the max
		 *    clock rate as 1ms.  We will allow clocks to be
		 *    up to 10% fast and will allow a total difference
		 *    or 30 seconds due to a route change.  And this
		 *    gives us an upperbound on the timestamp.
		 *        timestamp <= last timestamp + max ticks
		 *    We have to be careful here.  Windows will send an
		 *    initial timestamp of zero and then initialize it
		 *    to a random value after the 3whs; presumably to
		 *    avoid a DoS by having to call an expensive RNG
		 *    during a SYN flood.  Proof MS has at least one
		 *    good security geek.
		 *
		 *  - The TCP timestamp option must also echo the other
		 *    endpoints timestamp.  The timestamp echoed is the
		 *    one carried on the earliest unacknowledged segment
		 *    on the left edge of the sequence window.  The RFC
		 *    states that the host will reject any echoed
		 *    timestamps that were larger than any ever sent.
		 *    This gives us an upperbound on the TS echo.
		 *        tescr <= largest_tsval
		 *  - The lowerbound on the TS echo is a little more
		 *    tricky to determine.  The other endpoint's echoed
		 *    values will not decrease.  But there may be
		 *    network conditions that re-order packets and
		 *    cause our view of them to decrease.  For now the
		 *    only lowerbound we can safely determine is that
		 *    the TS echo will never be less than the original
		 *    TS.  XXX There is probably a better lowerbound.
		 *    Remove TS_MAX_CONN with better lowerbound check.
		 *        tescr >= other original TS
		 *
		 * It is also important to note that the fastest
		 * timestamp clock of 1ms will wrap its 32bit space in
		 * 24 days.  So we just disable TS checking after 24
		 * days of idle time.  We actually must use a 12d
		 * connection limit until we can come up with a better
		 * lowerbound to the TS echo check.
		 */
		struct timeval delta_ts;
		int ts_fudge;


		/*
		 * PFTM_TS_DIFF is how many seconds of leeway to allow
		 * a host's timestamp.  This can happen if the previous
		 * packet got delayed in transit for much longer than
		 * this packet.
		 */
		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
			ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];


		/* Calculate max ticks since the last timestamp */
#define TS_MAXFREQ	1100		/* RFC max TS freq of 1Khz + 10% skew */
#define TS_MICROSECS	1000000		/* microseconds per second */
		timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);


		/* SEQ_* macros give wraparound-safe 32bit comparisons. */
		if ((src->state >= TCPS_ESTABLISHED &&
		    dst->state >= TCPS_ESTABLISHED) &&
		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
			/* Bad RFC1323 implementation or an insertion attack.
			 *
			 * - Solaris 2.6 and 2.7 are known to send another ACK
			 *   after the FIN,FIN|ACK,ACK closing that carries
			 *   an old timestamp.
			 */

			DPFPRINTF(("Timestamp failed %c%c%c%c\n",
			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
			    SEQ_GT(tsval, src->scrub->pfss_tsval +
			    tsval_from_last) ? '1' : ' ',
			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
			DPFPRINTF((" tsval: %lu  tsecr: %lu  +ticks: %lu  "
			    "idle: %lus %lums\n",
			    tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
			    delta_ts.tv_usec / 1000));
			DPFPRINTF((" src->tsval: %lu  tsecr: %lu\n",
			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
			DPFPRINTF((" dst->tsval: %lu  tsecr: %lu  tsval0: %lu"
			    "\n", dst->scrub->pfss_tsval,
			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
			if (pf_status.debug >= PF_DEBUG_MISC) {
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}

		/* XXX I'd really like to require tsecr but it's optional */

	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
	    src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Didn't send a timestamp.  Timestamps aren't really useful
		 * when:
		 *  - connection opening or closing (often not even sent).
		 *    but we must not let an attacker to put a FIN on a
		 *    data packet to sneak it through our ESTABLISHED check.
		 *  - on a TCP reset.  RFC suggests not even looking at TS.
		 *  - on an empty ACK.  The TS will not be echoed so it will
		 *    probably not help keep the RTT calculation in sync and
		 *    there isn't as much danger when the sequence numbers
		 *    got wrapped.  So some stacks don't include TS on empty
		 *    ACKs :-(
		 *
		 * To minimize the disruption to mostly RFC1323 conformant
		 * stacks, we will only require timestamps on data packets.
		 *
		 * And what do ya know, we cannot require timestamps on data
		 * packets.  There appear to be devices that do legitimate
		 * TCP connection hijacking.  There are HTTP devices that allow
		 * a 3whs (with timestamps) and then buffer the HTTP request.
		 * If the intermediate device has the HTTP response cache, it
		 * will spoof the response but not bother timestamping its
		 * packets.  So we can look for the presence of a timestamp in
		 * the first data packet and if there, require it in all future
		 * packets.
		 */

		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
			/*
			 * Hey!  Someone tried to sneak a packet in.  Or the
			 * stack changed its RFC1323 behavior?!?!
			 */
			if (pf_status.debug >= PF_DEBUG_MISC) {
				DPFPRINTF(("Did not receive expected RFC1323 "
				    "timestamp\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}
	}


	/*
	 * We will note if a host sends his data packets with or without
	 * timestamps.  And require all data packets to contain a timestamp
	 * if the first does.  PAWS implicitly requires that all data packets be
	 * timestamped.  But I think there are middle-man devices that hijack
	 * TCP streams immediately after the 3whs and don't timestamp their
	 * packets (seen in a WWW accelerator or cache).
	 */
	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
		/* First data packet decides the policy, once only. */
		if (got_ts)
			src->scrub->pfss_flags |= PFSS_DATA_TS;
		else {
			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
			if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* Don't warn if other host rejected RFC1323 */
				DPFPRINTF(("Broken RFC1323 stack did not "
				    "timestamp data packet. Disabled PAWS "
				    "security.\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
		}
	}


	/*
	 * Update PAWS values
	 */
	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
		getmicrouptime(&src->scrub->pfss_last);
		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
			src->scrub->pfss_tsval = tsval;

		if (tsecr) {
			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_tsecr = tsecr;

			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
			    src->scrub->pfss_tsval0 == 0)) {
				/* tsval0 MUST be the lowest timestamp */
				src->scrub->pfss_tsval0 = tsval;
			}

			/* Only fully initialized after a TS gets echoed */
			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_flags |= PFSS_PAWS;
		}
	}

	/* I have a dream....  TCP segment reassembly.... */
	return (0);
}
 1819 
 1820 int
 1821 pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
 1822     int off)
 1823 {
 1824         u_int16_t       *mss;
 1825         int              thoff;
 1826         int              opt, cnt, optlen = 0;
 1827         int              rewrite = 0;
 1828         u_char          *optp;
 1829 
 1830         thoff = th->th_off << 2;
 1831         cnt = thoff - sizeof(struct tcphdr);
 1832         optp = mtod(m, caddr_t) + off + sizeof(struct tcphdr);
 1833 
 1834         for (; cnt > 0; cnt -= optlen, optp += optlen) {
 1835                 opt = optp[0];
 1836                 if (opt == TCPOPT_EOL)
 1837                         break;
 1838                 if (opt == TCPOPT_NOP)
 1839                         optlen = 1;
 1840                 else {
 1841                         if (cnt < 2)
 1842                                 break;
 1843                         optlen = optp[1];
 1844                         if (optlen < 2 || optlen > cnt)
 1845                                 break;
 1846                 }
 1847                 switch (opt) {
 1848                 case TCPOPT_MAXSEG:
 1849                         mss = (u_int16_t *)(optp + 2);
 1850                         if ((ntohs(*mss)) > r->max_mss) {
 1851                                 th->th_sum = pf_cksum_fixup(th->th_sum,
 1852                                     *mss, htons(r->max_mss), 0);
 1853                                 *mss = htons(r->max_mss);
 1854                                 rewrite = 1;
 1855                         }
 1856                         break;
 1857                 default:
 1858                         break;
 1859                 }
 1860         }
 1861 
 1862         return (rewrite);
 1863 }

/* [<][>][^][v][top][bottom][index][help] */