root/arch/i386/i386/pmapae.c


DEFINITIONS

This source file includes the following definitions.
  1. pmap_tmpmap_pa_pae
  2. pmap_tmpunmap_pa_pae
  3. pmap_tmpmap_pvepte_pae
  4. pmap_tmpunmap_pvepte_pae
  5. pmap_map_ptes_pae
  6. pmap_unmap_ptes_pae
  7. pmap_pte_set_pae
  8. pmap_pte_setbits_pae
  9. pmap_pte_bits_pae
  10. pmap_pte_paddr_pae
  11. pmap_bootstrap_pae
  12. pmap_try_steal_pv_pae
  13. pmap_alloc_ptp_pae
  14. pmap_get_ptp_pae
  15. pmap_pinit_pd_pae
  16. pmap_extract_pae
  17. pmap_zero_phys_pae
  18. pmap_zero_page_uncached_pae
  19. pmap_copy_page_pae
  20. pmap_remove_ptes_pae
  21. pmap_remove_pte_pae
  22. pmap_remove_pae
  23. pmap_page_remove_pae
  24. pmap_test_attrs_pae
  25. pmap_change_attrs_pae
  26. pmap_write_protect_pae
  27. pmap_unwire_pae
  28. pmap_enter_pae
  29. pmap_growkernel_pae
  30. pmap_dump_pae

    1 /*      $OpenBSD: pmapae.c,v 1.13 2007/07/20 19:48:15 mk Exp $  */
    2 
    3 /*
    4  * Copyright (c) 2006 Michael Shalayeff
    5  * All rights reserved.
    6  *
    7  * Permission to use, copy, modify, and distribute this software for any
    8  * purpose with or without fee is hereby granted, provided that the above
    9  * copyright notice and this permission notice appear in all copies.
   10  *
   11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
   12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
   13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
   14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   15  * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN
   16  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
   17  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
   18  */
   19 /*
   20  *
   21  * Copyright (c) 1997 Charles D. Cranor and Washington University.
   22  * All rights reserved.
   23  *
   24  * Redistribution and use in source and binary forms, with or without
   25  * modification, are permitted provided that the following conditions
   26  * are met:
   27  * 1. Redistributions of source code must retain the above copyright
   28  *    notice, this list of conditions and the following disclaimer.
   29  * 2. Redistributions in binary form must reproduce the above copyright
   30  *    notice, this list of conditions and the following disclaimer in the
   31  *    documentation and/or other materials provided with the distribution.
   32  * 3. All advertising materials mentioning features or use of this software
   33  *    must display the following acknowledgement:
   34  *      This product includes software developed by Charles D. Cranor and
   35  *      Washington University.
   36  * 4. The name of the author may not be used to endorse or promote products
   37  *    derived from this software without specific prior written permission.
   38  *
   39  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   40  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   41  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   42  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   43  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   44  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   45  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   46  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   47  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   48  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   49  *
   50  *      from OpenBSD: pmap.c,v 1.85 2005/11/18 17:05:04 brad Exp
   51  */
   52 /*
   53  * pmap.c: i386 pmap module rewrite
   54  * Chuck Cranor <chuck@ccrc.wustl.edu>
   55  * 11-Aug-97
   56  *
   57  * history of this pmap module: in addition to my own input, i used
   58  *    the following references for this rewrite of the i386 pmap:
   59  *
   60  * [1] the NetBSD i386 pmap.   this pmap appears to be based on the
   61  *     BSD hp300 pmap done by Mike Hibler at University of Utah.
   62  *     it was then ported to the i386 by William Jolitz of UUNET
   63  *     Technologies, Inc.   Then Charles M. Hannum of the NetBSD
   64  *     project fixed some bugs and provided some speed ups.
   65  *
   66  * [2] the FreeBSD i386 pmap.   this pmap seems to be the
   67  *     Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson
   68  *     and David Greenman.
   69  *
   70  * [3] the Mach pmap.   this pmap, from CMU, seems to have migrated
   71  *     between several processors.   the VAX version was done by
   72  *     Avadis Tevanian, Jr., and Michael Wayne Young.    the i386
   73  *     version was done by Lance Berc, Mike Kupfer, Bob Baron,
   74  *     David Golub, and Richard Draves.    the alpha version was
   75  *     done by Alessandro Forin (CMU/Mach) and Chris Demetriou
   76  *     (NetBSD/alpha).
   77  */
   78 /*
   79  * PAE support
   80  * Michael Shalayeff <mickey@lucifier.net>
   81  *
   82  * This module implements PAE mode for i386.
   83  *
   84  */
   85 
   86 #include <sys/param.h>
   87 #include <sys/systm.h>
   88 #include <sys/proc.h>
   89 #include <sys/malloc.h>
   90 #include <sys/pool.h>
   91 #include <sys/user.h>
   92 #include <sys/kernel.h>
   93 #include <sys/mutex.h>
   94 
   95 #include <uvm/uvm.h>
   96 
   97 #include <machine/atomic.h>
   98 #include <machine/cpu.h>
   99 #include <machine/specialreg.h>
  100 #include <machine/gdt.h>
  101 
  102 #include <dev/isa/isareg.h>
  103 #include <sys/msgbuf.h>
  104 #include <stand/boot/bootarg.h>
  105 
  106 /*
  107  * this file contains the code for the "pmap module."   the module's
  108  * job is to manage the hardware's virtual to physical address mappings.
  109  * note that there are two levels of mapping in the VM system:
  110  *
  111  *  [1] the upper layer of the VM system uses vm_map's and vm_map_entry's
  112  *      to map ranges of virtual address space to objects/files.  for
  113  *      example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only
  114  *      to the file /bin/ls starting at offset zero."   note that
  115  *      the upper layer mapping is not concerned with how individual
  116  *      vm_pages are mapped.
  117  *
  118  *  [2] the lower layer of the VM system (the pmap) maintains the mappings
  119  *      from virtual addresses.   it is concerned with which vm_page is
  120  *      mapped where.   for example, when you run /bin/ls and start
  121  *      at page 0x1000 the fault routine may lookup the correct page
  122  *      of the /bin/ls file and then ask the pmap layer to establish
  123  *      a mapping for it.
  124  *
  125  * note that information in the lower layer of the VM system can be
  126  * thrown away since it can easily be reconstructed from the info
  127  * in the upper layer.
  128  *
  129  * data structures we use include:
  130  *
  131  *  - struct pmap: describes the address space of one thread
  132  *  - struct pv_entry: describes one <PMAP,VA> mapping of a PA
  133  *  - struct pv_head: there is one pv_head per managed page of
  134  *      physical memory.   the pv_head points to a list of pv_entry
  135  *      structures which describe all the <PMAP,VA> pairs that this
  136  *      page is mapped in.    this is critical for page based operations
  137  *      such as pmap_page_protect() [change protection on _all_ mappings
  138  *      of a page]
  139  *  - pv_page/pv_page_info: pv_entry's are allocated out of pv_page's.
  140  *      if we run out of pv_entry's we allocate a new pv_page and free
  141  *      its pv_entrys.
  142  */
  143 /*
  144  * i386 PAE hardware Page Tables structure:
  145  *
  146  * the i386 PAE Page Table is a three-level PT which maps 4GB of VA.
  147  * the pagesize is 4K (4096 [0x1000] bytes) or 2MB.
  148  *
  149  * the first level table is called "page directory index" and consists
  150  * of 4 page directory index entries (PDIE) each 64 bits in size.
  151  *
  152  * the second level table is called a "page directory" and it contains
  153  * 512 page directory entries (PDEs).   each PDE is
  154  * 8 bytes (a long long), so a PD fits in a single 4K page.   this page is
  155  * the page directory page (PDP).  each PDE in a PDP maps 1GB of space
  156  * (512 * 2MB = 1GB).   a PDE contains the physical address of the
  157  * second level table: the page table.   or, if 2MB pages are being used,
  158  * then the PDE contains the PA of the 2MB page being mapped.
  159  *
  160  * a page table consists of 512 page table entries (PTEs).  each PTE is
  161  * 8 bytes (a long long), so a page table also fits in a single 4K page.
  162  * a 4K page being used as a page table is called a page table page (PTP).
  163  * each PTE in a PTP maps one 4K page (512 * 4K = 2MB).   a PTE contains
  164  * the physical address of the page it maps and some flag bits (described
  165  * below).
  166  *
   167  * the processor has a special register, "cr3", which points to
  168  * the PDP which is currently controlling the mappings of the virtual
  169  * address space.
  170  *
  171  * the following picture shows the translation process for a 4K page:
  172  *
  173  * %cr3 register [PA of PDPT]
  174  *  |
  175  *  |  bits <31-30> of VA
   176  *  |  index the PDIE (0-3)
  177  *  |        |
  178  *  v        v
  179  *  +-----------+
  180  *  |  PDP Ptr  |
  181  *  | 4 entries |
  182  *  +-----------+
  183  *       |
  184  *    PA of PDP
  185  *       |
  186  *       |
  187  *       |  bits <29-21> of VA       bits <20-12> of VA   bits <11-0>
   188  *       |  index the PDP (0 - 511)  index the PTP        are the page offset
  189  *       |        |                         |                    |
  190  *       |        v                         |                    |
  191  *       +-->+---------+                    |                    |
  192  *           | PD Page |    PA of           v                    |
  193  *           |         |-----PTP----->+------------+             |
  194  *           | 512 PDE |              | page table |--PTE--+     |
  195  *           | entries |              | (aka PTP)  |       |     |
  196  *           +---------+              |  512 PTE   |       |     |
  197  *                                    |  entries   |       |     |
  198  *                                    +------------+       |     |
  199  *                                                         |     |
  200  *                                              bits <35-12>   bits <11-0>
  201  *                                               p h y s i c a l  a d d r
  202  *
  203  * the i386 caches PTEs in a TLB.   it is important to flush out old
   204  * TLB mappings when making a change to a mapping.   writing to the
   205  * %cr3 will flush the entire TLB.    newer processors also have an
   206  * instruction that will invalidate the mapping of a single page (which
   207  * is useful if you are changing a single mapping because it preserves
  208  * all the cached TLB entries).
  209  *
   210  * as shown above, bits 35-12 of the PTE contain the PA of the page being mapped.
  211  * the rest of the PTE is defined as follows:
  212  *   bit#       name    use
  213  *   63         NX      no-execute bit (0=ITLB, 1=DTLB), optional
  214  *   11         n/a     available for OS use, hardware ignores it
  215  *   10         n/a     available for OS use, hardware ignores it
  216  *   9          n/a     available for OS use, hardware ignores it
  217  *   8          G       global bit (see discussion below)
   218  *   7          PS      page size [for PDEs] (0=4K, 1=2M <if supported>)
  219  *   6          D       dirty (modified) page
  220  *   5          A       accessed (referenced) page
  221  *   4          PCD     cache disable
   222  *   3          PWT     page write-through (cache policy)
  223  *   2          U/S     user/supervisor bit (0=supervisor only, 1=both u&s)
  224  *   1          R/W     read/write bit (0=read only, 1=read-write)
  225  *   0          P       present (valid)
  226  *
  227  * notes:
  228  *  - on the i386 the R/W bit is ignored if processor is in supervisor
  229  *    state (bug!)
  230  *  - PS is only supported on newer processors
  231  *  - PTEs with the G bit are global in the sense that they are not
  232  *    flushed from the TLB when %cr3 is written (to flush, use the
  233  *    "flush single page" instruction).   this is only supported on
  234  *    newer processors.    this bit can be used to keep the kernel's
  235  *    TLB entries around while context switching.   since the kernel
  236  *    is mapped into all processes at the same place it does not make
  237  *    sense to flush these entries when switching from one process'
  238  *    pmap to another.
  239  */
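/*
 * A minimal illustrative sketch of the translation just described (an
 * addition for exposition only, guarded by the hypothetical
 * PMAP_PAE_EXAMPLES define and not referenced anywhere): split a 32-bit
 * VA into its PDPT, PDE and PTE indices and compose a 64-bit PTE from a
 * PA plus flag bits.
 */
#ifdef PMAP_PAE_EXAMPLES
static void
pae_translate_example(vaddr_t va, paddr_t pa)
{
        u_int pdpt_idx = (va >> 30) & 0x3;      /* bits <31-30>: PDPT entry (0-3) */
        u_int pde_idx  = (va >> 21) & 0x1ff;    /* bits <29-21>: PDE within that PD page */
        u_int pte_idx  = (va >> 12) & 0x1ff;    /* bits <20-12>: PTE within the PTP */
        u_int pgoff    = va & 0xfff;            /* bits <11-0>: byte offset in the page */
        u_int64_t pte;

        /* a PAE PTE: PA in bits 35-12, flag bits in the low 12 bits */
        pte = ((u_int64_t)pa & 0xffffff000ULL) | PG_V | PG_RW;

        printf("va %#lx -> pdpt %u pde %u pte %u off %#x => pte %#llx\n",
            (u_long)va, pdpt_idx, pde_idx, pte_idx, pgoff, pte);
}
#endif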
  240 /*
  241  * A pmap describes a process' 4GB virtual address space.  This
  242  * virtual address space can be broken up into 2048 2MB regions which
  243  * are described by PDEs in the PDP.  The PDEs are defined as follows:
  244  *
  245  * Ranges are inclusive -> exclusive, just like vm_map_entry start/end.
  246  * The following assumes that KERNBASE is 0xd0000000.
  247  *
  248  * PDE#s        VA range                Usage
  249  * 0->1660      0x0 -> 0xcf800000       user address space, note that the
  250  *                                      max user address is 0xcfbfe000
  251  *                                      the final two pages in the last 4MB
  252  *                                      used to be reserved for the UAREA
  253  *                                      but now are no longer used.
  254  * 1660         0xcf800000->            recursive mapping of PDP (used for
  255  *                      0xd0000000      linear mapping of PTPs).
  256  * 1664->2044   0xd0000000->            kernel address space (constant
  257  *                      0xff800000      across all pmaps/processes).
  258  * 2044         0xff800000->            "alternate" recursive PDP mapping
  259  *                      <end>           (for other pmaps).
  260  *
  261  *
  262  * Note: A recursive PDP mapping provides a way to map all the PTEs for
  263  * a 4GB address space into a linear chunk of virtual memory.  In other
   264  * words, the PTE for page 0 is the first 8-byte entry mapped into the 8MB
   265  * recursive area.  The PTE for page 1 is the second entry.  The very last
   266  * entry in the 8MB range is the PTE that maps VA 0xffffe000 (the last page
   267  * in the 4GB address space).
  268  *
   269  * All pmaps' PDs must have the same values in slots 1664->2043 so that
  270  * the kernel is always mapped in every process.  These values are loaded
  271  * into the PD at pmap creation time.
  272  *
  273  * At any one time only one pmap can be active on a processor.  This is
  274  * the pmap whose PDP is pointed to by processor register %cr3.  This pmap
  275  * will have all its PTEs mapped into memory at the recursive mapping
   276  * point (slots #1660-3 as shown above).  When the pmap code wants to find the
  277  * PTE for a virtual address, all it has to do is the following:
  278  *
  279  * Address of PTE = (1660 * 2MB) + (VA / NBPG) * sizeof(pt_entry_t)
  280  *                = 0xcf800000 + (VA / 4096) * 8
  281  *
  282  * What happens if the pmap layer is asked to perform an operation
  283  * on a pmap that is not the one which is currently active?  In that
  284  * case we take the PA of the PDP of non-active pmap and put it in
  285  * slots 2044-7 of the active pmap.  This causes the non-active pmap's
   286  * PTEs to get mapped in the final 8MB of the 4GB address space
   287  * (i.e. starting at 0xff800000).
  288  *
  289  * The following figure shows the effects of the recursive PDP mapping:
  290  *
  291  *   PDP (%cr3->PDPTP)
  292  *   +----+
  293  *   |   0| -> PTP#0 that maps VA 0x0 -> 0x200000
  294  *   |    |
  295  *   |    |
  296  *   |1660| -> points back to PDP (%cr3) mapping VA 0xcf800000 -> 0xd0000000
  297  *   |1661|    (PDP is 4 pages)
  298  *   |1662|
  299  *   |1663|
   300  *   |1664| -> first kernel PTP (maps 0xd0000000 -> 0xd0200000)
  301  *   |    |
  302  *   |2044| -> points to alternate pmap's PDP (maps 0xff800000 -> end)
  303  *   |2045|
  304  *   |2046|
  305  *   |2047|
  306  *   +----+
  307  *
   308  * Note that the PDE#1660 VA (0xcf800000) is defined as "PTE_BASE".
   309  * Note that the PDE#2044 VA (0xff800000) is defined as "APTE_BASE".
  310  *
   311  * Starting at VA 0xcf8033e0 the currently active PDP (%cr3) acts as a
   312  * PDPTP and references four consecutively mapped pages:
  313  *
  314  * PTP#1660-3 == PDP(%cr3) => maps VA 0xcf800000 -> 0xd0000000
  315  *   +----+
  316  *   |   0| -> maps the contents of PTP#0 at VA 0xcf800000->0xcf801000
  317  *   |    |
  318  *   |    |
  319  *   |1660| -> maps the contents of PTP#1660 (the PDP) at VA 0xcfe7c000
  320  *   |1661|
  321  *   |1662|
  322  *   |1663|
  323  *   |1664| -> maps the contents of first kernel PTP
  324  *   |    |
  325  *   |2047|
  326  *   +----+
  327  *
   328  * Note that the mapping of the PDP at PTP#1660's VA (0xcfe7c000) is
  329  * defined as "PDP_BASE".... within that mapping there are two
  330  * defines:
  331  *   "PDP_PDE" (0xcfe7f3e0) is the VA of the PDE in the PDP
  332  *      which points back to itself.
  333  *   "APDP_PDE" (0xfff02fe0) is the VA of the PDE in the PDP which
  334  *      establishes the recursive mapping of the alternate pmap.
  335  *      To set the alternate PDP, one just has to put the correct
  336  *      PA info in *APDP_PDE.
  337  *
  338  * Note that in the APTE_BASE space, the APDP appears at VA
  339  * "APDP_BASE" (0xffffc000).
  340  *
   341  * unfortunately we cannot map the PDPT itself via the recursive page
   342  * tables because in their infinite wisdom they have defined %cr3 as 32 bits!
  343  *
  344  */
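/*
 * A minimal illustrative sketch of the "Address of PTE" formula above (an
 * addition for exposition only, guarded by the hypothetical
 * PMAP_PAE_EXAMPLES define): with PDSLOT_PTE == 1660 the linear PTE area
 * starts at 1660 * 2MB == 0xcf800000, so e.g. VA 0xd0000000 has its PTE
 * at 0xcf800000 + 0xd0000 * 8 == 0xcfe80000.
 */
#ifdef PMAP_PAE_EXAMPLES
static vaddr_t
pae_pte_va_example(vaddr_t va)
{
        vaddr_t pte_area = 1660UL * (2UL * 1024 * 1024);        /* 0xcf800000 */

        return (pte_area + (va / 4096) * 8);    /* 8 == sizeof(pt_entry_t) */
}
#endif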
  345 /*
  346  * memory allocation
  347  *
  348  *  - there are three data structures that we must dynamically allocate:
  349  *
  350  * [A] new process' page directory page (PDP)
   351  *      - plan 1: done at pmap_create() time; we use
   352  *        uvm_km_alloc(kernel_map, 4 * PAGE_SIZE)  [fka kmem_alloc] to do
   353  *        this allocation.
  354  *
  355  * if we are low in free physical memory then we sleep in
  356  * uvm_km_alloc -- in this case this is ok since we are creating
  357  * a new pmap and should not be holding any locks.
  358  *
  359  * if the kernel is totally out of virtual space
  360  * (i.e. uvm_km_alloc returns NULL), then we panic.
  361  *
  362  * XXX: the fork code currently has no way to return an "out of
  363  * memory, try again" error code since uvm_fork [fka vm_fork]
  364  * is a void function.
  365  *
  366  * [B] new page tables pages (PTP)
  367  *      call pae_pagealloc()
  368  *              => success: zero page, add to pm_pdir
  369  *              => failure: we are out of free vm_pages, let pmap_enter()
  370  *                 tell UVM about it.
  371  *
  372  * note: for kernel PTPs, we start with NKPTP of them.   as we map
  373  * kernel memory (at uvm_map time) we check to see if we've grown
  374  * the kernel pmap.   if so, we call the optional function
  375  * pmap_growkernel() to grow the kernel PTPs in advance.
  376  *
  377  * [C] pv_entry structures
  378  *      - plan 1: try to allocate one off the free list
  379  *              => success: done!
  380  *              => failure: no more free pv_entrys on the list
  381  *      - plan 2: try to allocate a new pv_page to add a chunk of
  382  *      pv_entrys to the free list
  383  *              [a] obtain a free, unmapped, VA in kmem_map.  either
  384  *              we have one saved from a previous call, or we allocate
  385  *              one now using a "vm_map_lock_try" in uvm_map
  386  *              => success: we have an unmapped VA, continue to [b]
  387  *              => failure: unable to lock kmem_map or out of VA in it.
  388  *                      move on to plan 3.
  389  *              [b] allocate a page in kmem_object for the VA
  390  *              => success: map it in, free the pv_entry's, DONE!
  391  *              => failure: kmem_object locked, no free vm_pages, etc.
  392  *                      save VA for later call to [a], go to plan 3.
  393  *      If we fail, we simply let pmap_enter() tell UVM about it.
  394  */
  395 /*
  396  * locking
  397  *
  398  * we have the following locks that we must contend with:
  399  *
  400  * "simple" locks:
  401  *
  402  * - pmap lock (per pmap, part of uvm_object)
  403  *   this lock protects the fields in the pmap structure including
  404  *   the non-kernel PDEs in the PDP, and the PTEs.  it also locks
  405  *   in the alternate PTE space (since that is determined by the
  406  *   entry in the PDP).
  407  *
  408  * - pvh_lock (per pv_head)
  409  *   this lock protects the pv_entry list which is chained off the
  410  *   pv_head structure for a specific managed PA.   it is locked
  411  *   when traversing the list (e.g. adding/removing mappings,
  412  *   syncing R/M bits, etc.)
  413  *
  414  * - pvalloc_lock
  415  *   this lock protects the data structures which are used to manage
  416  *   the free list of pv_entry structures.
  417  *
  418  * - pmaps_lock
  419  *   this lock protects the list of active pmaps (headed by "pmaps").
  420  *   we lock it when adding or removing pmaps from this list.
  421  *
  422  */
  423 
  424 /*
  425  * locking data structures
  426  */
  427 
  428 #define PMAP_MAP_TO_HEAD_LOCK()         /* null */
  429 #define PMAP_MAP_TO_HEAD_UNLOCK()       /* null */
  430 
  431 #define PMAP_HEAD_TO_MAP_LOCK()         /* null */
  432 #define PMAP_HEAD_TO_MAP_UNLOCK()       /* null */
  433 
  434 #define PG_FRAME        0xffffff000ULL  /* page frame mask */
  435 #define PG_LGFRAME      0xfffe00000ULL  /* large (2M) page frame mask */
  436 
  437 /*
  438  * Redefine the PDSHIFT, NBPD
  439  */
  440 #undef  PDSHIFT
  441 #define PD_MASK         0xffe00000      /* page directory address bits */
  442 #define PDSHIFT         21              /* page directory address shift */
  443 #define PT_MASK         0x001ff000      /* page table address bits */
  444 #undef  NBPD
  445 #define NBPD            (1U << PDSHIFT) /* # bytes mapped by PD (2MB) */
  446 
  447 /*
   448  * Redefine PDSLOT_PTE, PDSLOT_KERN and PDSLOT_APTE for the PAE layout
  449  */
  450 #undef  PDSLOT_PTE
  451 #define PDSLOT_PTE      (1660U) /* 1660: for recursive PDP map */
  452 #undef  PDSLOT_KERN
  453 #define PDSLOT_KERN     (1664U) /* 1664: start of kernel space */
  454 #undef  PDSLOT_APTE
  455 #define PDSLOT_APTE     (2044U) /* 2044: alternative recursive slot */
  456 
  457 /*
  458  * The following defines give the virtual addresses of various MMU
  459  * data structures:
  460  * PTE_BASE and APTE_BASE: the base VA of the linear PTE mappings
  461  * PTD_BASE and APTD_BASE: the base VA of the recursive mapping of the PTD
  462  * PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP
  463  */
  464 #define PTE_BASE        ((pt_entry_t *) (PDSLOT_PTE * NBPD) )
  465 #define APTE_BASE       ((pt_entry_t *) (PDSLOT_APTE * NBPD) )
  466 #define PDP_BASE ((pd_entry_t *)(((char *)PTE_BASE) + (PDSLOT_PTE * NBPG)))
  467 #define APDP_BASE ((pd_entry_t *)(((char *)APTE_BASE) + (PDSLOT_APTE * NBPG)))
  468 #define PDP_PDE         (PDP_BASE + PDSLOT_PTE)
  469 #define APDP_PDE        (PDP_BASE + PDSLOT_APTE)
  470 
  471 #define PTES_PER_PTP    (NBPG / sizeof(pt_entry_t))  /* # of PTEs in a PTP */
  472 
  473 /*
  474  * various address macros
  475  *
  476  *  vtopte: return a pointer to the PTE mapping a VA
  477  *
  478  */
  479 #define vtopte(VA)      (PTE_BASE + atop((vaddr_t)VA))
  480 
  481 /*
  482  * pdei/ptei: generate index into PDP/PTP from a VA
  483  */
  484 #define pdei(VA)        (((VA) & PD_MASK) >> PDSHIFT)
  485 #define ptei(VA)        (((VA) & PT_MASK) >> PGSHIFT)
  486 
  487 /*
  488  * Mach derived conversion macros
  489  */
  490 #define i386_round_pdr(x)       ((((unsigned)(x)) + ~PD_MASK) & PD_MASK)
  491 
  492 /*
  493  * PTP macros:
  494  *   A PTP's index is the PD index of the PDE that points to it.
  495  *   A PTP's offset is the byte-offset in the PTE space that this PTP is at.
  496  *   A PTP's VA is the first VA mapped by that PTP.
  497  *
   498  * Note that NBPG == number of bytes in a PTP (4096 bytes == 512 entries)
   499  *           NBPD == number of bytes a PTP can map (2MB)
  500  */
  501 
  502 #define ptp_i2o(I)      ((I) * NBPG)    /* index => offset */
  503 #define ptp_o2i(O)      ((O) / NBPG)    /* offset => index */
  504 #define ptp_i2v(I)      ((I) * NBPD)    /* index => VA */
  505 #define ptp_v2i(V)      ((V) / NBPD)    /* VA => index (same as pdei) */
  506 
  507 /*
  508  * Access PD and PT
  509  */
  510 #define PDE(pm,i)       (((pd_entry_t *)(pm)->pm_pdir)[(i)])
  511 
  512 /*
  513  * here we define the data types for PDEs and PTEs
  514  */
  515 typedef u_int64_t pd_entry_t;   /* PDE */
  516 typedef u_int64_t pt_entry_t;   /* PTE */
  517 
  518 /*
  519  * Number of PTE's per cache line. 8 byte pte, 32-byte cache line
  520  * Used to avoid false sharing of cache lines.
  521  */
  522 #define NPTECL          4
  523 
  524 /*
  525  * other data structures
  526  */
  527 
  528 extern u_int32_t protection_codes[];    /* maps MI prot to i386 prot code */
  529 extern boolean_t pmap_initialized;      /* pmap_init done yet? */
  530 
  531 /*
   532  * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside an
  533  * I386_MAXPROCS*NPTECL array of PTE's, to avoid cache line thrashing
  534  * due to false sharing.
  535  */
  536 
  537 #ifdef MULTIPROCESSOR
  538 #define PTESLEW(pte, id) ((pte)+(id)*NPTECL)
  539 #define VASLEW(va,id) ((va)+(id)*NPTECL*NBPG)
  540 #else
  541 #define PTESLEW(pte, id) (pte)
  542 #define VASLEW(va,id) (va)
  543 #endif
  544 
  545 /*
  546  * special VAs and the PTEs that map them
  547  */
  548 
  549 static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte;
  550 extern caddr_t pmap_csrcp, pmap_cdstp, pmap_zerop, pmap_ptpp;
  551 
  552 extern int pmap_pg_g;
  553 extern struct pmap_head pmaps;
  554 
  555 /*
   556  * a version of uvm_pagealloc() that prefers memory above 4GB
  557  */
  558 #define pae_pagealloc(obj, off, anon, flags) \
  559     uvm_pagealloc_strat((obj), (off), (anon), (flags), \
  560         UVM_PGA_STRAT_FALLBACK, VM_FREELIST_ABOVE4G)
  561 
  562 /*
  563  * local prototypes
  564  */
  565 
  566 struct vm_page  *pmap_alloc_ptp_pae(struct pmap *, int, boolean_t);
  567 #define ALLOCPV_NEED    0       /* need PV now */
  568 #define ALLOCPV_TRY     1       /* just try to allocate, don't steal */
  569 #define ALLOCPV_NONEED  2       /* don't need PV, just growing cache */
  570 struct vm_page  *pmap_get_ptp_pae(struct pmap *, int, boolean_t);
  571 pt_entry_t      *pmap_map_ptes_pae(struct pmap *);
  572 void             pmap_remove_ptes_pae(struct pmap *, struct vm_page *,
  573                      vaddr_t, vaddr_t, vaddr_t, int32_t *);
  574 boolean_t        pmap_remove_pte_pae(struct pmap *, struct vm_page *,
  575                      pt_entry_t *, vaddr_t, int32_t *);
  576 void             pmap_unmap_ptes_pae(struct pmap *);
  577 vaddr_t          pmap_tmpmap_pa_pae(paddr_t);
  578 pt_entry_t      *pmap_tmpmap_pvepte_pae(struct pv_entry *);
  579 void             pmap_tmpunmap_pa_pae(void);
  580 void             pmap_tmpunmap_pvepte_pae(struct pv_entry *);
  581 
  582 /*
  583  * pmap_tmpmap_pa: map a page in for tmp usage
  584  */
  585 
  586 vaddr_t
  587 pmap_tmpmap_pa_pae(paddr_t pa)
  588 {
  589 #ifdef MULTIPROCESSOR
  590         int id = cpu_number();
  591 #endif
  592         pt_entry_t *ptpte = PTESLEW(ptp_pte, id);
  593         caddr_t ptpva = VASLEW(pmap_ptpp, id);
  594 #if defined(DIAGNOSTIC)
  595         if (*ptpte)
  596                 panic("pmap_tmpmap_pa: ptp_pte in use?");
  597 #endif
  598         *ptpte = PG_V | PG_RW | pa;             /* always a new mapping */
  599         return((vaddr_t)ptpva);
  600 }
  601 
  602 /*
  603  * pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa)
  604  */
  605 
  606 void
  607 pmap_tmpunmap_pa_pae()
  608 {
  609 #ifdef MULTIPROCESSOR
  610         int id = cpu_number();
  611 #endif
  612         pt_entry_t *ptpte = PTESLEW(ptp_pte, id);
  613         caddr_t ptpva = VASLEW(pmap_ptpp, id);
  614 #if defined(DIAGNOSTIC)
  615         if (!pmap_valid_entry(*ptpte))
  616                 panic("pmap_tmpunmap_pa: our pte invalid?");
  617 #endif
  618         *ptpte = 0;             /* zap! */
  619         pmap_update_pg((vaddr_t)ptpva);
  620 #ifdef MULTIPROCESSOR
  621         /*
  622          * No need for tlb shootdown here, since ptp_pte is per-CPU.
  623          */
  624 #endif
  625 }
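/*
 * A minimal illustrative sketch of how the two helpers above pair up (an
 * addition for exposition only, guarded by the hypothetical
 * PMAP_PAE_EXAMPLES define): temporarily map a physical page through the
 * per-CPU ptp window, read a word from it, then tear the mapping down.
 */
#ifdef PMAP_PAE_EXAMPLES
static u_int32_t
pae_peek_pa_example(paddr_t pa)
{
        vaddr_t va;
        u_int32_t word;

        va = pmap_tmpmap_pa_pae(pa & ~(paddr_t)PAGE_MASK);      /* map the page */
        word = *(u_int32_t *)(va + (pa & PAGE_MASK));   /* read through the window */
        pmap_tmpunmap_pa_pae();                         /* undo the mapping */
        return (word);
}
#endif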
  626 
  627 /*
  628  * pmap_tmpmap_pvepte: get a quick mapping of a PTE for a pv_entry
  629  *
  630  * => do NOT use this on kernel mappings [why?  because pv_ptp may be NULL]
  631  */
  632 
  633 pt_entry_t *
  634 pmap_tmpmap_pvepte_pae(struct pv_entry *pve)
  635 {
  636 #ifdef DIAGNOSTIC
  637         if (pve->pv_pmap == pmap_kernel())
  638                 panic("pmap_tmpmap_pvepte: attempt to map kernel");
  639 #endif
  640 
  641         /* is it current pmap?  use direct mapping... */
  642         if (pmap_is_curpmap(pve->pv_pmap))
  643                 return(vtopte(pve->pv_va));
  644 
  645         return(((pt_entry_t *)pmap_tmpmap_pa_pae(VM_PAGE_TO_PHYS(pve->pv_ptp)))
  646                + ptei((unsigned)pve->pv_va));
  647 }
  648 
  649 /*
  650  * pmap_tmpunmap_pvepte: release a mapping obtained with pmap_tmpmap_pvepte
  651  */
  652 
  653 void
  654 pmap_tmpunmap_pvepte_pae(struct pv_entry *pve)
  655 {
  656         /* was it current pmap?   if so, return */
  657         if (pmap_is_curpmap(pve->pv_pmap))
  658                 return;
  659 
  660         pmap_tmpunmap_pa_pae();
  661 }
  662 
  663 /*
  664  * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
  665  *
  666  * => we lock enough pmaps to keep things locked in
  667  * => must be undone with pmap_unmap_ptes before returning
  668  */
  669 
  670 pt_entry_t *
  671 pmap_map_ptes_pae(struct pmap *pmap)
  672 {
  673         pd_entry_t opde;
  674 
  675         /* the kernel's pmap is always accessible */
  676         if (pmap == pmap_kernel()) {
  677                 return(PTE_BASE);
  678         }
  679 
  680         /* if curpmap then we are always mapped */
  681         if (pmap_is_curpmap(pmap)) {
  682                 simple_lock(&pmap->pm_obj.vmobjlock);
  683                 return(PTE_BASE);
  684         }
  685 
  686         /* need to lock both curpmap and pmap: use ordered locking */
  687         if ((unsigned) pmap < (unsigned) curpcb->pcb_pmap) {
  688                 simple_lock(&pmap->pm_obj.vmobjlock);
  689                 simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
  690         } else {
  691                 simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
  692                 simple_lock(&pmap->pm_obj.vmobjlock);
  693         }
  694 
  695         /* need to load a new alternate pt space into curpmap? */
  696         opde = *APDP_PDE;
  697         if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdidx[0]) {
  698                 APDP_PDE[0] = pmap->pm_pdidx[0] | PG_RW | PG_V;
  699                 APDP_PDE[1] = pmap->pm_pdidx[1] | PG_RW | PG_V;
  700                 APDP_PDE[2] = pmap->pm_pdidx[2] | PG_RW | PG_V;
  701                 APDP_PDE[3] = pmap->pm_pdidx[3] | PG_RW | PG_V;
  702                 if (pmap_valid_entry(opde))
  703                         pmap_apte_flush(curpcb->pcb_pmap);
  704         }
  705         return(APTE_BASE);
  706 }
  707 
  708 /*
  709  * pmap_unmap_ptes: unlock the PTE mapping of "pmap"
  710  */
  711 
  712 void
  713 pmap_unmap_ptes_pae(struct pmap *pmap)
  714 {
  715         if (pmap == pmap_kernel())
  716                 return;
  717 
  718         if (pmap_is_curpmap(pmap)) {
  719                 simple_unlock(&pmap->pm_obj.vmobjlock);
  720         } else {
  721 #if defined(MULTIPROCESSOR)
  722                 APDP_PDE[0] = 0;
  723                 APDP_PDE[1] = 0;
  724                 APDP_PDE[2] = 0;
  725                 APDP_PDE[3] = 0;
  726                 pmap_apte_flush(curpcb->pcb_pmap);
  727 #endif
  728                 simple_unlock(&pmap->pm_obj.vmobjlock);
  729                 simple_unlock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
  730         }
  731 }
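/*
 * A minimal illustrative sketch of the map_ptes/unmap_ptes bracket used
 * throughout this file (an addition for exposition only, guarded by the
 * hypothetical PMAP_PAE_EXAMPLES define): map the target pmap's PTEs
 * (possibly through the alternate APTE space), look at one entry, then
 * drop the mapping and the locks again.
 */
#ifdef PMAP_PAE_EXAMPLES
static boolean_t
pae_va_is_mapped_example(struct pmap *pmap, vaddr_t va)
{
        pt_entry_t *ptes;
        boolean_t valid = FALSE;

        if (pmap_valid_entry(PDE(pmap, pdei(va)))) {
                ptes = pmap_map_ptes_pae(pmap);         /* locks pmap */
                valid = pmap_valid_entry(ptes[atop(va)]);
                pmap_unmap_ptes_pae(pmap);              /* unlocks pmap */
        }
        return (valid);
}
#endif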
  732 
  733 u_int32_t
  734 pmap_pte_set_pae(vaddr_t va, paddr_t pa, u_int32_t bits)
  735 {
  736         pt_entry_t pte, *ptep = vtopte(va);
  737 
  738         pte = i386_atomic_testset_uq(ptep, pa | bits);
  739         return (pte & ~PG_FRAME);
  740 }
  741 
  742 u_int32_t
  743 pmap_pte_setbits_pae(vaddr_t va, u_int32_t set, u_int32_t clr)
  744 {
  745         pt_entry_t *ptep = vtopte(va);
  746         pt_entry_t pte = *ptep;
  747 
  748         i386_atomic_testset_uq(ptep, (pte | set) & ~(pt_entry_t)clr);
  749         return (pte & ~PG_FRAME);
  750 
  751 }
  752 
  753 u_int32_t
  754 pmap_pte_bits_pae(vaddr_t va)
  755 {
  756         pt_entry_t *ptep = vtopte(va);
  757 
  758         return (*ptep & ~PG_FRAME);
  759 }
  760 
  761 paddr_t
  762 pmap_pte_paddr_pae(vaddr_t va)
  763 {
  764         pt_entry_t *ptep = vtopte(va);
  765 
  766         return (*ptep & PG_FRAME);
  767 }
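/*
 * A minimal illustrative sketch of the four PTE accessors above (an
 * addition for exposition only, guarded by the hypothetical
 * PMAP_PAE_EXAMPLES define): read the PA and flag bits of a kernel VA and
 * re-enter the same mapping read-only.
 */
#ifdef PMAP_PAE_EXAMPLES
static void
pae_kva_readonly_example(vaddr_t va)
{
        paddr_t pa = pmap_pte_paddr_pae(va);    /* PA bits of the PTE */
        u_int32_t bits = pmap_pte_bits_pae(va); /* flag bits of the PTE */

        if (bits & PG_V) {
                pmap_pte_set_pae(va, pa, bits & ~PG_RW);
                pmap_update_pg(va);             /* flush the stale TLB entry */
        }
}
#endif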
  768 
  769 /*
  770  * Switch over to PAE page tables
  771  */
  772 void
  773 pmap_bootstrap_pae()
  774 {
  775         extern paddr_t avail_end, avail_end2;
  776         extern int cpu_pae, nkpde;
  777         struct pmap *kpm = pmap_kernel();
  778         struct vm_page *ptp;
  779         paddr_t ptaddr;
  780         u_int32_t bits;
  781         vaddr_t va, eva;
  782         int i, pn, pe;
  783 
  784         if (!cpu_pae || avail_end >= avail_end2 || !(cpu_feature & CPUID_PAE)){
  785                 avail_end2 = avail_end;
  786                 return;
  787         }
  788 
  789         va = (vaddr_t)kpm->pm_pdir;
  790         kpm->pm_pdidx[0] = (va + 0*NBPG - KERNBASE) | PG_V;
  791         kpm->pm_pdidx[1] = (va + 1*NBPG - KERNBASE) | PG_V;
  792         kpm->pm_pdidx[2] = (va + 2*NBPG - KERNBASE) | PG_V;
  793         kpm->pm_pdidx[3] = (va + 3*NBPG - KERNBASE) | PG_V;
  794         /* map pde recursively into itself */
  795         PDE(kpm, PDSLOT_PTE+0) = kpm->pm_pdidx[0] | PG_KW;
  796         PDE(kpm, PDSLOT_PTE+1) = kpm->pm_pdidx[1] | PG_KW;
  797         PDE(kpm, PDSLOT_PTE+2) = kpm->pm_pdidx[2] | PG_KW;
  798         PDE(kpm, PDSLOT_PTE+3) = kpm->pm_pdidx[3] | PG_KW;
  799 
  800         /* transfer all kernel mappings over into pae tables */
  801         for (va = KERNBASE, eva = va + (nkpde << 22);
  802             va < eva; va += PAGE_SIZE) {
  803                 if (!pmap_valid_entry(PDE(kpm, pdei(va)))) {
  804                         ptp = pae_pagealloc(&kpm->pm_obj, va, NULL,
  805                             UVM_PGA_ZERO);
  806                         ptaddr = VM_PAGE_TO_PHYS(ptp);
  807                         PDE(kpm, pdei(va)) = ptaddr | PG_KW | PG_V;
  808                         pmap_pte_set_86((vaddr_t)vtopte(va),
  809                             ptaddr, PG_KW | PG_V);
  810 
  811                         /* count PTP as resident */
  812                         kpm->pm_stats.resident_count++;
  813                 }
  814                 bits = pmap_pte_bits_86(va) | pmap_pg_g;
  815                 if (pmap_valid_entry(bits))
  816                         pmap_pte_set_pae(va, pmap_pte_paddr_86(va), bits);
  817         }
  818 
  819         if (!cpu_paenable(&kpm->pm_pdidx[0])) {
  820                 extern struct user *proc0paddr;
  821 
  822                 proc0paddr->u_pcb.pcb_cr3 = kpm->pm_pdirpa =
  823                     (vaddr_t)kpm - KERNBASE;
  824                 kpm->pm_pdirsize = 4 * NBPG;
  825 
  826                 csrc_pte = vtopte(pmap_csrcp);
  827                 cdst_pte = vtopte(pmap_cdstp);
  828                 zero_pte = vtopte(pmap_zerop);
  829                 ptp_pte  = vtopte(pmap_ptpp);
  830 
  831                 nkpde *= 2;
  832                 nkptp_max = 2048 - PDSLOT_KERN - 4;
  833                 vm_max_address = (PDSLOT_PTE << PDSHIFT) +
  834                     (PDSLOT_PTE << PGSHIFT);
  835 
  836                 pmap_pte_set_p = pmap_pte_set_pae;
  837                 pmap_pte_setbits_p = pmap_pte_setbits_pae;
  838                 pmap_pte_bits_p = pmap_pte_bits_pae;
  839                 pmap_pte_paddr_p = pmap_pte_paddr_pae;
  840                 pmap_change_attrs_p = pmap_change_attrs_pae;
  841                 pmap_enter_p = pmap_enter_pae;
  842                 pmap_extract_p = pmap_extract_pae;
  843                 pmap_growkernel_p = pmap_growkernel_pae;
  844                 pmap_page_remove_p = pmap_page_remove_pae;
  845                 pmap_remove_p = pmap_remove_pae;
  846                 pmap_test_attrs_p = pmap_test_attrs_pae;
  847                 pmap_unwire_p = pmap_unwire_pae;
  848                 pmap_write_protect_p = pmap_write_protect_pae;
  849                 pmap_pinit_pd_p = pmap_pinit_pd_pae;
  850                 pmap_zero_phys_p = pmap_zero_phys_pae;
  851                 pmap_zero_page_uncached_p = pmap_zero_page_uncached_pae;
  852                 pmap_copy_page_p = pmap_copy_page_pae;
  853                 pmap_try_steal_pv_p = pmap_try_steal_pv_pae;
  854 
  855                 bzero((void *)kpm->pm_pdir + 8, (PDSLOT_PTE-1) * 8);
  856                 /* TODO also reclaim old PDPs */
  857                 for (i = 0; i < vm_nphysseg; i++)
  858                         if (vm_physmem[i].start > atop(0xfffff000)) {
  859                                 vm_physmem[i].avail_end = vm_physmem[i].end;
  860                                 /* free vm_pages (uvm had already zeroed 'em) */
  861                                 for (pn = 0, pe = vm_physmem[i].end -
  862                                     vm_physmem[i].start; pn < pe ; pn++) {
  863                                         uvmexp.npages++;
  864                                         /* add page to free pool */
  865                                         uvm_pagefree(&vm_physmem[i].pgs[pn]);
  866                                 }
  867 
  868                         }
  869                 uvm_page_rehash();
  870         }
  871 }
  872 
  873 /*
  874  * p v _ e n t r y   f u n c t i o n s
  875  */
  876 
  877 /*
  878  * pv_entry allocation functions:
  879  *   the main pv_entry allocation functions are:
  880  *     pmap_alloc_pv: allocate a pv_entry structure
  881  *     pmap_free_pv: free one pv_entry
  882  *     pmap_free_pvs: free a list of pv_entrys
  883  *
  884  * the rest are helper functions
  885  */
  886 
  887 /*
  888  * pmap_try_steal_pv: try and steal a pv_entry from a pmap
  889  *
  890  * => return true if we did it!
  891  */
  892 
  893 boolean_t
  894 pmap_try_steal_pv_pae(struct pv_head *pvh, struct pv_entry *cpv,
  895     struct pv_entry *prevpv)
  896 {
  897         pt_entry_t *ptep, opte;
  898 #ifdef MULTIPROCESSOR
  899         int32_t cpumask = 0;
  900 #endif
  901 
  902         /*
  903          * we never steal kernel mappings or mappings from pmaps we can't lock
  904          */
  905 
  906         if (cpv->pv_pmap == pmap_kernel() ||
  907             !simple_lock_try(&cpv->pv_pmap->pm_obj.vmobjlock))
  908                 return(FALSE);
  909 
  910         /*
  911          * yes, we can try and steal it.   first we need to remove the
  912          * mapping from the pmap.
  913          */
  914 
  915         ptep = pmap_tmpmap_pvepte_pae(cpv);
  916         if (*ptep & PG_W) {
  917                 ptep = NULL;    /* wired page, avoid stealing this one */
  918         } else {
  919                 opte = i386_atomic_testset_uq(ptep, 0); /* zap! */
  920 #ifdef MULTIPROCESSOR
  921                 pmap_tlb_shootdown(cpv->pv_pmap, cpv->pv_va, opte, &cpumask);
  922                 pmap_tlb_shootnow(cpumask);
  923 #else
  924                 /* Don't bother deferring in the single CPU case. */
  925                 if (pmap_is_curpmap(cpv->pv_pmap))
  926                         pmap_update_pg(cpv->pv_va);
  927 #endif
  928                 pmap_tmpunmap_pvepte_pae(cpv);
  929         }
  930         if (ptep == NULL) {
  931                 simple_unlock(&cpv->pv_pmap->pm_obj.vmobjlock);
  932                 return(FALSE);  /* wired page, abort! */
  933         }
  934         cpv->pv_pmap->pm_stats.resident_count--;
  935         if (cpv->pv_ptp && cpv->pv_ptp->wire_count)
  936                 /* drop PTP's wired count */
  937                 cpv->pv_ptp->wire_count--;
  938 
  939         /*
  940          * XXX: if wire_count goes to one the PTP could be freed, however,
  941          * we'd have to lock the page queues (etc.) to do that and it could
  942          * cause deadlock headaches.   besides, the pmap we just stole from
  943          * may want the mapping back anyway, so leave the PTP around.
  944          */
  945 
  946         /*
  947          * now we need to remove the entry from the pvlist
  948          */
  949 
  950         if (cpv == pvh->pvh_list)
  951                 pvh->pvh_list = cpv->pv_next;
  952         else
  953                 prevpv->pv_next = cpv->pv_next;
  954         return(TRUE);
  955 }
  956 
  957 /*
  958  * p t p   f u n c t i o n s
  959  */
  960 
  961 /*
  962  * pmap_alloc_ptp: allocate a PTP for a PMAP
  963  *
  964  * => pmap should already be locked by caller
  965  * => we use the ptp's wire_count to count the number of active mappings
  966  *      in the PTP (we start it at one to prevent any chance this PTP
  967  *      will ever leak onto the active/inactive queues)
  968  * => we should not be holding any pv_head locks (in case we are forced
  969  *      to call pmap_steal_ptp())
  970  * => we may need to lock pv_head's if we have to steal a PTP
  971  * => just_try: true if we want a PTP, but not enough to steal one
  972  *      from another pmap (e.g. during optional functions like pmap_copy)
  973  */
  974 
  975 struct vm_page *
  976 pmap_alloc_ptp_pae(struct pmap *pmap, int pde_index, boolean_t just_try)
  977 {
  978         struct vm_page *ptp;
  979 
  980         ptp = pae_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL,
  981                             UVM_PGA_USERESERVE|UVM_PGA_ZERO);
  982         if (ptp == NULL)
  983                 return(NULL);
  984 
  985         /* got one! */
  986         atomic_clearbits_int(&ptp->pg_flags, PG_BUSY);
  987         ptp->wire_count = 1;    /* no mappings yet */
  988         PDE(pmap, pde_index) =
  989             (pd_entry_t)(VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V);
  990         pmap->pm_stats.resident_count++;        /* count PTP as resident */
  991         pmap->pm_ptphint = ptp;
  992         return(ptp);
  993 }
  994 
  995 /*
  996  * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one)
  997  *
  998  * => pmap should NOT be pmap_kernel()
  999  * => pmap should be locked
 1000  */
 1001 
 1002 struct vm_page *
 1003 pmap_get_ptp_pae(struct pmap *pmap, int pde_index, boolean_t just_try)
 1004 {
 1005         struct vm_page *ptp;
 1006 
 1007         if (pmap_valid_entry(PDE(pmap, pde_index))) {
 1008 
 1009                 /* valid... check hint (saves us a PA->PG lookup) */
 1010                 if (pmap->pm_ptphint &&
 1011                     (PDE(pmap, pde_index) & PG_FRAME) ==
 1012                     VM_PAGE_TO_PHYS(pmap->pm_ptphint))
 1013                         return(pmap->pm_ptphint);
 1014 
 1015                 ptp = uvm_pagelookup(&pmap->pm_obj, ptp_i2o(pde_index));
 1016 #ifdef DIAGNOSTIC
 1017                 if (ptp == NULL)
 1018                         panic("pmap_get_ptp: unmanaged user PTP");
 1019 #endif
 1020                 pmap->pm_ptphint = ptp;
 1021                 return(ptp);
 1022         }
 1023 
 1024         /* allocate a new PTP (updates ptphint) */
 1025         return (pmap_alloc_ptp_pae(pmap, pde_index, just_try));
 1026 }
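/*
 * A minimal illustrative sketch of how an enter-style path would use the
 * function above (an addition for exposition only, guarded by the
 * hypothetical PMAP_PAE_EXAMPLES define): obtain (or allocate) the PTP
 * covering a user VA before installing a PTE; kernel PTPs are
 * pre-allocated and are never looked up this way.
 */
#ifdef PMAP_PAE_EXAMPLES
static struct vm_page *
pae_need_ptp_example(struct pmap *pmap, vaddr_t va)
{
        struct vm_page *ptp = NULL;

        if (pmap != pmap_kernel())
                ptp = pmap_get_ptp_pae(pmap, pdei(va), FALSE);  /* may allocate */
        return (ptp);
}
#endif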
 1027 
 1028 /*
 1029  * pmap_pinit_pd: given a freshly allocated pmap structure, give it a PD
 1030  */
 1031 void
 1032 pmap_pinit_pd_pae(struct pmap *pmap)
 1033 {
 1034         extern int nkpde;
 1035         vaddr_t va;
 1036 
 1037         /* allocate PDP */
 1038         pmap->pm_pdir = uvm_km_alloc(kernel_map, 4 * NBPG);
 1039         if (pmap->pm_pdir == NULL)
 1040                 panic("pmap_pinit: kernel_map out of virtual space!");
 1041         /* page index is in the pmap! */
 1042         pmap_extract(pmap_kernel(), (vaddr_t)pmap, &pmap->pm_pdirpa);
 1043         /* fill out the PDPT entries */
 1044         va = (vaddr_t)pmap->pm_pdir;
 1045         pmap_extract(pmap_kernel(), va + 0*NBPG, &pmap->pm_pdidx[0]);
 1046         pmap_extract(pmap_kernel(), va + 1*NBPG, &pmap->pm_pdidx[1]);
 1047         pmap_extract(pmap_kernel(), va + 2*NBPG, &pmap->pm_pdidx[2]);
 1048         pmap_extract(pmap_kernel(), va + 3*NBPG, &pmap->pm_pdidx[3]);
 1049         pmap->pm_pdidx[0] |= PG_V;
 1050         pmap->pm_pdidx[1] |= PG_V;
 1051         pmap->pm_pdidx[2] |= PG_V;
 1052         pmap->pm_pdidx[3] |= PG_V;
 1053         pmap->pm_pdirsize = 4 * NBPG;
 1054 
 1055         /* init PDP */
 1056         /* zero init area */
 1057         bzero((void *)pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t));
 1058         /* put in recursive PDE to map the PTEs */
 1059         PDE(pmap, PDSLOT_PTE+0) = pmap->pm_pdidx[0] | PG_KW;
 1060         PDE(pmap, PDSLOT_PTE+1) = pmap->pm_pdidx[1] | PG_KW;
 1061         PDE(pmap, PDSLOT_PTE+2) = pmap->pm_pdidx[2] | PG_KW;
 1062         PDE(pmap, PDSLOT_PTE+3) = pmap->pm_pdidx[3] | PG_KW;
 1063 
 1064         /*
 1065          * we need to lock pmaps_lock to prevent nkpde from changing on
 1066          * us.   note that there is no need to splvm to protect us from
 1067          * malloc since malloc allocates out of a submap and we should have
 1068          * already allocated kernel PTPs to cover the range...
 1069          */
 1070         simple_lock(&pmaps_lock);
 1071         /* put in kernel VM PDEs */
 1072         bcopy(&PDP_BASE[PDSLOT_KERN], &PDE(pmap, PDSLOT_KERN),
 1073                nkpde * sizeof(pd_entry_t));
 1074         /* zero the rest */
 1075         bzero(&PDE(pmap, PDSLOT_KERN + nkpde), pmap->pm_pdirsize -
 1076             ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t)));
 1077         LIST_INSERT_HEAD(&pmaps, pmap, pm_list);
 1078         simple_unlock(&pmaps_lock);
 1079 }
 1080 
 1081 /*
 1082  * some misc. functions
 1083  */
 1084 
 1085 /*
 1086  * pmap_extract: extract a PA for the given VA
 1087  */
 1088 
 1089 boolean_t
 1090 pmap_extract_pae(struct pmap *pmap, vaddr_t va, paddr_t *pap)
 1091 {
 1092         paddr_t retval;
 1093         pt_entry_t *ptes;
 1094 
 1095         if (PDE(pmap, pdei(va))) {
 1096                 ptes = pmap_map_ptes_pae(pmap);
 1097                 retval = (paddr_t)(ptes[atop(va)] & PG_FRAME);
 1098                 pmap_unmap_ptes_pae(pmap);
 1099                 if (pap != NULL)
 1100                         *pap = retval | (va & ~PG_FRAME);
 1101                 return (TRUE);
 1102         }
 1103         return (FALSE);
 1104 }
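/*
 * A minimal illustrative sketch of typical pmap_extract usage (an addition
 * for exposition only, guarded by the hypothetical PMAP_PAE_EXAMPLES
 * define): translate a kernel VA to its PA, panicking if it is not mapped.
 */
#ifdef PMAP_PAE_EXAMPLES
static paddr_t
pae_kva_to_pa_example(vaddr_t va)
{
        paddr_t pa;

        if (!pmap_extract_pae(pmap_kernel(), va, &pa))
                panic("pae_kva_to_pa_example: va %#lx not mapped", va);
        return (pa);
}
#endif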
 1105 
 1106 extern void (*pagezero)(void *, size_t);
 1107 
 1108 /*
 1109  * pmap_zero_phys: same as pmap_zero_page, but for use before vm_pages are
 1110  * initialized.
 1111  */
 1112 void
 1113 pmap_zero_phys_pae(paddr_t pa)
 1114 {
 1115 #ifdef MULTIPROCESSOR
 1116         int id = cpu_number();
 1117 #endif
 1118         pt_entry_t *zpte = PTESLEW(zero_pte, id);
 1119         caddr_t zerova = VASLEW(pmap_zerop, id);
 1120 
 1121 #ifdef DIAGNOSTIC
 1122         if (*zpte)
 1123                 panic("pmap_zero_phys: lock botch");
 1124 #endif
 1125         *zpte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */
 1126         pmap_update_pg((vaddr_t)zerova);        /* flush TLB */
 1127         pagezero(zerova, PAGE_SIZE);            /* zero */
 1128         *zpte = 0;                              /* zap! */
 1129 }
 1130 
 1131 /*
 1132  * pmap_zero_page_uncached: the same, except uncached.
 1133  */
 1134 
 1135 boolean_t
 1136 pmap_zero_page_uncached_pae(paddr_t pa)
 1137 {
 1138 #ifdef MULTIPROCESSOR
 1139         int id = cpu_number();
 1140 #endif
 1141         pt_entry_t *zpte = PTESLEW(zero_pte, id);
 1142         caddr_t zerova = VASLEW(pmap_zerop, id);
 1143 
 1144 #ifdef DIAGNOSTIC
 1145         if (*zpte)
 1146                 panic("pmap_zero_page_uncached: lock botch");
 1147 #endif
 1148 
  1149         *zpte = (pa & PG_FRAME) | PG_V | PG_RW | PG_N;  /* map in */
 1150         pmap_update_pg((vaddr_t)zerova);                /* flush TLB */
 1151         pagezero(zerova, PAGE_SIZE);                    /* zero */
 1152         *zpte = 0;                                      /* zap! */
 1153 
 1154         return (TRUE);
 1155 }
 1156 
 1157 /*
 1158  * pmap_copy_page: copy a page
 1159  */
 1160 
 1161 void
 1162 pmap_copy_page_pae(struct vm_page *srcpg, struct vm_page *dstpg)
 1163 {
 1164         paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg);
 1165         paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg);
 1166 #ifdef MULTIPROCESSOR
 1167         int id = cpu_number();
 1168 #endif
 1169         pt_entry_t *spte = PTESLEW(csrc_pte,id);
 1170         pt_entry_t *dpte = PTESLEW(cdst_pte,id);
 1171         caddr_t csrcva = VASLEW(pmap_csrcp, id);
 1172         caddr_t cdstva = VASLEW(pmap_cdstp, id);
 1173 
 1174 #ifdef DIAGNOSTIC
 1175         if (*spte || *dpte)
 1176                 panic("pmap_copy_page: lock botch");
 1177 #endif
 1178 
 1179         *spte = (srcpa & PG_FRAME) | PG_V | PG_RW;
 1180         *dpte = (dstpa & PG_FRAME) | PG_V | PG_RW;
 1181         pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva);
 1182         bcopy(csrcva, cdstva, PAGE_SIZE);
 1183         *spte = *dpte = 0;                      /* zap! */
 1184         pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva);
 1185 #ifdef MULTIPROCESSOR
 1186         /* Using per-cpu VA; no shootdown required here. */
 1187 #endif
 1188 }
 1189 
 1190 /*
 1191  * p m a p   r e m o v e   f u n c t i o n s
 1192  *
 1193  * functions that remove mappings
 1194  */
 1195 
 1196 /*
 1197  * pmap_remove_ptes: remove PTEs from a PTP
 1198  *
 1199  * => must have proper locking on pmap_master_lock
 1200  * => caller must hold pmap's lock
 1201  * => PTP must be mapped into KVA
 1202  * => PTP should be null if pmap == pmap_kernel()
 1203  */
 1204 
 1205 void
 1206 pmap_remove_ptes_pae(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva,
 1207     vaddr_t startva, vaddr_t endva, int32_t *cpumaskp)
 1208 {
 1209         struct pv_entry *pv_tofree = NULL;      /* list of pv_entrys to free */
 1210         struct pv_entry *pve;
 1211         pt_entry_t *pte = (pt_entry_t *) ptpva;
 1212         pt_entry_t opte;
 1213         int bank, off;
 1214 
 1215         /*
 1216          * note that ptpva points to the PTE that maps startva.   this may
 1217          * or may not be the first PTE in the PTP.
 1218          *
 1219          * we loop through the PTP while there are still PTEs to look at
 1220          * and the wire_count is greater than 1 (because we use the wire_count
 1221          * to keep track of the number of real PTEs in the PTP).
 1222          */
 1223 
 1224         for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1)
 1225                              ; pte++, startva += NBPG) {
 1226                 if (!pmap_valid_entry(*pte))
 1227                         continue;                       /* VA not mapped */
 1228 
 1229                 opte = i386_atomic_testset_uq(pte, 0);  /* zap! */
 1230 
 1231                 if (opte & PG_W)
 1232                         pmap->pm_stats.wired_count--;
 1233                 pmap->pm_stats.resident_count--;
 1234 
 1235                 if (opte & PG_U)
 1236                         pmap_tlb_shootdown(pmap, startva, opte, cpumaskp);
 1237 
 1238                 if (ptp) {
 1239                         ptp->wire_count--;              /* dropping a PTE */
 1240                         /* Make sure that the PDE is flushed */
 1241                         if ((ptp->wire_count <= 1) && !(opte & PG_U))
 1242                                 pmap_tlb_shootdown(pmap, startva, opte,
 1243                                     cpumaskp);
 1244                 }
 1245 
 1246                 /*
 1247                  * if we are not on a pv_head list we are done.
 1248                  */
 1249 
 1250                 if ((opte & PG_PVLIST) == 0) {
 1251 #ifdef DIAGNOSTIC
 1252                         if (vm_physseg_find(atop(opte & PG_FRAME), &off)
 1253                             != -1)
 1254                                 panic("pmap_remove_ptes: managed page without "
 1255                                       "PG_PVLIST for 0x%lx", startva);
 1256 #endif
 1257                         continue;
 1258                 }
 1259 
 1260                 bank = vm_physseg_find(atop(opte & PG_FRAME), &off);
 1261 #ifdef DIAGNOSTIC
 1262                 if (bank == -1)
 1263                         panic("pmap_remove_ptes: unmanaged page marked "
 1264                               "PG_PVLIST, va = 0x%lx, pa = 0x%lx",
 1265                               startva, (u_long)(opte & PG_FRAME));
 1266 #endif
 1267 
 1268                 /* sync R/M bits */
 1269                 simple_lock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock);
 1270                 vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M));
 1271                 pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap,
 1272                                      startva);
 1273                 simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock);
 1274 
 1275                 if (pve) {
 1276                         pve->pv_next = pv_tofree;
 1277                         pv_tofree = pve;
 1278                 }
 1279 
 1280                 /* end of "for" loop: time for next pte */
 1281         }
 1282         if (pv_tofree)
 1283                 pmap_free_pvs(pmap, pv_tofree);
 1284 }
 1285 
 1286 
 1287 /*
 1288  * pmap_remove_pte: remove a single PTE from a PTP
 1289  *
 1290  * => must have proper locking on pmap_master_lock
 1291  * => caller must hold pmap's lock
 1292  * => PTP must be mapped into KVA
 1293  * => PTP should be null if pmap == pmap_kernel()
 1294  * => returns true if we removed a mapping
 1295  */
 1296 
 1297 boolean_t
 1298 pmap_remove_pte_pae(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte,
 1299     vaddr_t va, int32_t *cpumaskp)
 1300 {
 1301         pt_entry_t opte;
 1302         int bank, off;
 1303         struct pv_entry *pve;
 1304 
 1305         if (!pmap_valid_entry(*pte))
 1306                 return(FALSE);          /* VA not mapped */
 1307 
 1308         opte = *pte;                    /* save the old PTE */
 1309         *pte = 0;                       /* zap! */
 1310 
 1311         pmap_exec_account(pmap, va, opte, 0);
 1312 
 1313         if (opte & PG_W)
 1314                 pmap->pm_stats.wired_count--;
 1315         pmap->pm_stats.resident_count--;
 1316 
 1317         if (opte & PG_U)
 1318                 pmap_tlb_shootdown(pmap, va, opte, cpumaskp);
 1319 
 1320         if (ptp) {
 1321                 ptp->wire_count--;              /* dropping a PTE */
 1322                 /* Make sure that the PDE is flushed */
 1323                 if ((ptp->wire_count <= 1) && !(opte & PG_U))
 1324                         pmap_tlb_shootdown(pmap, va, opte, cpumaskp);
 1325 
 1326         }
 1327 
 1328         /*
 1329          * if we are not on a pv_head list we are done.
 1330          */
 1331 
 1332         if ((opte & PG_PVLIST) == 0) {
 1333 #ifdef DIAGNOSTIC
 1334                 if (vm_physseg_find(atop(opte & PG_FRAME), &off) != -1)
 1335                         panic("pmap_remove_pte: managed page without "
 1336                               "PG_PVLIST for 0x%lx", va);
 1337 #endif
 1338                 return(TRUE);
 1339         }
 1340 
 1341         bank = vm_physseg_find(atop(opte & PG_FRAME), &off);
 1342 #ifdef DIAGNOSTIC
 1343         if (bank == -1)
 1344                 panic("pmap_remove_pte: unmanaged page marked "
 1345                     "PG_PVLIST, va = 0x%lx, pa = 0x%lx", va,
 1346                     (u_long)(opte & PG_FRAME));
 1347 #endif
 1348 
 1349         /* sync R/M bits */
 1350         simple_lock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock);
 1351         vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M));
 1352         pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap, va);
 1353         simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock);
 1354 
 1355         if (pve)
 1356                 pmap_free_pv(pmap, pve);
 1357         return(TRUE);
 1358 }
 1359 
 1360 /*
 1361  * pmap_remove: top level mapping removal function
 1362  *
 1363  * => caller should not be holding any pmap locks
 1364  */
 1365 
 1366 void
 1367 pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
 1368 {
 1369         pt_entry_t *ptes, opte;
 1370         boolean_t result;
 1371         paddr_t ptppa;
 1372         vaddr_t blkendva;
 1373         struct vm_page *ptp;
 1374         int32_t cpumask = 0;
 1375         TAILQ_HEAD(, vm_page) empty_ptps;
 1376 
 1377         /*
 1378          * we lock in the pmap => pv_head direction
 1379          */
 1380 
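               /*
                * PTPs freed below are collected on empty_ptps and only
                * handed back to UVM once the TLB shootdowns have been
                * issued (see "Postpone free to after shootdown").
                */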
 1381         TAILQ_INIT(&empty_ptps);
 1382 
 1383         PMAP_MAP_TO_HEAD_LOCK();
 1384         ptes = pmap_map_ptes_pae(pmap); /* locks pmap */
 1385         /*
 1386          * removing one page?  take shortcut function.
 1387          */
 1388 
 1389         if (sva + PAGE_SIZE == eva) {
 1390 
 1391                 if (pmap_valid_entry(PDE(pmap, pdei(sva)))) {
 1392 
 1393                         /* PA of the PTP */
 1394                         ptppa = PDE(pmap, pdei(sva)) & PG_FRAME;
 1395 
 1396                         /* get PTP if non-kernel mapping */
 1397 
 1398                         if (pmap == pmap_kernel()) {
 1399                                 /* we never free kernel PTPs */
 1400                                 ptp = NULL;
 1401                         } else {
 1402                                 if (pmap->pm_ptphint &&
 1403                                     VM_PAGE_TO_PHYS(pmap->pm_ptphint) ==
 1404                                     ptppa) {
 1405                                         ptp = pmap->pm_ptphint;
 1406                                 } else {
 1407                                         ptp = PHYS_TO_VM_PAGE(ptppa);
 1408 #ifdef DIAGNOSTIC
 1409                                         if (ptp == NULL)
 1410                                                 panic("pmap_remove: unmanaged "
 1411                                                       "PTP detected");
 1412 #endif
 1413                                 }
 1414                         }
 1415 
 1416                         /* do it! */
 1417                         result = pmap_remove_pte_pae(pmap, ptp,
 1418                             &ptes[atop(sva)], sva, &cpumask);
 1419 
 1420                         /*
 1421                          * if mapping removed and the PTP is no longer
 1422                          * being used, free it!
 1423                          */
 1424 
 1425                         if (result && ptp && ptp->wire_count <= 1) {
 1426                                 opte = i386_atomic_testset_uq(&PDE(pmap,
 1427                                     pdei(sva)), 0);     /* zap! */
 1428 #ifdef MULTIPROCESSOR
 1429                                 /*
 1430                                  * XXXthorpej Redundant shootdown can happen
 1431                                  * here if we're using APTE space.
 1432                                  */
 1433 #endif
 1434                                 pmap_tlb_shootdown(curpcb->pcb_pmap,
 1435                                     ((vaddr_t)ptes) + ptp->offset, opte,
 1436                                     &cpumask);
 1437 #ifdef MULTIPROCESSOR
 1438                                 /*
 1439                                  * Always shoot down the pmap's self-mapping
 1440                                  * of the PTP.
 1441                                  * XXXthorpej Redundant shootdown can happen
 1442                                  * here if pmap == curpcb->pcb_pmap (not APTE
 1443                                  * space).
 1444                                  */
 1445                                 pmap_tlb_shootdown(pmap,
 1446                                     ((vaddr_t)PTE_BASE) + ptp->offset, opte,
 1447                                     &cpumask);
 1448 #endif
 1449                                 pmap->pm_stats.resident_count--;
 1450                                 if (pmap->pm_ptphint == ptp)
 1451                                         pmap->pm_ptphint =
 1452                                             TAILQ_FIRST(&pmap->pm_obj.memq);
 1453                                 ptp->wire_count = 0;
 1454                                 /* Postpone free to after shootdown. */
 1455                                 uvm_pagerealloc(ptp, NULL, 0);
 1456                                 TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
 1457                         }
 1458                 }
 1459                 pmap_tlb_shootnow(cpumask);
 1460                 pmap_unmap_ptes_pae(pmap);              /* unlock pmap */
 1461                 PMAP_MAP_TO_HEAD_UNLOCK();
 1462                 while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
 1463                         TAILQ_REMOVE(&empty_ptps, ptp, listq);
 1464                         uvm_pagefree(ptp);
 1465                 }
 1466                 return;
 1467         }
 1468 
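               /*
                * removing a larger range: walk it one PDE's worth (NBPD
                * bytes) at a time so that each pass works within a single
                * PTP, which can then be freed if it ends up empty.
                */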
 1469         for (/* null */ ; sva < eva ; sva = blkendva) {
 1470 
 1471                 /* determine range of block */
 1472                 blkendva = i386_round_pdr(sva+1);
 1473                 if (blkendva > eva)
 1474                         blkendva = eva;
 1475 
 1476                 /*
 1477                  * XXXCDC: our PTE mappings should never be removed
 1478                  * with pmap_remove!  if we allow this (and why would
 1479                  * we?) then we end up freeing the pmap's page
 1480                  * directory page (PDP) before we are finished using
  1481                  * it when we hit it in the recursive mapping.  this
 1482                  * is BAD.
 1483                  *
  1484                  * the long term solution is to move the PTEs out of user
  1485                  * address space and into kernel address space (up with
  1486                  * APTE).  then we can set VM_MAXUSER_ADDRESS to
 1487                  * be VM_MAX_ADDRESS.
 1488                  */
 1489 
 1490                 if (pdei(sva) == PDSLOT_PTE)
 1491                         /* XXXCDC: ugly hack to avoid freeing PDP here */
 1492                         continue;
 1493 
 1494                 if (!pmap_valid_entry(PDE(pmap, pdei(sva))))
 1495                         /* valid block? */
 1496                         continue;
 1497 
 1498                 /* PA of the PTP */
 1499                 ptppa = PDE(pmap, pdei(sva)) & PG_FRAME;
 1500 
 1501                 /* get PTP if non-kernel mapping */
 1502                 if (pmap == pmap_kernel()) {
 1503                         /* we never free kernel PTPs */
 1504                         ptp = NULL;
 1505                 } else {
 1506                         if (pmap->pm_ptphint &&
 1507                             VM_PAGE_TO_PHYS(pmap->pm_ptphint) == ptppa) {
 1508                                 ptp = pmap->pm_ptphint;
 1509                         } else {
 1510                                 ptp = PHYS_TO_VM_PAGE(ptppa);
 1511 #ifdef DIAGNOSTIC
 1512                                 if (ptp == NULL)
 1513                                         panic("pmap_remove: unmanaged PTP "
 1514                                               "detected");
 1515 #endif
 1516                         }
 1517                 }
 1518                 pmap_remove_ptes_pae(pmap, ptp, (vaddr_t)&ptes[atop(sva)],
 1519                     sva, blkendva, &cpumask);
 1520 
 1521                 /* if PTP is no longer being used, free it! */
 1522                 if (ptp && ptp->wire_count <= 1) {
 1523                         opte = i386_atomic_testset_uq(&PDE(pmap, pdei(sva)),0);
 1524 #if defined(MULTIPROCESSOR)
 1525                         /*
 1526                          * XXXthorpej Redundant shootdown can happen here
 1527                          * if we're using APTE space.
 1528                          */
 1529 #endif
 1530                         pmap_tlb_shootdown(curpcb->pcb_pmap,
 1531                             ((vaddr_t)ptes) + ptp->offset, opte, &cpumask);
 1532 #if defined(MULTIPROCESSOR)
 1533                         /*
 1534                          * Always shoot down the pmap's self-mapping
 1535                          * of the PTP.
 1536                          * XXXthorpej Redundant shootdown can happen here
 1537                          * if pmap == curpcb->pcb_pmap (not APTE space).
 1538                          */
 1539                         pmap_tlb_shootdown(pmap,
 1540                             ((vaddr_t)PTE_BASE) + ptp->offset, opte, &cpumask);
 1541 #endif
 1542                         pmap->pm_stats.resident_count--;
 1543                         if (pmap->pm_ptphint == ptp)    /* update hint? */
 1544                                 pmap->pm_ptphint =
 1545                                     TAILQ_FIRST(&pmap->pm_obj.memq);
 1546                         ptp->wire_count = 0;
 1547                         /* Postpone free to after shootdown. */
 1548                         uvm_pagerealloc(ptp, NULL, 0);
 1549                         TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
 1550                 }
 1551         }
 1552 
 1553         pmap_tlb_shootnow(cpumask);
 1554         pmap_unmap_ptes_pae(pmap);
 1555         PMAP_MAP_TO_HEAD_UNLOCK();
 1556         while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
 1557                 TAILQ_REMOVE(&empty_ptps, ptp, listq);
 1558                 uvm_pagefree(ptp);
 1559         }
 1560 }
 1561 
 1562 /*
 1563  * pmap_page_remove: remove a managed vm_page from all pmaps that map it
 1564  *
 1565  * => we set pv_head => pmap locking
 1566  * => R/M bits are sync'd back to attrs
 1567  */
 1568 
 1569 void
 1570 pmap_page_remove_pae(struct vm_page *pg)
 1571 {
 1572         int bank, off;
 1573         struct pv_head *pvh;
 1574         struct pv_entry *pve;
 1575         pt_entry_t *ptes, opte;
 1576         int32_t cpumask = 0;
 1577         TAILQ_HEAD(, vm_page) empty_ptps;
 1578         struct vm_page *ptp;
 1579 
 1580         /* XXX: vm_page should either contain pv_head or have a pointer to it */
 1581         bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
 1582         if (bank == -1) {
 1583                 printf("pmap_page_remove: unmanaged page?\n");
 1584                 return;
 1585         }
 1586 
 1587         pvh = &vm_physmem[bank].pmseg.pvhead[off];
 1588         if (pvh->pvh_list == NULL) {
 1589                 return;
 1590         }
 1591 
 1592         TAILQ_INIT(&empty_ptps);
 1593 
 1594         /* set pv_head => pmap locking */
 1595         PMAP_HEAD_TO_MAP_LOCK();
 1596 
 1597         /* XXX: needed if we hold head->map lock? */
 1598         simple_lock(&pvh->pvh_lock);
 1599 
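               /*
                * walk every (pmap, va) mapping recorded for this page,
                * zap its PTE and fold the R/M bits into the attribute
                * array before the pv entries are freed below.
                */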
 1600         for (pve = pvh->pvh_list ; pve != NULL ; pve = pve->pv_next) {
 1601                 ptes = pmap_map_ptes_pae(pve->pv_pmap); /* locks pmap */
 1602 
 1603 #ifdef DIAGNOSTIC
 1604                 if (pve->pv_va >= uvm.pager_sva && pve->pv_va < uvm.pager_eva)
 1605                         printf("pmap_page_remove: found pager VA on pv_list\n");
 1606                 if (pve->pv_ptp && (PDE(pve->pv_pmap,
 1607                     pdei(pve->pv_va)) & PG_FRAME) !=
 1608                     VM_PAGE_TO_PHYS(pve->pv_ptp)) {
 1609                         printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n",
 1610                                pg, pve->pv_va, pve->pv_ptp);
 1611                         printf("pmap_page_remove: PTP's phys addr: "
 1612                                "actual=%llx, recorded=%llx\n",
 1613                                (PDE(pve->pv_pmap, pdei(pve->pv_va)) &
 1614                                 PG_FRAME), VM_PAGE_TO_PHYS(pve->pv_ptp));
 1615                         panic("pmap_page_remove: mapped managed page has "
 1616                               "invalid pv_ptp field");
 1617                 }
 1618 #endif
 1619 
 1620                 opte = ptes[atop(pve->pv_va)];
 1621                 ptes[atop(pve->pv_va)] = 0;                     /* zap! */
 1622 
 1623                 if (opte & PG_W)
 1624                         pve->pv_pmap->pm_stats.wired_count--;
 1625                 pve->pv_pmap->pm_stats.resident_count--;
 1626 
 1627                 /* Shootdown only if referenced */
 1628                 if (opte & PG_U)
 1629                         pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, opte,
 1630                             &cpumask);
 1631 
 1632                 /* sync R/M bits */
 1633                 vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M));
 1634 
 1635                 /* update the PTP reference count.  free if last reference. */
 1636                 if (pve->pv_ptp) {
 1637                         pve->pv_ptp->wire_count--;
 1638                         if (pve->pv_ptp->wire_count <= 1) {
 1639                                 /*
 1640                                  * Do we have to shootdown the page just to
 1641                                  * get the pte out of the TLB ?
 1642                                  */
  1643                                 if (!(opte & PG_U))
 1644                                         pmap_tlb_shootdown(pve->pv_pmap,
 1645                                             pve->pv_va, opte, &cpumask);
 1646 
 1647                                 opte = i386_atomic_testset_uq(&PDE(pve->pv_pmap,
 1648                                     pdei(pve->pv_va)), 0);
 1649                                 pmap_tlb_shootdown(curpcb->pcb_pmap,
 1650                                     ((vaddr_t)ptes) + pve->pv_ptp->offset,
 1651                                     opte, &cpumask);
 1652 #if defined(MULTIPROCESSOR)
 1653                                 /*
 1654                                  * Always shoot down the other pmap's
 1655                                  * self-mapping of the PTP.
 1656                                  */
 1657                                 pmap_tlb_shootdown(pve->pv_pmap,
 1658                                     ((vaddr_t)PTE_BASE) + pve->pv_ptp->offset,
 1659                                     opte, &cpumask);
 1660 #endif
 1661                                 pve->pv_pmap->pm_stats.resident_count--;
 1662                                 /* update hint? */
 1663                                 if (pve->pv_pmap->pm_ptphint == pve->pv_ptp)
 1664                                         pve->pv_pmap->pm_ptphint =
 1665                                             TAILQ_FIRST(&pve->pv_pmap->pm_obj.memq);
 1666                                 pve->pv_ptp->wire_count = 0;
 1667                                 /* Postpone free to after shootdown. */
 1668                                 uvm_pagerealloc(pve->pv_ptp, NULL, 0);
 1669                                 TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp,
 1670                                     listq);
 1671                         }
 1672                 }
 1673                 pmap_unmap_ptes_pae(pve->pv_pmap);      /* unlocks pmap */
 1674         }
 1675         pmap_free_pvs(NULL, pvh->pvh_list);
 1676         pvh->pvh_list = NULL;
 1677         simple_unlock(&pvh->pvh_lock);
 1678         PMAP_HEAD_TO_MAP_UNLOCK();
 1679         pmap_tlb_shootnow(cpumask);
 1680         while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
 1681                 TAILQ_REMOVE(&empty_ptps, ptp, listq);
 1682                 uvm_pagefree(ptp);
 1683         }
 1684 }
 1685 
 1686 /*
 1687  * p m a p   a t t r i b u t e  f u n c t i o n s
 1688  * functions that test/change managed page's attributes
 1689  * since a page can be mapped multiple times we must check each PTE that
 1690  * maps it by going down the pv lists.
 1691  */
 1692 
 1693 /*
 1694  * pmap_test_attrs: test a page's attributes
 1695  *
 1696  * => we set pv_head => pmap locking
 1697  */
 1698 
 1699 boolean_t
 1700 pmap_test_attrs_pae(struct vm_page *pg, int testbits)
 1701 {
 1702         int bank, off;
 1703         char *myattrs;
 1704         struct pv_head *pvh;
 1705         struct pv_entry *pve;
 1706         pt_entry_t *ptes, pte;
 1707 
 1708         /* XXX: vm_page should either contain pv_head or have a pointer to it */
 1709         bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
 1710         if (bank == -1) {
 1711                 printf("pmap_test_attrs: unmanaged page?\n");
 1712                 return(FALSE);
 1713         }
 1714 
 1715         /*
 1716          * before locking: see if attributes are already set and if so,
 1717          * return!
 1718          */
 1719 
 1720         myattrs = &vm_physmem[bank].pmseg.attrs[off];
 1721         if (*myattrs & testbits)
 1722                 return(TRUE);
 1723 
 1724         /* test to see if there is a list before bothering to lock */
 1725         pvh = &vm_physmem[bank].pmseg.pvhead[off];
 1726         if (pvh->pvh_list == NULL) {
 1727                 return(FALSE);
 1728         }
 1729 
 1730         /* nope, gonna have to do it the hard way */
 1731         PMAP_HEAD_TO_MAP_LOCK();
 1732         /* XXX: needed if we hold head->map lock? */
 1733         simple_lock(&pvh->pvh_lock);
 1734 
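               /*
                * OR each mapping's PTE into the cached attrs; the loop
                * terminates early once one of the requested bits shows up.
                */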
 1735         for (pve = pvh->pvh_list; pve != NULL && (*myattrs & testbits) == 0;
 1736              pve = pve->pv_next) {
 1737                 ptes = pmap_map_ptes_pae(pve->pv_pmap);
 1738                 pte = ptes[atop(pve->pv_va)];
 1739                 pmap_unmap_ptes_pae(pve->pv_pmap);
 1740                 *myattrs |= pte;
 1741         }
 1742 
 1743         /*
 1744          * note that we will exit the for loop with a non-null pve if
 1745          * we have found the bits we are testing for.
 1746          */
 1747 
 1748         simple_unlock(&pvh->pvh_lock);
 1749         PMAP_HEAD_TO_MAP_UNLOCK();
 1750         return((*myattrs & testbits) != 0);
 1751 }
 1752 
 1753 /*
 1754  * pmap_change_attrs: change a page's attributes
 1755  *
 1756  * => we set pv_head => pmap locking
 1757  * => we return TRUE if we cleared one of the bits we were asked to
 1758  */
 1759 
 1760 boolean_t
 1761 pmap_change_attrs_pae(struct vm_page *pg, int setbits, int clearbits)
 1762 {
 1763         u_int32_t result;
 1764         int bank, off;
 1765         struct pv_head *pvh;
 1766         struct pv_entry *pve;
 1767         pt_entry_t *ptes, npte, opte;
 1768         char *myattrs;
 1769         int32_t cpumask = 0;
 1770 
 1771         /* XXX: vm_page should either contain pv_head or have a pointer to it */
 1772         bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
 1773         if (bank == -1) {
 1774                 printf("pmap_change_attrs: unmanaged page?\n");
 1775                 return(FALSE);
 1776         }
 1777 
 1778         PMAP_HEAD_TO_MAP_LOCK();
 1779         pvh = &vm_physmem[bank].pmseg.pvhead[off];
 1780         /* XXX: needed if we hold head->map lock? */
 1781         simple_lock(&pvh->pvh_lock);
 1782 
 1783         myattrs = &vm_physmem[bank].pmseg.attrs[off];
 1784         result = *myattrs & clearbits;
 1785         *myattrs = (*myattrs | setbits) & ~clearbits;
 1786 
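               /*
                * the cached attributes were adjusted above; now rewrite
                * every live PTE mapping this page to match, shooting it
                * down only if the entry actually changed.
                */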
 1787         for (pve = pvh->pvh_list; pve != NULL; pve = pve->pv_next) {
 1788 #ifdef DIAGNOSTIC
 1789                 if (!pmap_valid_entry(PDE(pve->pv_pmap, pdei(pve->pv_va))))
 1790                         panic("pmap_change_attrs: mapping without PTP "
 1791                               "detected");
 1792 #endif
 1793 
 1794                 ptes = pmap_map_ptes_pae(pve->pv_pmap); /* locks pmap */
 1795                 npte = ptes[atop(pve->pv_va)];
 1796                 result |= (npte & clearbits);
 1797                 npte = (npte | setbits) & ~(pt_entry_t)clearbits;
 1798                 if (ptes[atop(pve->pv_va)] != npte) {
 1799                         opte = i386_atomic_testset_uq(&ptes[atop(pve->pv_va)],
 1800                             npte);
  1801                         pmap_tlb_shootdown(pve->pv_pmap,
  1802                             pve->pv_va, opte, &cpumask);
 1803                 }
 1804                 pmap_unmap_ptes_pae(pve->pv_pmap);      /* unlocks pmap */
 1805         }
 1806 
 1807         simple_unlock(&pvh->pvh_lock);
 1808         PMAP_HEAD_TO_MAP_UNLOCK();
 1809         pmap_tlb_shootnow(cpumask);
 1810 
 1811         return(result != 0);
 1812 }
 1813 
 1814 /*
 1815  * p m a p   p r o t e c t i o n   f u n c t i o n s
 1816  */
 1817 
 1818 /*
 1819  * pmap_page_protect: change the protection of all recorded mappings
 1820  *      of a managed page
 1821  *
 1822  * => NOTE: this is an inline function in pmap.h
 1823  */
 1824 
 1825 /* see pmap.h */
 1826 
 1827 /*
  1828  * pmap_protect: set the protection of the pages in a pmap
 1829  *
 1830  * => NOTE: this is an inline function in pmap.h
 1831  */
 1832 
 1833 /* see pmap.h */
 1834 
 1835 /*
 1836  * pmap_write_protect: write-protect pages in a pmap
 1837  */
 1838 void
 1839 pmap_write_protect_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva,
 1840     vm_prot_t prot)
 1841 {
 1842         pt_entry_t *ptes, *spte, *epte, opte, npte;
 1843         vaddr_t blockend;
 1844         u_int32_t md_prot;
 1845         int32_t cpumask = 0;
 1846 
 1847         ptes = pmap_map_ptes_pae(pmap);         /* locks pmap */
 1848 
 1849         /* should be ok, but just in case ... */
 1850         sva &= PG_FRAME;
 1851         eva &= PG_FRAME;
 1852 
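               /*
                * process the range one PDE's worth at a time; within each
                * valid block only the protection bits (PG_PROT) of live
                * PTEs are rewritten.
                */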
 1853         for (/* null */ ; sva < eva ; sva = blockend) {
 1854 
 1855                 blockend = (sva & PD_MASK) + NBPD;
 1856                 if (blockend > eva)
 1857                         blockend = eva;
 1858 
 1859                 /*
 1860                  * XXXCDC: our PTE mappings should never be write-protected!
 1861                  *
  1862                  * the long term solution is to move the PTEs out of user
  1863                  * address space and into kernel address space (up with
  1864                  * APTE).  then we can set VM_MAXUSER_ADDRESS to
 1865                  * be VM_MAX_ADDRESS.
 1866                  */
 1867 
 1868                 /* XXXCDC: ugly hack to avoid freeing PDP here */
 1869                 if (pdei(sva) == PDSLOT_PTE)
 1870                         continue;
 1871 
 1872                 /* empty block? */
 1873                 if (!pmap_valid_entry(PDE(pmap, pdei(sva))))
 1874                         continue;
 1875 
 1876                 md_prot = protection_codes[prot];
 1877                 if (sva < VM_MAXUSER_ADDRESS)
 1878                         md_prot |= PG_u;
 1879                 else if (sva < VM_MAX_ADDRESS)
 1880                         /* XXX: write-prot our PTES? never! */
 1881                         md_prot |= (PG_u | PG_RW);
 1882 
 1883                 spte = &ptes[atop(sva)];
 1884                 epte = &ptes[atop(blockend)];
 1885 
 1886                 for (/*null */; spte < epte ; spte++, sva += PAGE_SIZE) {
 1887 
 1888                         if (!pmap_valid_entry(*spte))   /* no mapping? */
 1889                                 continue;
 1890 
 1891                         npte = (*spte & ~(pt_entry_t)PG_PROT) | md_prot;
 1892 
 1893                         if (npte != *spte) {
 1894                                 pmap_exec_account(pmap, sva, *spte, npte);
 1895                                 opte = *spte;
 1896                                 *spte = npte;
 1897                                 pmap_tlb_shootdown(pmap, sva, opte, &cpumask);
 1898                         }
 1899                 }
 1900         }
 1901 
 1902         pmap_tlb_shootnow(cpumask);
 1903         pmap_unmap_ptes_pae(pmap);              /* unlocks pmap */
 1904 }
 1905 
 1906 /*
 1907  * end of protection functions
 1908  */
 1909 
 1910 /*
 1911  * pmap_unwire: clear the wired bit in the PTE
 1912  *
 1913  * => mapping should already be in map
 1914  */
 1915 
 1916 void
 1917 pmap_unwire_pae(struct pmap *pmap, vaddr_t va)
 1918 {
 1919         pt_entry_t *ptes;
 1920 
 1921         if (pmap_valid_entry(PDE(pmap, pdei(va)))) {
 1922                 ptes = pmap_map_ptes_pae(pmap);         /* locks pmap */
 1923 
 1924 #ifdef DIAGNOSTIC
 1925                 if (!pmap_valid_entry(ptes[atop(va)]))
 1926                         panic("pmap_unwire: invalid (unmapped) va 0x%lx", va);
 1927 #endif
 1928                 if ((ptes[atop(va)] & PG_W) != 0) {
 1929                         ptes[atop(va)] &= ~PG_W;
 1930                         pmap->pm_stats.wired_count--;
 1931                 }
 1932 #ifdef DIAGNOSTIC
 1933                 else {
 1934                         printf("pmap_unwire: wiring for pmap %p va 0x%lx "
 1935                                "didn't change!\n", pmap, va);
 1936                 }
 1937 #endif
 1938                 pmap_unmap_ptes_pae(pmap);              /* unlocks map */
 1939         }
 1940 #ifdef DIAGNOSTIC
 1941         else {
 1942                 panic("pmap_unwire: invalid PDE");
 1943         }
 1944 #endif
 1945 }
 1946 
 1947 /*
 1948  * pmap_copy: copy mappings from one pmap to another
 1949  *
 1950  * => optional function
 1951  * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
 1952  */
 1953 
 1954 /*
 1955  * defined as macro in pmap.h
 1956  */
 1957 
 1958 /*
 1959  * pmap_enter: enter a mapping into a pmap
 1960  *
 1961  * => must be done "now" ... no lazy-evaluation
 1962  * => we set pmap => pv_head locking
 1963  */
 1964 
 1965 int
 1966 pmap_enter_pae(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot,
 1967     int flags)
 1968 {
 1969         pt_entry_t *ptes, opte, npte;
 1970         struct vm_page *ptp;
 1971         struct pv_head *pvh;
 1972         struct pv_entry *pve;
 1973         int bank, off, error;
 1974         boolean_t wired = (flags & PMAP_WIRED) != 0;
 1975 
 1976 #ifdef DIAGNOSTIC
 1977         /* sanity check: totally out of range? */
 1978         if (va >= VM_MAX_KERNEL_ADDRESS)
 1979                 panic("pmap_enter: too big");
 1980 
 1981         if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE)
 1982                 panic("pmap_enter: trying to map over PDP/APDP!");
 1983 
 1984         /* sanity check: kernel PTPs should already have been pre-allocated */
 1985         if (va >= VM_MIN_KERNEL_ADDRESS &&
 1986             !pmap_valid_entry(PDE(pmap, pdei(va))))
 1987                 panic("pmap_enter: missing kernel PTP!");
 1988 #endif
 1989 
 1990         /* get lock */
 1991         PMAP_MAP_TO_HEAD_LOCK();
 1992 
 1993         /*
 1994          * map in ptes and get a pointer to our PTP (unless we are the kernel)
 1995          */
 1996 
 1997         ptes = pmap_map_ptes_pae(pmap);         /* locks pmap */
 1998         if (pmap == pmap_kernel()) {
 1999                 ptp = NULL;
 2000         } else {
 2001                 ptp = pmap_get_ptp_pae(pmap, pdei(va), FALSE);
 2002                 if (ptp == NULL) {
 2003                         if (flags & PMAP_CANFAIL) {
 2004                                 error = ENOMEM;
 2005                                 goto out;
 2006                         }
 2007                         panic("pmap_enter: get ptp failed");
 2008                 }
 2009         }
 2010         opte = ptes[atop(va)];                  /* old PTE */
 2011 
 2012         /*
 2013          * is there currently a valid mapping at our VA?
 2014          */
 2015 
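               /*
                * three cases: same PA (just sync the R/M bits and
                * re-enter), different PA (remove the old pv entry first),
                * or no previous mapping (bump resident/wired counts).
                */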
 2016         if (pmap_valid_entry(opte)) {
 2017 
 2018                 /*
 2019                  * first, update pm_stats.  resident count will not
 2020                  * change since we are replacing/changing a valid
 2021                  * mapping.  wired count might change...
 2022                  */
 2023 
 2024                 if (wired && (opte & PG_W) == 0)
 2025                         pmap->pm_stats.wired_count++;
 2026                 else if (!wired && (opte & PG_W) != 0)
 2027                         pmap->pm_stats.wired_count--;
 2028 
 2029                 /*
 2030                  * is the currently mapped PA the same as the one we
 2031                  * want to map?
 2032                  */
 2033 
 2034                 if ((opte & PG_FRAME) == pa) {
 2035 
 2036                         /* if this is on the PVLIST, sync R/M bit */
 2037                         if (opte & PG_PVLIST) {
 2038                                 bank = vm_physseg_find(atop(pa), &off);
 2039 #ifdef DIAGNOSTIC
 2040                                 if (bank == -1)
 2041                                         panic("pmap_enter: same pa PG_PVLIST "
 2042                                               "mapping with unmanaged page "
 2043                                               "pa = 0x%lx (0x%lx)", pa,
 2044                                               atop(pa));
 2045 #endif
 2046                                 pvh = &vm_physmem[bank].pmseg.pvhead[off];
 2047                                 simple_lock(&pvh->pvh_lock);
 2048                                 vm_physmem[bank].pmseg.attrs[off] |= opte;
 2049                                 simple_unlock(&pvh->pvh_lock);
 2050                         } else {
 2051                                 pvh = NULL;     /* ensure !PG_PVLIST */
 2052                         }
 2053                         goto enter_now;
 2054                 }
 2055 
 2056                 /*
 2057                  * changing PAs: we must remove the old one first
 2058                  */
 2059 
 2060                 /*
 2061                  * if current mapping is on a pvlist,
 2062                  * remove it (sync R/M bits)
 2063                  */
 2064 
 2065                 if (opte & PG_PVLIST) {
 2066                         bank = vm_physseg_find(atop(opte & PG_FRAME), &off);
 2067 #ifdef DIAGNOSTIC
 2068                         if (bank == -1)
 2069                                 panic("pmap_enter: PG_PVLIST mapping with "
 2070                                       "unmanaged page "
 2071                                       "pa = 0x%lx (0x%lx)", pa, atop(pa));
 2072 #endif
 2073                         pvh = &vm_physmem[bank].pmseg.pvhead[off];
 2074                         simple_lock(&pvh->pvh_lock);
 2075                         pve = pmap_remove_pv(pvh, pmap, va);
 2076                         vm_physmem[bank].pmseg.attrs[off] |= opte;
 2077                         simple_unlock(&pvh->pvh_lock);
 2078                 } else {
 2079                         pve = NULL;
 2080                 }
 2081         } else {        /* opte not valid */
 2082                 pve = NULL;
 2083                 pmap->pm_stats.resident_count++;
 2084                 if (wired)
 2085                         pmap->pm_stats.wired_count++;
 2086                 if (ptp)
  2087                         ptp->wire_count++;      /* count # of valid entries */
 2088         }
 2089 
 2090         /*
 2091          * at this point pm_stats has been updated.   pve is either NULL
 2092          * or points to a now-free pv_entry structure (the latter case is
 2093          * if we called pmap_remove_pv above).
 2094          *
 2095          * if this entry is to be on a pvlist, enter it now.
 2096          */
 2097 
 2098         bank = vm_physseg_find(atop(pa), &off);
 2099         if (pmap_initialized && bank != -1) {
 2100                 pvh = &vm_physmem[bank].pmseg.pvhead[off];
 2101                 if (pve == NULL) {
 2102                         pve = pmap_alloc_pv(pmap, ALLOCPV_NEED);
 2103                         if (pve == NULL) {
 2104                                 if (flags & PMAP_CANFAIL) {
 2105                                         error = ENOMEM;
 2106                                         goto out;
 2107                                 }
 2108                                 panic("pmap_enter: no pv entries available");
 2109                         }
 2110                 }
 2111                 /* lock pvh when adding */
 2112                 pmap_enter_pv(pvh, pve, pmap, va, ptp);
 2113         } else {
 2114 
 2115                 /* new mapping is not PG_PVLIST.   free pve if we've got one */
 2116                 pvh = NULL;             /* ensure !PG_PVLIST */
 2117                 if (pve)
 2118                         pmap_free_pv(pmap, pve);
 2119         }
 2120 
 2121 enter_now:
 2122         /*
 2123          * at this point pvh is !NULL if we want the PG_PVLIST bit set
 2124          */
 2125 
 2126         npte = pa | protection_codes[prot] | PG_V;
 2127         pmap_exec_account(pmap, va, opte, npte);
 2128         if (pvh)
 2129                 npte |= PG_PVLIST;
 2130         if (wired)
 2131                 npte |= PG_W;
 2132         if (va < VM_MAXUSER_ADDRESS)
 2133                 npte |= PG_u;
 2134         else if (va < VM_MAX_ADDRESS)
 2135                 npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */
 2136         if (pmap == pmap_kernel())
 2137                 npte |= pmap_pg_g;
 2138 
 2139         ptes[atop(va)] = npte;                  /* zap! */
 2140 
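               /*
                * only flush if the new PTE differs from the old one in
                * more than the R/M bits, i.e. the translation or the
                * protection actually changed.
                */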
 2141         if ((opte & ~(pt_entry_t)(PG_M|PG_U)) != npte) {
 2142 #ifdef MULTIPROCESSOR
 2143                 int32_t cpumask = 0;
 2144 
 2145                 pmap_tlb_shootdown(pmap, va, opte, &cpumask);
 2146                 pmap_tlb_shootnow(cpumask);
 2147 #else
 2148                 /* Don't bother deferring in the single CPU case. */
 2149                 if (pmap_is_curpmap(pmap))
 2150                         pmap_update_pg(va);
 2151 #endif
 2152         }
 2153 
 2154         error = 0;
 2155 
 2156 out:
 2157         pmap_unmap_ptes_pae(pmap);
 2158         PMAP_MAP_TO_HEAD_UNLOCK();
 2159         return error;
 2160 }
 2161 
 2162 /*
 2163  * pmap_growkernel: increase usage of KVM space
 2164  *
 2165  * => we allocate new PTPs for the kernel and install them in all
 2166  *      the pmaps on the system.
 2167  */
 2168 
 2169 vaddr_t
 2170 pmap_growkernel_pae(vaddr_t maxkvaddr)
 2171 {
 2172         extern int nkpde;
 2173         struct pmap *kpm = pmap_kernel(), *pm;
 2174         int needed_kpde;   /* needed number of kernel PTPs */
 2175         int s;
 2176         paddr_t ptaddr;
 2177 
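               /*
                * needed_kpde: number of kernel PDEs required to cover
                * maxkvaddr, i.e. the range above VM_MIN_KERNEL_ADDRESS
                * rounded up to a whole NBPD (2MB in PAE) block.
                */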
 2178         needed_kpde = (int)(maxkvaddr - VM_MIN_KERNEL_ADDRESS + (NBPD-1))
 2179                 / NBPD;
 2180         if (needed_kpde <= nkpde)
 2181                 goto out;               /* we are OK */
 2182 
 2183         /*
 2184          * whoops!   we need to add kernel PTPs
 2185          */
 2186 
 2187         s = splhigh();  /* to be safe */
 2188         simple_lock(&kpm->pm_obj.vmobjlock);
 2189 
 2190         for (/*null*/ ; nkpde < needed_kpde ; nkpde++) {
 2191 
 2192                 if (uvm.page_init_done == FALSE) {
 2193 
 2194                         /*
 2195                          * we're growing the kernel pmap early (from
 2196                          * uvm_pageboot_alloc()).  this case must be
 2197                          * handled a little differently.
 2198                          */
 2199 
 2200                         if (uvm_page_physget(&ptaddr) == FALSE)
 2201                                 panic("pmap_growkernel: out of memory");
 2202                         pmap_zero_phys(ptaddr);
 2203 
 2204                         PDE(kpm, PDSLOT_KERN + nkpde) = ptaddr | PG_RW | PG_V;
 2205 
 2206                         /* count PTP as resident */
 2207                         kpm->pm_stats.resident_count++;
 2208                         continue;
 2209                 }
 2210 
 2211                 /*
 2212                  * THIS *MUST* BE CODED SO AS TO WORK IN THE
 2213                  * pmap_initialized == FALSE CASE!  WE MAY BE
 2214                  * INVOKED WHILE pmap_init() IS RUNNING!
 2215                  */
 2216 
 2217                 while (!pmap_alloc_ptp_pae(kpm, PDSLOT_KERN + nkpde, FALSE))
 2218                         uvm_wait("pmap_growkernel");
 2219 
 2220                 /* PG_u not for kernel */
 2221                 PDE(kpm, PDSLOT_KERN + nkpde) &= ~PG_u;
 2222 
 2223                 /* distribute new kernel PTP to all active pmaps */
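                       /*
                        * (each pmap holds its own copy of the kernel PDEs,
                        * so the new kernel PTP has to be entered into every
                        * pmap's page directory, not just the kernel's.)
                        */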
 2224                 simple_lock(&pmaps_lock);
 2225                 LIST_FOREACH(pm, &pmaps, pm_list) {
 2226                         PDE(pm, PDSLOT_KERN + nkpde) =
 2227                                 PDE(kpm, PDSLOT_KERN + nkpde);
 2228                 }
 2229                 simple_unlock(&pmaps_lock);
 2230         }
 2231 
 2232         simple_unlock(&kpm->pm_obj.vmobjlock);
 2233         splx(s);
 2234 
 2235 out:
 2236         return (VM_MIN_KERNEL_ADDRESS + (nkpde * NBPD));
 2237 }
 2238 
 2239 #ifdef DEBUG
 2240 void pmap_dump_pae(struct pmap *, vaddr_t, vaddr_t);
 2241 
 2242 /*
 2243  * pmap_dump: dump all the mappings from a pmap
 2244  *
 2245  * => caller should not be holding any pmap locks
 2246  */
 2247 
 2248 void
 2249 pmap_dump_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
 2250 {
 2251         pt_entry_t *ptes, *pte;
 2252         vaddr_t blkendva;
 2253 
 2254         /*
 2255          * if end is out of range truncate.
 2256          * if (end == start) update to max.
 2257          */
 2258 
 2259         if (eva > VM_MAXUSER_ADDRESS || eva <= sva)
 2260                 eva = VM_MAXUSER_ADDRESS;
 2261 
 2262         /*
 2263          * we lock in the pmap => pv_head direction
 2264          */
 2265 
 2266         PMAP_MAP_TO_HEAD_LOCK();
 2267         ptes = pmap_map_ptes_pae(pmap); /* locks pmap */
 2268 
 2269         /*
  2270          * dumping a range of pages: we dump in PTP sized blocks (2MB in PAE)
 2271          */
 2272 
 2273         for (/* null */ ; sva < eva ; sva = blkendva) {
 2274 
 2275                 /* determine range of block */
 2276                 blkendva = i386_round_pdr(sva+1);
 2277                 if (blkendva > eva)
 2278                         blkendva = eva;
 2279 
 2280                 /* valid block? */
 2281                 if (!pmap_valid_entry(PDE(pmap, pdei(sva))))
 2282                         continue;
 2283 
 2284                 pte = &ptes[atop(sva)];
 2285                 for (/* null */; sva < blkendva ; sva += NBPG, pte++) {
 2286                         if (!pmap_valid_entry(*pte))
 2287                                 continue;
  2288                         printf("va %#lx -> pa %#llx (pte=%#llx)\n", sva,
  2289                                (u_int64_t)(*pte & PG_FRAME), (u_int64_t)*pte);
 2290                 }
 2291         }
 2292         pmap_unmap_ptes_pae(pmap);
 2293         PMAP_MAP_TO_HEAD_UNLOCK();
 2294 }
 2295 #endif

/* [<][>][^][v][top][bottom][index][help] */