/*	$OpenBSD: uvm_amap.c,v 1.39 2007/06/18 21:51:15 pedro Exp $	*/
/*	$NetBSD: uvm_amap.c,v 1.27 2000/11/25 06:27:59 chs Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_amap.c: amap operations
 */

/*
 * this file contains functions that perform operations on amaps.  see
 * uvm_amap.h for a brief explanation of the role of amaps in uvm.
 */

#undef UVM_AMAP_INLINE		/* enable/disable amap inlines */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/pool.h>

#define UVM_AMAP_C		/* ensure disabled inlines are in */
#include <uvm/uvm.h>
#include <uvm/uvm_swap.h>

/*
 * pool for allocation of vm_amap structures.  note that the pool has
 * its own simplelock for its protection.  also note that in order to
 * avoid an endless loop, the amap pool's allocator cannot allocate
 * memory from an amap (it currently goes through the kernel uobj, so
 * we are ok).
 */

struct pool uvm_amap_pool;

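/*
 * list of all amaps in the system, threaded through am_list.  it is
 * maintained by amap_list_insert()/amap_list_remove() below and
 * walked by amap_swap_off().
 */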
LIST_HEAD(, vm_amap) amap_list;

/*
 * local functions
 */

static struct vm_amap *amap_alloc1(int, int, int);
static __inline void amap_list_insert(struct vm_amap *);
static __inline void amap_list_remove(struct vm_amap *);

static __inline void
amap_list_insert(struct vm_amap *amap)
{
	LIST_INSERT_HEAD(&amap_list, amap, am_list);
}

static __inline void
amap_list_remove(struct vm_amap *amap)
{
	LIST_REMOVE(amap, am_list);
}

#ifdef UVM_AMAP_PPREF
/*
 * what is ppref?   ppref is an _optional_ amap feature which is used
 * to keep track of reference counts on a per-page basis.  it is enabled
 * when UVM_AMAP_PPREF is defined.
 *
 * when enabled, an array of ints is allocated for the pprefs.  this
 * array is allocated only when a partial reference is added to the
 * map (either by unmapping part of the amap, or gaining a reference
 * to only a part of an amap).  if the malloc of the array fails
 * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
 * that we tried to do ppref's but couldn't alloc the array so just
 * give up (after all, this is an optional feature!).
 *
 * the array is divided into page sized "chunks."   for chunks of length 1,
 * the chunk reference count plus one is stored in that chunk's slot.
 * for chunks of length > 1 the first slot contains (the reference count
 * plus one) * -1.    [the negative value indicates that the length is
 * greater than one.]   the second slot of the chunk contains the length
 * of the chunk.   here is an example:
 *
 * actual REFS:   2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
 *       ppref:  -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
 *              <----------><-><----><-------><----><-><------->
 * (x = don't care)
 *
 * this allows one int to contain the ref count for the whole
 * chunk.   note that the "plus one" part is needed because a reference
 * count of zero is neither positive nor negative (we need a way to tell
 * if we've got one zero or a bunch of them).
 *
 * here are some in-line functions to help us.
 */

static __inline void pp_getreflen(int *, int, int *, int *);
static __inline void pp_setreflen(int *, int, int, int);

/*
 * pp_getreflen: get the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static __inline void
pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
{

	if (ppref[offset] > 0) {		/* chunk size must be 1 */
		*refp = ppref[offset] - 1;	/* don't forget to adjust */
		*lenp = 1;
	} else {
		*refp = (ppref[offset] * -1) - 1;
		*lenp = ppref[offset+1];
	}
}

/*
 * pp_setreflen: set the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static __inline void
pp_setreflen(int *ppref, int offset, int ref, int len)
{
	if (len == 1) {
		ppref[offset] = ref + 1;
	} else {
		ppref[offset] = (ref + 1) * -1;
		ppref[offset+1] = len;
	}
}
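
/*
 * an illustrative sketch of the encoding (matching the example table
 * above): a chunk of 4 slots with a reference count of 2 starting at
 * slot 0, and a length-1 chunk with a reference count of 3 at slot 4,
 * would be stored and read back as follows:
 *
 *	pp_setreflen(ppref, 0, 2, 4);		==> ppref[0] = -3, ppref[1] = 4
 *	pp_setreflen(ppref, 4, 3, 1);		==> ppref[4] = 4
 *	pp_getreflen(ppref, 0, &ref, &len);	==> ref = 2, len = 4
 */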
#endif

/*
 * amap_init: called at boot time to init global amap data structures
 */

void
amap_init(void)
{
	/*
	 * Initialize the vm_amap pool.
	 */
	pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0,
	    "amappl", &pool_allocator_nointr);
	pool_sethiwat(&uvm_amap_pool, 4096);
}

/*
 * amap_alloc1: internal function that allocates an amap, but does not
 *	init the overlay.
 *
 * => lock on returned amap is init'd
 */
static inline struct vm_amap *
amap_alloc1(int slots, int padslots, int waitf)
{
	struct vm_amap *amap;
	int totalslots;

	amap = pool_get(&uvm_amap_pool,
	    (waitf == M_WAITOK) ? PR_WAITOK : PR_NOWAIT);
	if (amap == NULL)
		return (NULL);

	totalslots = malloc_roundup((slots + padslots) * sizeof(int)) /
	    sizeof(int);
	amap->am_ref = 1;
	amap->am_flags = 0;
#ifdef UVM_AMAP_PPREF
	amap->am_ppref = NULL;
#endif
	amap->am_maxslot = totalslots;
	amap->am_nslot = slots;
	amap->am_nused = 0;

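	/*
	 * a brief sketch of the amap overlay (matching how the rest of
	 * this file uses these arrays): am_anon[slot] points to the anon
	 * (if any) in that slot, am_slots[] is a dense list of the slots
	 * currently in use, and am_bckptr[slot] gives that slot's index
	 * back into am_slots[] (amap_wiperange() relies on this for its
	 * swap-with-last removal).
	 */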
	amap->am_slots = malloc(totalslots * sizeof(int), M_UVMAMAP,
	    waitf);
	if (amap->am_slots == NULL)
		goto fail1;

	amap->am_bckptr = malloc(totalslots * sizeof(int), M_UVMAMAP, waitf);
	if (amap->am_bckptr == NULL)
		goto fail2;

	amap->am_anon = malloc(totalslots * sizeof(struct vm_anon *),
	    M_UVMAMAP, waitf);
	if (amap->am_anon == NULL)
		goto fail3;

	return (amap);

fail3:
	free(amap->am_bckptr, M_UVMAMAP);
fail2:
	free(amap->am_slots, M_UVMAMAP);
fail1:
	pool_put(&uvm_amap_pool, amap);
	return (NULL);
}

/*
 * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
 *
 * => caller should ensure sz is a multiple of PAGE_SIZE
 * => reference count to new amap is set to one
 * => new amap is returned unlocked
 */

struct vm_amap *
amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
{
	struct vm_amap *amap;
	int slots, padslots;
	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);

	AMAP_B2SLOT(slots, sz);		/* load slots */
	AMAP_B2SLOT(padslots, padsz);

	amap = amap_alloc1(slots, padslots, waitf);
	if (amap) {
		memset(amap->am_anon, 0,
		    amap->am_maxslot * sizeof(struct vm_anon *));
		amap_list_insert(amap);
	}

	UVMHIST_LOG(maphist,"<- done, amap = %p, sz=%lu", amap, sz, 0, 0);
	return (amap);
}

/*
 * amap_free: free an amap
 *
 * => the amap must be locked (mainly for simplelock accounting)
 * => the amap should have a zero reference count and be empty
 */
void
amap_free(struct vm_amap *amap)
{
	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);

	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);

	free(amap->am_slots, M_UVMAMAP);
	free(amap->am_bckptr, M_UVMAMAP);
	free(amap->am_anon, M_UVMAMAP);
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
		free(amap->am_ppref, M_UVMAMAP);
#endif
	pool_put(&uvm_amap_pool, amap);

	UVMHIST_LOG(maphist,"<- done, freed amap = %p", amap, 0, 0, 0);
}

/*
 * amap_extend: extend the size of an amap (if needed)
 *
 * => called from uvm_map when we want to extend an amap to cover
 *    a new mapping (rather than allocate a new one)
 * => amap should be unlocked (we will lock it)
 * => to safely extend an amap it should have a reference count of
 *    one (thus it can't be shared)
 * => XXXCDC: support padding at this level?
 */
int
amap_extend(struct vm_map_entry *entry, vsize_t addsize)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slotoff = entry->aref.ar_pageoff;
	int slotmapped, slotadd, slotneed, slotalloc;
#ifdef UVM_AMAP_PPREF
	int *newppref, *oldppref;
#endif
	u_int *newsl, *newbck, *oldsl, *oldbck;
	struct vm_anon **newover, **oldover;
	int slotadded;
	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, " (entry=%p, addsize=%lu)", entry, addsize, 0, 0);

	/*
	 * first, determine how many slots we need in the amap.  don't
	 * forget that ar_pageoff could be non-zero: this means that
	 * there are some unused slots before us in the amap.
	 */

	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
	slotneed = slotoff + slotmapped + slotadd;
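
	/*
	 * for example (illustrative numbers only, assuming 4k pages):
	 * with ar_pageoff = 2, an 8k mapping (slotmapped = 2) and an
	 * 8k addsize (slotadd = 2), we would need slotneed = 6 slots.
	 */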

	/*
	 * case 1: we already have enough slots in the map and thus
	 * only need to bump the reference counts on the slots we are
	 * adding.
	 */

	if (amap->am_nslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
			amap_pp_adjref(amap, slotoff + slotmapped, slotadd, 1);
		}
#endif
		UVMHIST_LOG(maphist,"<- done (case 1), amap = %p, slotneed=%ld",
		    amap, slotneed, 0, 0);
		return (0);
	}

	/*
	 * case 2: we pre-allocated slots for use and we just need to
	 * bump nslot up to account for these slots.
	 */

	if (amap->am_maxslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
			if ((slotoff + slotmapped) < amap->am_nslot)
				amap_pp_adjref(amap, slotoff + slotmapped,
				    (amap->am_nslot - (slotoff + slotmapped)),
				    1);
			pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
			    slotneed - amap->am_nslot);
		}
#endif
		amap->am_nslot = slotneed;

		/*
		 * no need to zero am_anon since that was done at
		 * alloc time and we never shrink an allocation.
		 */
		UVMHIST_LOG(maphist,"<- done (case 2), amap = %p, slotneed=%ld",
		    amap, slotneed, 0, 0);
		return (0);
	}

	/*
	 * case 3: we need to malloc a new amap and copy all the amap
	 * data over from old amap to the new one.
	 *
	 * XXXCDC: could we take advantage of a kernel realloc()?
	 */

	slotalloc = malloc_roundup(slotneed * sizeof(int)) / sizeof(int);
#ifdef UVM_AMAP_PPREF
	newppref = NULL;
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		newppref = malloc(slotalloc * sizeof(int), M_UVMAMAP,
		    M_WAITOK | M_CANFAIL);
		if (newppref == NULL) {
			/* give up if malloc fails */
			free(amap->am_ppref, M_UVMAMAP);
			amap->am_ppref = PPREF_NONE;
		}
	}
#endif
	newsl = malloc(slotalloc * sizeof(int), M_UVMAMAP,
	    M_WAITOK | M_CANFAIL);
	newbck = malloc(slotalloc * sizeof(int), M_UVMAMAP,
	    M_WAITOK | M_CANFAIL);
	newover = malloc(slotalloc * sizeof(struct vm_anon *), M_UVMAMAP,
	    M_WAITOK | M_CANFAIL);
	if (newsl == NULL || newbck == NULL || newover == NULL) {
		if (newsl != NULL) {
			free(newsl, M_UVMAMAP);
		}
		if (newbck != NULL) {
			free(newbck, M_UVMAMAP);
		}
		if (newover != NULL) {
			free(newover, M_UVMAMAP);
		}
		return (ENOMEM);
	}
	KASSERT(amap->am_maxslot < slotneed);

	/*
	 * now copy everything over to new malloc'd areas...
	 */

	slotadded = slotalloc - amap->am_nslot;

	/* do am_slots */
	oldsl = amap->am_slots;
	memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
	amap->am_slots = newsl;

	/* do am_anon */
	oldover = amap->am_anon;
	memcpy(newover, oldover, sizeof(struct vm_anon *) * amap->am_nslot);
	memset(newover + amap->am_nslot, 0, sizeof(struct vm_anon *) *
	    slotadded);
	amap->am_anon = newover;

	/* do am_bckptr */
	oldbck = amap->am_bckptr;
	memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
	memset(newbck + amap->am_nslot, 0,
	    sizeof(int) * slotadded);		/* XXX: needed? */
	amap->am_bckptr = newbck;

#ifdef UVM_AMAP_PPREF
	/* do ppref */
	oldppref = amap->am_ppref;
	if (newppref) {
		memcpy(newppref, oldppref, sizeof(int) * amap->am_nslot);
		memset(newppref + amap->am_nslot, 0, sizeof(int) * slotadded);
		amap->am_ppref = newppref;
		if ((slotoff + slotmapped) < amap->am_nslot)
			amap_pp_adjref(amap, slotoff + slotmapped,
			    (amap->am_nslot - (slotoff + slotmapped)), 1);
		pp_setreflen(newppref, amap->am_nslot, 1,
		    slotneed - amap->am_nslot);
	}
#endif

	/* update master values */
	amap->am_nslot = slotneed;
	amap->am_maxslot = slotalloc;

	/* and free */
	free(oldsl, M_UVMAMAP);
	free(oldbck, M_UVMAMAP);
	free(oldover, M_UVMAMAP);
#ifdef UVM_AMAP_PPREF
	if (oldppref && oldppref != PPREF_NONE)
		free(oldppref, M_UVMAMAP);
#endif
	UVMHIST_LOG(maphist,"<- done (case 3), amap = %p, slotneed=%ld",
	    amap, slotneed, 0, 0);
	return (0);
}

/*
 * amap_share_protect: change protection of anons in a shared amap
 *
 * for shared amaps, given the current data structure layout, it is
 * not possible for us to directly locate all maps referencing the
 * shared anon (to change the protection).  in order to protect data
 * in shared maps we use pmap_page_protect().  [this is useful for IPC
 * mechanisms like map entry passing that may want to write-protect
 * all mappings of a shared amap.]  we traverse am_anon or am_slots
 * depending on the current state of the amap.
 *
 * => entry's map and amap must be locked by the caller
 */
void
amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slots, lcv, slot, stop;

	AMAP_B2SLOT(slots, (entry->end - entry->start));
	stop = entry->aref.ar_pageoff + slots;

	if (slots < amap->am_nused) {
		/* cheaper to traverse am_anon */
		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
			if (amap->am_anon[lcv] == NULL)
				continue;
			if (amap->am_anon[lcv]->an_page != NULL)
				pmap_page_protect(amap->am_anon[lcv]->an_page,
				    prot);
		}
		return;
	}

	/* cheaper to traverse am_slots */
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		slot = amap->am_slots[lcv];
		if (slot < entry->aref.ar_pageoff || slot >= stop)
			continue;
		if (amap->am_anon[slot]->an_page != NULL)
			pmap_page_protect(amap->am_anon[slot]->an_page, prot);
	}
	return;
}

/*
 * amap_wipeout: wipe out all anons in an amap; then free the amap!
 *
 * => called from amap_unref when the final reference to an amap is
 *    dropped (i.e. the reference count has reached zero)
 * => the amap should be locked (by the caller)
 */

void
amap_wipeout(struct vm_amap *amap)
{
	int lcv, slot;
	struct vm_anon *anon;
	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist,"(amap=%p)", amap, 0,0,0);

	KASSERT(amap->am_ref == 0);

	if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
		/*
		 * amap_swap_off will call us again.
		 */
		return;
	}
	amap_list_remove(amap);

	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		int refs;

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];

		if (anon == NULL || anon->an_ref == 0)
			panic("amap_wipeout: corrupt amap");

		simple_lock(&anon->an_lock);	/* lock anon */

		UVMHIST_LOG(maphist,"  processing anon %p, ref=%ld", anon,
		    anon->an_ref, 0, 0);

		refs = --anon->an_ref;
		simple_unlock(&anon->an_lock);
		if (refs == 0) {
			/*
			 * we had the last reference to a vm_anon.  free it.
			 */
			uvm_anfree(anon);
		}
	}

	/*
	 * now we free the map
	 */

	amap->am_ref = 0;	/* ... was already zero (see KASSERT above) */
	amap->am_nused = 0;
	amap_free(amap);	/* will unlock and free amap */
	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
}

/*
 * amap_copy: ensure that a map entry's "needs_copy" flag is false
 *	by copying the amap if necessary.
 *
 * => an entry with a null amap pointer will get a new (blank) one.
 * => the map that the map entry belongs to must be locked by caller.
 * => the amap currently attached to "entry" (if any) must be unlocked.
 * => if canchunk is true, then we may clip the entry into a chunk
 * => "startva" and "endva" are used only if canchunk is true.  they are
 *    used to limit chunking (e.g. if you have a large space that you
 *    know you are going to need to allocate amaps for, there is no point
 *    in allowing that to be chunked)
 */
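
/*
 * an illustrative sketch of a typical call (not quoted from the fault
 * handler): a caller holding the map lock resolves needs-copy with
 *
 *	if (UVM_ET_ISNEEDSCOPY(entry))
 *		amap_copy(map, entry, M_NOWAIT, TRUE, startva, endva);
 *
 * and then re-checks UVM_ET_ISNEEDSCOPY, since with M_NOWAIT the
 * allocation (and thus the copy) can fail silently.
 */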

void
amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
    boolean_t canchunk, vaddr_t startva, vaddr_t endva)
{
	struct vm_amap *amap, *srcamap;
	int slots, lcv;
	vaddr_t chunksize;
	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, waitf=%ld)",
	    map, entry, waitf, 0);

	/*
	 * is there a map to copy?   if not, create one from scratch.
	 */

	if (entry->aref.ar_amap == NULL) {

		/*
		 * check to see if we have a large amap that we can
		 * chunk.  we align startva/endva to chunk-sized
		 * boundaries and then clip to them.
		 */

		if (canchunk && atop(entry->end - entry->start) >=
		    UVM_AMAP_LARGE) {
			/* convert slots to bytes */
			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
			startva = (startva / chunksize) * chunksize;
			endva = roundup(endva, chunksize);
			UVMHIST_LOG(maphist, "  chunk amap ==> clip "
			    "0x%lx->0x%lx to 0x%lx->0x%lx",
			    entry->start, entry->end, startva, endva);
			UVM_MAP_CLIP_START(map, entry, startva);
			/* watch out for endva wrap-around! */
			if (endva >= startva)
				UVM_MAP_CLIP_END(map, entry, endva);
		}
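
		/*
		 * e.g. (illustrative numbers, assuming 4k pages and the
		 * usual UVM_AMAP_CHUNK of 16 slots): chunksize is 64k,
		 * so a startva of 0x13000 rounds down to 0x10000 and an
		 * endva of 0x15000 rounds up to 0x20000 before clipping.
		 */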

		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%lx->0x%lx]",
		    entry->start, entry->end, 0, 0);
		entry->aref.ar_pageoff = 0;
		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
		    waitf);
		if (entry->aref.ar_amap != NULL)
			entry->etype &= ~UVM_ET_NEEDSCOPY;
		return;
	}

	/*
	 * first check and see if we are the only map entry
	 * referencing the amap we currently have.  if so, then we can
	 * just take it over rather than copying it.  note that we are
	 * reading am_ref with the amap unlocked... the value can only
	 * be one if we have the only reference to the amap (via our
	 * locked map).  if we are greater than one we fall through to
	 * the next case (where we double check the value).
	 */

	if (entry->aref.ar_amap->am_ref == 1) {
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
		    0, 0, 0, 0);
		return;
	}

	/*
	 * looks like we need to copy the map.
	 */

	UVMHIST_LOG(maphist,"  amap=%p, ref=%ld, must copy it",
	    entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
	AMAP_B2SLOT(slots, entry->end - entry->start);
	amap = amap_alloc1(slots, 0, waitf);
	if (amap == NULL) {
		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
		return;
	}
	srcamap = entry->aref.ar_amap;

	/*
	 * need to double check reference count now that we've got the
	 * src amap locked down.  the reference count could have
	 * changed while we were in malloc.  if the reference count
	 * dropped down to one we take over the old map rather than
	 * copying the amap.
	 */

	if (srcamap->am_ref == 1) {		/* take it over? */
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		amap->am_ref--;		/* drop final reference to map */
		amap_free(amap);	/* dispose of new (unused) amap */
		return;
	}

	/*
	 * we must copy it now.
	 */

	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
	for (lcv = 0 ; lcv < slots; lcv++) {
		amap->am_anon[lcv] =
		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
		if (amap->am_anon[lcv] == NULL)
			continue;
		simple_lock(&amap->am_anon[lcv]->an_lock);
		amap->am_anon[lcv]->an_ref++;
		simple_unlock(&amap->am_anon[lcv]->an_lock);
		amap->am_bckptr[lcv] = amap->am_nused;
		amap->am_slots[amap->am_nused] = lcv;
		amap->am_nused++;
	}
	memset(&amap->am_anon[lcv], 0,
	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));

	/*
	 * drop our reference to the old amap (srcamap) and unlock.
	 * we know that the reference count on srcamap is greater than
	 * one (we checked above), so there is no way we could drop
	 * the count to zero.  [and no need to worry about freeing it]
	 */

	srcamap->am_ref--;
	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
		srcamap->am_flags &= ~AMAP_SHARED;	/* clear shared flag */
#ifdef UVM_AMAP_PPREF
	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
	}
#endif

	/*
	 * install new amap.
	 */

	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = amap;
	entry->etype &= ~UVM_ET_NEEDSCOPY;

	amap_list_insert(amap);

	/*
	 * done!
	 */
	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
}

/*
 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
 *
 * called during fork(2) when the parent process has a wired map
 * entry.  in that case we want to avoid write-protecting pages
 * in the parent's map (e.g. like what you'd do for a COW page)
 * so we resolve the COW here.
 *
 * => assume parent's entry was wired, thus all pages are resident.
 * => assume pages that are loaned out (loan_count) are already mapped
 *    read-only in all maps, and thus no need for us to worry about them
 * => assume both parent and child vm_map's are locked
 * => caller passes child's map/entry in to us
 * => if we run out of memory we will unlock the amap and sleep _with_ the
 *    parent and child vm_map's locked(!).  we have to do this since
 *    we are in the middle of a fork(2) and we can't let the parent
 *    map change until we are done copying all the map entries.
 * => XXXCDC: out of memory should cause fork to fail, but there is
 *    currently no easy way to do this (needs fix)
 * => page queues must be unlocked (we may lock them)
 */

void
amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int lcv, slot;
	struct vm_anon *anon, *nanon;
	struct vm_page *pg, *npg;

	/*
	 * note that if we unlock the amap then we must ReStart the "lcv"
	 * for loop because some other process could reorder the anons in
	 * the am_anon[] array on us while the lock is dropped.
	 */
ReStart:
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {

		/*
		 * get the page
		 */

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];
		simple_lock(&anon->an_lock);
		pg = anon->an_page;

		/*
		 * page must be resident since parent is wired
		 */

		if (pg == NULL)
			panic("amap_cow_now: non-resident wired page "
			    "in anon %p", anon);

		/*
		 * if the anon ref count is one and the page is not loaned,
		 * then we are safe (the child has exclusive access to the
		 * page).  if the page is loaned, then it must already be
		 * mapped read-only.
		 *
		 * we only need to get involved when these are not true.
		 * [note: if loan_count == 0, then the anon must own the page]
		 */

		if (anon->an_ref > 1 && pg->loan_count == 0) {

			/*
			 * if the page is busy then we have to unlock, wait for
			 * it and then restart.
			 */
			if (pg->pg_flags & PG_BUSY) {
				atomic_setbits_int(&pg->pg_flags, PG_WANTED);
				UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE,
				    "cownow", 0);
				goto ReStart;
			}

			/*
			 * ok, time to do a copy-on-write to a new anon
			 */
			nanon = uvm_analloc();
			if (nanon) {
				npg = uvm_pagealloc(NULL, 0, nanon, 0);
			} else
				npg = NULL;	/* XXX: quiet gcc warning */

			if (nanon == NULL || npg == NULL) {
				/* out of memory */
				/*
				 * XXXCDC: we should cause fork to fail, but
				 * we can't ...
				 */
				if (nanon) {
					simple_lock(&nanon->an_lock);
					uvm_anfree(nanon);
				}
				simple_unlock(&anon->an_lock);
				uvm_wait("cownowpage");
				goto ReStart;
			}

			/*
			 * got it... now we can copy the data and replace anon
			 * with our new one...
			 */
			uvm_pagecopy(pg, npg);		/* old -> new */
			anon->an_ref--;			/* can't drop to zero */
			amap->am_anon[slot] = nanon;	/* replace */

			/*
			 * drop PG_BUSY on new page ... since we have had its
			 * owner locked the whole time it can't be
			 * PG_RELEASED | PG_WANTED.
			 */
			atomic_clearbits_int(&npg->pg_flags, PG_BUSY|PG_FAKE);
			UVM_PAGE_OWN(npg, NULL);
			uvm_lock_pageq();
			uvm_pageactivate(npg);
			uvm_unlock_pageq();
		}

		simple_unlock(&anon->an_lock);
		/*
		 * done with this anon, next ...!
		 */

	}	/* end of 'for' loop */
}

/*
 * amap_splitref: split a single reference into two separate references
 *
 * => called from uvm_map's clip routines
 * => origref's map should be locked
 * => origref->ar_amap should be unlocked (we will lock)
 */
void
amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
{
	int leftslots;

	AMAP_B2SLOT(leftslots, offset);
	if (leftslots == 0)
		panic("amap_splitref: split at zero offset");

	/*
	 * now: amap is locked and we have a valid am_anon array.
	 */

	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
		panic("amap_splitref: map size check failed");

#ifdef UVM_AMAP_PPREF
	/*
	 * establish ppref before we add a duplicate reference to the amap
	 */
	if (origref->ar_amap->am_ppref == NULL)
		amap_pp_establish(origref->ar_amap);
#endif

	splitref->ar_amap = origref->ar_amap;
	splitref->ar_amap->am_ref++;		/* not a share reference */
	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
}

#ifdef UVM_AMAP_PPREF

/*
 * amap_pp_establish: add a ppref array to an amap, if possible
 *
 * => amap locked by caller
 */
void
amap_pp_establish(struct vm_amap *amap)
{

	amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot,
	    M_UVMAMAP, M_NOWAIT);

	/*
	 * if we fail then we just won't use ppref for this amap
	 */
	if (amap->am_ppref == NULL) {
		amap->am_ppref = PPREF_NONE;	/* not using it */
		return;
	}

	/*
	 * init ppref
	 */
	memset(amap->am_ppref, 0, sizeof(int) * amap->am_maxslot);
	pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
}

/*
 * amap_pp_adjref: adjust reference count to a part of an amap using the
 * per-page reference count array.
 *
 * => map and amap locked by caller
 * => caller must check that ppref != PPREF_NONE before calling
 */
void
amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
{
	int stopslot, *ppref, lcv, prevlcv;
	int ref, len, prevref, prevlen;

	stopslot = curslot + slotlen;
	ppref = amap->am_ppref;
	prevlcv = 0;

	/*
	 * first advance to the correct place in the ppref array,
	 * fragment if needed.
	 */

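	/*
	 * a worked example (illustrative values only): suppose the whole
	 * ppref is one chunk <ref 1, len 16> and we are called with
	 * curslot = 4, slotlen = 4, adjval = 1.  the loop below fragments
	 * the chunk into <1,4> at slot 0 and <1,12> at slot 4; the second
	 * loop then splits <1,12> into <1,4> and <1,8> and bumps the
	 * middle chunk, leaving <1,4> <2,4> <1,8>.
	 */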
	for (lcv = 0 ; lcv < curslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > curslot) {	/* goes past start? */
			pp_setreflen(ppref, lcv, ref, curslot - lcv);
			pp_setreflen(ppref, curslot, ref,
			    len - (curslot - lcv));
			len = curslot - lcv;	/* new length of entry @ lcv */
		}
		prevlcv = lcv;
	}
	if (lcv != 0)
		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
	else {
		/*
		 * Ensure that the "prevref == ref" test below always
		 * fails, since we're starting from the beginning of
		 * the ppref array; that is, there is no previous
		 * chunk.
		 */
		prevref = -1;
		prevlen = 0;
	}

	/*
	 * now adjust reference counts in range.  merge the first
	 * changed entry with the last unchanged entry if possible.
	 */

	if (lcv != curslot)
		panic("amap_pp_adjref: overshot target");

	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > stopslot) {	/* goes past end? */
			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
			pp_setreflen(ppref, stopslot, ref,
			    len - (stopslot - lcv));
			len = stopslot - lcv;
		}
		ref += adjval;
		if (ref < 0)
			panic("amap_pp_adjref: negative reference count");
		if (lcv == prevlcv + prevlen && ref == prevref) {
			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
		} else {
			pp_setreflen(ppref, lcv, ref, len);
		}
		if (ref == 0)
			amap_wiperange(amap, lcv, len);
	}
}

/*
 * amap_wiperange: wipe out a range of an amap
 * [different from amap_wipeout because the amap is kept intact]
 *
 * => both map and amap must be locked by caller.
 */
void
amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
{
	int byanon, lcv, stop, curslot, ptr, slotend;
	struct vm_anon *anon;

	/*
	 * we can either traverse the amap by am_anon or by am_slots depending
	 * on which is cheaper.  decide now.
	 */

	if (slots < amap->am_nused) {
		byanon = TRUE;
		lcv = slotoff;
		stop = slotoff + slots;
	} else {
		byanon = FALSE;
		lcv = 0;
		stop = amap->am_nused;
		slotend = slotoff + slots;
	}

	while (lcv < stop) {
		int refs;

		if (byanon) {
			curslot = lcv++;	/* lcv advances here */
			if (amap->am_anon[curslot] == NULL)
				continue;
		} else {
			curslot = amap->am_slots[lcv];
			if (curslot < slotoff || curslot >= slotend) {
				lcv++;		/* lcv advances here */
				continue;
			}
			stop--;	/* drop stop, since anon will be removed */
		}
		anon = amap->am_anon[curslot];

		/*
		 * remove it from the amap
		 */
		amap->am_anon[curslot] = NULL;
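		/*
		 * swap-with-last removal: am_slots[] must stay dense, so
		 * move the last in-use entry into the hole left by curslot
		 * and fix up that entry's back pointer in am_bckptr[].
		 */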
		ptr = amap->am_bckptr[curslot];
		if (ptr != (amap->am_nused - 1)) {
			amap->am_slots[ptr] =
			    amap->am_slots[amap->am_nused - 1];
			amap->am_bckptr[amap->am_slots[ptr]] =
			    ptr;	/* back ptr. */
		}
		amap->am_nused--;

		/*
		 * drop anon reference count
		 */
		simple_lock(&anon->an_lock);
		refs = --anon->an_ref;
		simple_unlock(&anon->an_lock);
		if (refs == 0) {
			/*
			 * we just eliminated the last reference to an anon.
			 * free it.
			 */
			uvm_anfree(anon);
		}
	}
}

#endif

/*
 * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
 *
 * => called with swap_syscall_lock held.
 * => note that we don't always traverse all anons.
 *    e.g. amaps being wiped out, released anons.
 * => return TRUE if failed.
 */

boolean_t
amap_swap_off(int startslot, int endslot)
{
	struct vm_amap *am;
	struct vm_amap *am_next;
	struct vm_amap marker_prev;
	struct vm_amap marker_next;
	boolean_t rv = FALSE;

#if defined(DIAGNOSTIC)
	memset(&marker_prev, 0, sizeof(marker_prev));
	memset(&marker_next, 0, sizeof(marker_next));
#endif /* defined(DIAGNOSTIC) */

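	/*
	 * a note on the markers (inferred from the code below): since
	 * uvm_anon_pagein() can sleep, amaps can be freed or shuffled in
	 * amap_list while we work.  marker_prev/marker_next bracket the
	 * current amap so a valid am_next can be found even if the
	 * current amap goes away.
	 */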
	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
		int i;

		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
		LIST_INSERT_AFTER(am, &marker_next, am_list);

		if (am->am_nused <= 0) {
			goto next;
		}

		for (i = 0; i < am->am_nused; i++) {
			int slot;
			int swslot;
			struct vm_anon *anon;

			slot = am->am_slots[i];
			anon = am->am_anon[slot];
			simple_lock(&anon->an_lock);

			swslot = anon->an_swslot;
			if (swslot < startslot || endslot <= swslot) {
				simple_unlock(&anon->an_lock);
				continue;
			}

			am->am_flags |= AMAP_SWAPOFF;

			rv = uvm_anon_pagein(anon);

			am->am_flags &= ~AMAP_SWAPOFF;
			if (amap_refs(am) == 0) {
				amap_wipeout(am);
				am = NULL;
				break;
			}
			if (rv) {
				break;
			}
			i = 0;
		}

next:
		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
		    &marker_next);
		am_next = LIST_NEXT(&marker_next, am_list);
		LIST_REMOVE(&marker_prev, am_list);
		LIST_REMOVE(&marker_next, am_list);
	}

	return rv;
}