/*	$OpenBSD: uvm_amap.c,v 1.39 2007/06/18 21:51:15 pedro Exp $	*/
/*	$NetBSD: uvm_amap.c,v 1.27 2000/11/25 06:27:59 chs Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_amap.c: amap operations
 */

/*
 * this file contains functions that perform operations on amaps.  see
 * uvm_amap.h for a brief explanation of the role of amaps in uvm.
 */

#undef UVM_AMAP_INLINE		/* enable/disable amap inlines */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/pool.h>

#define UVM_AMAP_C		/* ensure disabled inlines are in */
#include <uvm/uvm.h>
#include <uvm/uvm_swap.h>

/*
 * pool for allocation of vm_amap structures.  note that the pool has
 * its own simplelock for its protection.  also note that in order to
 * avoid an endless loop, the amap pool's allocator cannot allocate
 * memory from an amap (it currently goes through the kernel uobj, so
 * we are ok).
 */

struct pool uvm_amap_pool;

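/*
 * list of all amaps in the system, threaded through am_list.  it is
 * maintained by amap_list_insert()/amap_list_remove() below and
 * walked by amap_swap_off().
 */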
LIST_HEAD(, vm_amap) amap_list;

/*
 * local functions
 */

static struct vm_amap *amap_alloc1(int, int, int);
static __inline void amap_list_insert(struct vm_amap *);
static __inline void amap_list_remove(struct vm_amap *);

static __inline void
amap_list_insert(struct vm_amap *amap)
{
	LIST_INSERT_HEAD(&amap_list, amap, am_list);
}

static __inline void
amap_list_remove(struct vm_amap *amap)
{
	LIST_REMOVE(amap, am_list);
}

#ifdef UVM_AMAP_PPREF
/*
 * what is ppref?   ppref is an _optional_ amap feature which is used
 * to keep track of reference counts on a per-page basis.  it is enabled
 * when UVM_AMAP_PPREF is defined.
 *
 * when enabled, an array of ints is allocated for the pprefs.  this
 * array is allocated only when a partial reference is added to the
 * map (either by unmapping part of the amap, or gaining a reference
 * to only a part of an amap).  if the malloc of the array fails
 * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
 * that we tried to do ppref's but couldn't alloc the array so just
 * give up (after all, this is an optional feature!).
 *
 * the array is divided into page sized "chunks."   for chunks of length 1,
 * the chunk reference count plus one is stored in that chunk's slot.
 * for chunks of length > 1 the first slot contains (the reference count
 * plus one) * -1.    [the negative value indicates that the length is
 * greater than one.]   the second slot of the chunk contains the length
 * of the chunk.   here is an example:
 *
 * actual REFS:   2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
 *       ppref:  -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
 *              <----------><-><----><-------><----><-><------->
 * (x = don't care)
 *
 * this allows one int to contain the ref count for the whole
 * chunk.   note that the "plus one" part is needed because a reference
 * count of zero is neither positive nor negative (we need a way to tell
 * if we've got one zero or a bunch of them).
 *
 * here are some in-line functions to help us.
 */

static __inline void pp_getreflen(int *, int, int *, int *);
static __inline void pp_setreflen(int *, int, int, int);

/*
 * pp_getreflen: get the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static __inline void
pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
{

	if (ppref[offset] > 0) {		/* chunk size must be 1 */
		*refp = ppref[offset] - 1;	/* don't forget to adjust */
		*lenp = 1;
	} else {
		*refp = (ppref[offset] * -1) - 1;
		*lenp = ppref[offset+1];
	}
}

/*
 * pp_setreflen: set the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static __inline void
pp_setreflen(int *ppref, int offset, int ref, int len)
{
	if (len == 1) {
		ppref[offset] = ref + 1;
	} else {
		ppref[offset] = (ref + 1) * -1;
		ppref[offset+1] = len;
	}
}
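
/*
 * an illustrative sketch of the encoding (matching the example table
 * above): a chunk of 4 slots with a reference count of 2 starting at
 * slot 0, and a length-1 chunk with a reference count of 3 at slot 4,
 * would be stored and read back as follows:
 *
 *	pp_setreflen(ppref, 0, 2, 4);		==> ppref[0] = -3, ppref[1] = 4
 *	pp_setreflen(ppref, 4, 3, 1);		==> ppref[4] = 4
 *	pp_getreflen(ppref, 0, &ref, &len);	==> ref = 2, len = 4
 */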
#endif

/*
 * amap_init: called at boot time to init global amap data structures
 */

void
amap_init(void)
{
	/*
	 * Initialize the vm_amap pool.
	 */
	pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0,
	    "amappl", &pool_allocator_nointr);
	pool_sethiwat(&uvm_amap_pool, 4096);
}

/*
 * amap_alloc1: internal function that allocates an amap, but does not
 *	init the overlay.
 *
 * => lock on returned amap is init'd
 */
static inline struct vm_amap *
amap_alloc1(int slots, int padslots, int waitf)
{
	struct vm_amap *amap;
	int totalslots;

	amap = pool_get(&uvm_amap_pool,
	    (waitf == M_WAITOK) ? PR_WAITOK : PR_NOWAIT);
	if (amap == NULL)
		return (NULL);

	totalslots = malloc_roundup((slots + padslots) * sizeof(int)) /
	    sizeof(int);
	amap->am_ref = 1;
	amap->am_flags = 0;
#ifdef UVM_AMAP_PPREF
	amap->am_ppref = NULL;
#endif
	amap->am_maxslot = totalslots;
	amap->am_nslot = slots;
	amap->am_nused = 0;

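	/*
	 * a brief sketch of the amap overlay (matching how the rest of
	 * this file uses these arrays): am_anon[slot] points to the anon
	 * (if any) in that slot, am_slots[] is a dense list of the slots
	 * currently in use, and am_bckptr[slot] gives that slot's index
	 * back into am_slots[] (amap_wiperange() relies on this for its
	 * swap-with-last removal).
	 */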
	amap->am_slots = malloc(totalslots * sizeof(int), M_UVMAMAP,
	    waitf);
	if (amap->am_slots == NULL)
		goto fail1;

	amap->am_bckptr = malloc(totalslots * sizeof(int), M_UVMAMAP, waitf);
	if (amap->am_bckptr == NULL)
		goto fail2;

	amap->am_anon = malloc(totalslots * sizeof(struct vm_anon *),
	    M_UVMAMAP, waitf);
	if (amap->am_anon == NULL)
		goto fail3;

	return (amap);

fail3:
	free(amap->am_bckptr, M_UVMAMAP);
fail2:
	free(amap->am_slots, M_UVMAMAP);
fail1:
	pool_put(&uvm_amap_pool, amap);
	return (NULL);
}

/*
 * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
 *
 * => caller should ensure sz is a multiple of PAGE_SIZE
 * => reference count to new amap is set to one
 * => new amap is returned unlocked
 */

struct vm_amap *
amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
{
	struct vm_amap *amap;
	int slots, padslots;
	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);

	AMAP_B2SLOT(slots, sz);		/* load slots */
	AMAP_B2SLOT(padslots, padsz);

	amap = amap_alloc1(slots, padslots, waitf);
	if (amap) {
		memset(amap->am_anon, 0,
		    amap->am_maxslot * sizeof(struct vm_anon *));
		amap_list_insert(amap);
	}

	UVMHIST_LOG(maphist,"<- done, amap = %p, sz=%lu", amap, sz, 0, 0);
	return (amap);
}

/*
 * amap_free: free an amap
 *
 * => the amap must be locked (mainly for simplelock accounting)
 * => the amap should have a zero reference count and be empty
 */
void
amap_free(struct vm_amap *amap)
{
	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);

	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);

	free(amap->am_slots, M_UVMAMAP);
	free(amap->am_bckptr, M_UVMAMAP);
	free(amap->am_anon, M_UVMAMAP);
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
		free(amap->am_ppref, M_UVMAMAP);
#endif
	pool_put(&uvm_amap_pool, amap);

	UVMHIST_LOG(maphist,"<- done, freed amap = %p", amap, 0, 0, 0);
}

/*
 * amap_extend: extend the size of an amap (if needed)
 *
 * => called from uvm_map when we want to extend an amap to cover
 *    a new mapping (rather than allocate a new one)
 * => amap should be unlocked (we will lock it)
 * => to safely extend an amap it should have a reference count of
 *    one (thus it can't be shared)
 * => XXXCDC: support padding at this level?
 */
int
amap_extend(struct vm_map_entry *entry, vsize_t addsize)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slotoff = entry->aref.ar_pageoff;
	int slotmapped, slotadd, slotneed, slotalloc;
#ifdef UVM_AMAP_PPREF
	int *newppref, *oldppref;
#endif
	u_int *newsl, *newbck, *oldsl, *oldbck;
	struct vm_anon **newover, **oldover;
	int slotadded;
	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, " (entry=%p, addsize=%lu)", entry, addsize, 0, 0);

	/*
	 * first, determine how many slots we need in the amap.  don't
	 * forget that ar_pageoff could be non-zero: this means that
	 * there are some unused slots before us in the amap.
	 */

	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
	slotneed = slotoff + slotmapped + slotadd;
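
	/*
	 * for example (illustrative numbers only, assuming 4k pages):
	 * with ar_pageoff = 2, an 8k mapping (slotmapped = 2) and an
	 * 8k addsize (slotadd = 2), we would need slotneed = 6 slots.
	 */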

	/*
	 * case 1: we already have enough slots in the map and thus
	 * only need to bump the reference counts on the slots we are
	 * adding.
	 */

	if (amap->am_nslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
			amap_pp_adjref(amap, slotoff + slotmapped, slotadd, 1);
		}
#endif
		UVMHIST_LOG(maphist,"<- done (case 1), amap = %p, slotneed=%ld",
		    amap, slotneed, 0, 0);
		return (0);
	}

	/*
	 * case 2: we pre-allocated slots for use and we just need to
	 * bump nslot up to account for these slots.
	 */

	if (amap->am_maxslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
			if ((slotoff + slotmapped) < amap->am_nslot)
				amap_pp_adjref(amap, slotoff + slotmapped,
				    (amap->am_nslot - (slotoff + slotmapped)),
				    1);
			pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
			    slotneed - amap->am_nslot);
		}
#endif
		amap->am_nslot = slotneed;

		/*
		 * no need to zero am_anon since that was done at
		 * alloc time and we never shrink an allocation.
		 */
		UVMHIST_LOG(maphist,"<- done (case 2), amap = %p, slotneed=%ld",
		    amap, slotneed, 0, 0);
		return (0);
	}

	/*
	 * case 3: we need to malloc a new amap and copy all the amap
	 * data over from old amap to the new one.
	 *
	 * XXXCDC: could we take advantage of a kernel realloc()?
	 */

	slotalloc = malloc_roundup(slotneed * sizeof(int)) / sizeof(int);
#ifdef UVM_AMAP_PPREF
	newppref = NULL;
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		newppref = malloc(slotalloc * sizeof(int), M_UVMAMAP,
		    M_WAITOK | M_CANFAIL);
		if (newppref == NULL) {
			/* give up if malloc fails */
			free(amap->am_ppref, M_UVMAMAP);
			amap->am_ppref = PPREF_NONE;
		}
	}
#endif
	newsl = malloc(slotalloc * sizeof(int), M_UVMAMAP,
	    M_WAITOK | M_CANFAIL);
	newbck = malloc(slotalloc * sizeof(int), M_UVMAMAP,
	    M_WAITOK | M_CANFAIL);
	newover = malloc(slotalloc * sizeof(struct vm_anon *), M_UVMAMAP,
	    M_WAITOK | M_CANFAIL);
	if (newsl == NULL || newbck == NULL || newover == NULL) {
		if (newsl != NULL) {
			free(newsl, M_UVMAMAP);
		}
		if (newbck != NULL) {
			free(newbck, M_UVMAMAP);
		}
		if (newover != NULL) {
			free(newover, M_UVMAMAP);
		}
		return (ENOMEM);
	}
	KASSERT(amap->am_maxslot < slotneed);

	/*
	 * now copy everything over to new malloc'd areas...
	 */

	slotadded = slotalloc - amap->am_nslot;

	/* do am_slots */
	oldsl = amap->am_slots;
	memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
	amap->am_slots = newsl;

	/* do am_anon */
	oldover = amap->am_anon;
	memcpy(newover, oldover, sizeof(struct vm_anon *) * amap->am_nslot);
	memset(newover + amap->am_nslot, 0, sizeof(struct vm_anon *) *
	    slotadded);
	amap->am_anon = newover;

	/* do am_bckptr */
	oldbck = amap->am_bckptr;
	memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
	memset(newbck + amap->am_nslot, 0,
	    sizeof(int) * slotadded);		/* XXX: needed? */
	amap->am_bckptr = newbck;

#ifdef UVM_AMAP_PPREF
	/* do ppref */
	oldppref = amap->am_ppref;
	if (newppref) {
		memcpy(newppref, oldppref, sizeof(int) * amap->am_nslot);
		memset(newppref + amap->am_nslot, 0, sizeof(int) * slotadded);
		amap->am_ppref = newppref;
		if ((slotoff + slotmapped) < amap->am_nslot)
			amap_pp_adjref(amap, slotoff + slotmapped,
			    (amap->am_nslot - (slotoff + slotmapped)), 1);
		pp_setreflen(newppref, amap->am_nslot, 1,
		    slotneed - amap->am_nslot);
	}
#endif

	/* update master values */
	amap->am_nslot = slotneed;
	amap->am_maxslot = slotalloc;

	/* and free */
	free(oldsl, M_UVMAMAP);
	free(oldbck, M_UVMAMAP);
	free(oldover, M_UVMAMAP);
#ifdef UVM_AMAP_PPREF
	if (oldppref && oldppref != PPREF_NONE)
		free(oldppref, M_UVMAMAP);
#endif
	UVMHIST_LOG(maphist,"<- done (case 3), amap = %p, slotneed=%ld",
	    amap, slotneed, 0, 0);
	return (0);
}

/*
 * amap_share_protect: change protection of anons in a shared amap
 *
 * for shared amaps, given the current data structure layout, it is
 * not possible for us to directly locate all maps referencing the
 * shared anon (to change the protection).  in order to protect data
 * in shared maps we use pmap_page_protect().  [this is useful for IPC
 * mechanisms like map entry passing that may want to write-protect
 * all mappings of a shared amap.]  we traverse am_anon or am_slots
 * depending on the current state of the amap.
 *
 * => entry's map and amap must be locked by the caller
 */
void
amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slots, lcv, slot, stop;

	AMAP_B2SLOT(slots, (entry->end - entry->start));
	stop = entry->aref.ar_pageoff + slots;

	if (slots < amap->am_nused) {
		/* cheaper to traverse am_anon */
		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
			if (amap->am_anon[lcv] == NULL)
				continue;
			if (amap->am_anon[lcv]->an_page != NULL)
				pmap_page_protect(amap->am_anon[lcv]->an_page,
				    prot);
		}
		return;
	}

	/* cheaper to traverse am_slots */
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		slot = amap->am_slots[lcv];
		if (slot < entry->aref.ar_pageoff || slot >= stop)
			continue;
		if (amap->am_anon[slot]->an_page != NULL)
			pmap_page_protect(amap->am_anon[slot]->an_page, prot);
	}
	return;
}

/*
 * amap_wipeout: wipe out all anons in an amap; then free the amap!
 *
 * => called from amap_unref when the final reference to an amap is
 *    dropped (i.e. the reference count has reached zero)
 * => the amap should be locked (by the caller)
 */

void
amap_wipeout(struct vm_amap *amap)
{
	int lcv, slot;
	struct vm_anon *anon;
	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist,"(amap=%p)", amap, 0,0,0);

	KASSERT(amap->am_ref == 0);

	if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
		/*
		 * amap_swap_off will call us again.
		 */
		return;
	}
	amap_list_remove(amap);

	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		int refs;

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];

		if (anon == NULL || anon->an_ref == 0)
			panic("amap_wipeout: corrupt amap");

		simple_lock(&anon->an_lock);	/* lock anon */

		UVMHIST_LOG(maphist,"  processing anon %p, ref=%ld", anon,
		    anon->an_ref, 0, 0);

		refs = --anon->an_ref;
		simple_unlock(&anon->an_lock);
		if (refs == 0) {
			/*
			 * we had the last reference to a vm_anon.  free it.
			 */
			uvm_anfree(anon);
		}
	}

	/*
	 * now we free the map
	 */

	amap->am_ref = 0;	/* ... was already zero (see KASSERT above) */
	amap->am_nused = 0;
	amap_free(amap);	/* will unlock and free amap */
	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
}

/*
 * amap_copy: ensure that a map entry's "needs_copy" flag is false
 *	by copying the amap if necessary.
 *
 * => an entry with a null amap pointer will get a new (blank) one.
 * => the map that the map entry belongs to must be locked by caller.
 * => the amap currently attached to "entry" (if any) must be unlocked.
 * => if canchunk is true, then we may clip the entry into a chunk
 * => "startva" and "endva" are used only if canchunk is true.  they are
 *    used to limit chunking (e.g. if you have a large space that you
 *    know you are going to need to allocate amaps for, there is no point
 *    in allowing that to be chunked)
 */
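
/*
 * an illustrative sketch of a typical call (not quoted from the fault
 * handler): a caller holding the map lock resolves needs-copy with
 *
 *	if (UVM_ET_ISNEEDSCOPY(entry))
 *		amap_copy(map, entry, M_NOWAIT, TRUE, startva, endva);
 *
 * and then re-checks UVM_ET_ISNEEDSCOPY, since with M_NOWAIT the
 * allocation (and thus the copy) can fail silently.
 */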

void
amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
    boolean_t canchunk, vaddr_t startva, vaddr_t endva)
{
	struct vm_amap *amap, *srcamap;
	int slots, lcv;
	vaddr_t chunksize;
	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, waitf=%ld)",
	    map, entry, waitf, 0);

	/*
	 * is there a map to copy?   if not, create one from scratch.
	 */

	if (entry->aref.ar_amap == NULL) {

		/*
		 * check to see if we have a large amap that we can
		 * chunk.  we align startva/endva to chunk-sized
		 * boundaries and then clip to them.
		 */

		if (canchunk && atop(entry->end - entry->start) >=
		    UVM_AMAP_LARGE) {
			/* convert slots to bytes */
			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
			startva = (startva / chunksize) * chunksize;
			endva = roundup(endva, chunksize);
			UVMHIST_LOG(maphist, "  chunk amap ==> clip "
			    "0x%lx->0x%lx to 0x%lx->0x%lx",
			    entry->start, entry->end, startva, endva);
			UVM_MAP_CLIP_START(map, entry, startva);
			/* watch out for endva wrap-around! */
			if (endva >= startva)
				UVM_MAP_CLIP_END(map, entry, endva);
		}
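
		/*
		 * e.g. (illustrative numbers, assuming 4k pages and the
		 * usual UVM_AMAP_CHUNK of 16 slots): chunksize is 64k,
		 * so a startva of 0x13000 rounds down to 0x10000 and an
		 * endva of 0x15000 rounds up to 0x20000 before clipping.
		 */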

		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%lx->0x%lx]",
		    entry->start, entry->end, 0, 0);
		entry->aref.ar_pageoff = 0;
		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
		    waitf);
		if (entry->aref.ar_amap != NULL)
			entry->etype &= ~UVM_ET_NEEDSCOPY;
		return;
	}

	/*
	 * first check and see if we are the only map entry
	 * referencing the amap we currently have.  if so, then we can
	 * just take it over rather than copying it.  note that we are
	 * reading am_ref with the amap unlocked... the value can only
	 * be one if we have the only reference to the amap (via our
	 * locked map).  if we are greater than one we fall through to
	 * the next case (where we double check the value).
	 */

	if (entry->aref.ar_amap->am_ref == 1) {
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
		    0, 0, 0, 0);
		return;
	}

	/*
	 * looks like we need to copy the map.
	 */

	UVMHIST_LOG(maphist,"  amap=%p, ref=%ld, must copy it",
	    entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
	AMAP_B2SLOT(slots, entry->end - entry->start);
	amap = amap_alloc1(slots, 0, waitf);
	if (amap == NULL) {
		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
		return;
	}
	srcamap = entry->aref.ar_amap;

	/*
	 * need to double check reference count now that we've got the
	 * src amap locked down.  the reference count could have
	 * changed while we were in malloc.  if the reference count
	 * dropped down to one we take over the old map rather than
	 * copying the amap.
	 */

	if (srcamap->am_ref == 1) {		/* take it over? */
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		amap->am_ref--;		/* drop final reference to map */
		amap_free(amap);	/* dispose of new (unused) amap */
		return;
	}

	/*
	 * we must copy it now.
	 */

	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
	for (lcv = 0 ; lcv < slots; lcv++) {
		amap->am_anon[lcv] =
		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
		if (amap->am_anon[lcv] == NULL)
			continue;
		simple_lock(&amap->am_anon[lcv]->an_lock);
		amap->am_anon[lcv]->an_ref++;
		simple_unlock(&amap->am_anon[lcv]->an_lock);
		amap->am_bckptr[lcv] = amap->am_nused;
		amap->am_slots[amap->am_nused] = lcv;
		amap->am_nused++;
	}
	memset(&amap->am_anon[lcv], 0,
	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));

	/*
	 * drop our reference to the old amap (srcamap) and unlock.
	 * we know that the reference count on srcamap is greater than
	 * one (we checked above), so there is no way we could drop
	 * the count to zero.  [and no need to worry about freeing it]
	 */

	srcamap->am_ref--;
	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
		srcamap->am_flags &= ~AMAP_SHARED;	/* clear shared flag */
#ifdef UVM_AMAP_PPREF
	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
	}
#endif

	/*
	 * install new amap.
	 */

	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = amap;
	entry->etype &= ~UVM_ET_NEEDSCOPY;

	amap_list_insert(amap);

	/*
	 * done!
	 */
	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
}

/*
 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
 *
 * called during fork(2) when the parent process has a wired map
 * entry.  in that case we want to avoid write-protecting pages
 * in the parent's map (e.g. like what you'd do for a COW page)
 * so we resolve the COW here.
 *
 * => assume parent's entry was wired, thus all pages are resident.
 * => assume pages that are loaned out (loan_count) are already mapped
 *    read-only in all maps, and thus no need for us to worry about them
 * => assume both parent and child vm_map's are locked
 * => caller passes child's map/entry in to us
 * => if we run out of memory we will unlock the amap and sleep _with_ the
 *    parent and child vm_map's locked(!).  we have to do this since
 *    we are in the middle of a fork(2) and we can't let the parent
 *    map change until we are done copying all the map entries.
 * => XXXCDC: out of memory should cause fork to fail, but there is
 *    currently no easy way to do this (needs fix)
 * => page queues must be unlocked (we may lock them)
 */

void
amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int lcv, slot;
	struct vm_anon *anon, *nanon;
	struct vm_page *pg, *npg;

	/*
	 * note that if we unlock the amap then we must ReStart the "lcv"
	 * for loop because some other process could reorder the anons in
	 * the am_anon[] array on us while the lock is dropped.
	 */
ReStart:
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {

		/*
		 * get the page
		 */

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];
		simple_lock(&anon->an_lock);
		pg = anon->an_page;

		/*
		 * page must be resident since parent is wired
		 */

		if (pg == NULL)
			panic("amap_cow_now: non-resident wired page "
			    "in anon %p", anon);

		/*
		 * if the anon ref count is one and the page is not loaned,
		 * then we are safe (the child has exclusive access to the
		 * page).  if the page is loaned, then it must already be
		 * mapped read-only.
		 *
		 * we only need to get involved when these are not true.
		 * [note: if loan_count == 0, then the anon must own the page]
		 */

		if (anon->an_ref > 1 && pg->loan_count == 0) {

			/*
			 * if the page is busy then we have to unlock, wait for
			 * it and then restart.
			 */
			if (pg->pg_flags & PG_BUSY) {
				atomic_setbits_int(&pg->pg_flags, PG_WANTED);
				UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE,
				    "cownow", 0);
				goto ReStart;
			}

			/*
			 * ok, time to do a copy-on-write to a new anon
			 */
			nanon = uvm_analloc();
			if (nanon) {
				npg = uvm_pagealloc(NULL, 0, nanon, 0);
			} else
				npg = NULL;	/* XXX: quiet gcc warning */

			if (nanon == NULL || npg == NULL) {
				/* out of memory */
				/*
				 * XXXCDC: we should cause fork to fail, but
				 * we can't ...
				 */
				if (nanon) {
					simple_lock(&nanon->an_lock);
					uvm_anfree(nanon);
				}
				simple_unlock(&anon->an_lock);
				uvm_wait("cownowpage");
				goto ReStart;
			}

			/*
			 * got it... now we can copy the data and replace anon
			 * with our new one...
			 */
			uvm_pagecopy(pg, npg);		/* old -> new */
			anon->an_ref--;			/* can't drop to zero */
			amap->am_anon[slot] = nanon;	/* replace */

			/*
			 * drop PG_BUSY on new page ... since we have had its
			 * owner locked the whole time it can't be
			 * PG_RELEASED | PG_WANTED.
			 */
			atomic_clearbits_int(&npg->pg_flags, PG_BUSY|PG_FAKE);
			UVM_PAGE_OWN(npg, NULL);
			uvm_lock_pageq();
			uvm_pageactivate(npg);
			uvm_unlock_pageq();
		}

		simple_unlock(&anon->an_lock);
		/*
		 * done with this anon, next ...!
		 */

	}	/* end of 'for' loop */
}

/*
 * amap_splitref: split a single reference into two separate references
 *
 * => called from uvm_map's clip routines
 * => origref's map should be locked
 * => origref->ar_amap should be unlocked (we will lock)
 */
void
amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
{
	int leftslots;

	AMAP_B2SLOT(leftslots, offset);
	if (leftslots == 0)
		panic("amap_splitref: split at zero offset");

	/*
	 * now: amap is locked and we have a valid am_anon array.
	 */

	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
		panic("amap_splitref: map size check failed");

#ifdef UVM_AMAP_PPREF
	/*
	 * establish ppref before we add a duplicate reference to the amap
	 */
	if (origref->ar_amap->am_ppref == NULL)
		amap_pp_establish(origref->ar_amap);
#endif

	splitref->ar_amap = origref->ar_amap;
	splitref->ar_amap->am_ref++;		/* not a share reference */
	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
}

#ifdef UVM_AMAP_PPREF

/*
 * amap_pp_establish: add a ppref array to an amap, if possible
 *
 * => amap locked by caller
 */
void
amap_pp_establish(struct vm_amap *amap)
{

	amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot,
	    M_UVMAMAP, M_NOWAIT);

	/*
	 * if we fail then we just won't use ppref for this amap
	 */
	if (amap->am_ppref == NULL) {
		amap->am_ppref = PPREF_NONE;	/* not using it */
		return;
	}

	/*
	 * init ppref
	 */
	memset(amap->am_ppref, 0, sizeof(int) * amap->am_maxslot);
	pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
}

/*
 * amap_pp_adjref: adjust reference count to a part of an amap using the
 * per-page reference count array.
 *
 * => map and amap locked by caller
 * => caller must check that ppref != PPREF_NONE before calling
 */
void
amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
{
	int stopslot, *ppref, lcv, prevlcv;
	int ref, len, prevref, prevlen;

	stopslot = curslot + slotlen;
	ppref = amap->am_ppref;
	prevlcv = 0;

	/*
	 * first advance to the correct place in the ppref array,
	 * fragment if needed.
	 */

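	/*
	 * a worked example (illustrative values only): suppose the whole
	 * ppref is one chunk <ref 1, len 16> and we are called with
	 * curslot = 4, slotlen = 4, adjval = 1.  the loop below fragments
	 * the chunk into <1,4> at slot 0 and <1,12> at slot 4; the second
	 * loop then splits <1,12> into <1,4> and <1,8> and bumps the
	 * middle chunk, leaving <1,4> <2,4> <1,8>.
	 */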
	for (lcv = 0 ; lcv < curslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > curslot) {	/* goes past start? */
			pp_setreflen(ppref, lcv, ref, curslot - lcv);
			pp_setreflen(ppref, curslot, ref,
			    len - (curslot - lcv));
			len = curslot - lcv;	/* new length of entry @ lcv */
		}
		prevlcv = lcv;
	}
	if (lcv != 0)
		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
	else {
		/*
		 * Ensure that the "prevref == ref" test below always
		 * fails, since we're starting from the beginning of
		 * the ppref array; that is, there is no previous
		 * chunk.
		 */
		prevref = -1;
		prevlen = 0;
	}

	/*
	 * now adjust reference counts in range.  merge the first
	 * changed entry with the last unchanged entry if possible.
	 */

	if (lcv != curslot)
		panic("amap_pp_adjref: overshot target");

	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > stopslot) {	/* goes past end? */
			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
			pp_setreflen(ppref, stopslot, ref,
			    len - (stopslot - lcv));
			len = stopslot - lcv;
		}
		ref += adjval;
		if (ref < 0)
			panic("amap_pp_adjref: negative reference count");
		if (lcv == prevlcv + prevlen && ref == prevref) {
			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
		} else {
			pp_setreflen(ppref, lcv, ref, len);
		}
		if (ref == 0)
			amap_wiperange(amap, lcv, len);
	}
}

/*
 * amap_wiperange: wipe out a range of an amap
 * [different from amap_wipeout because the amap is kept intact]
 *
 * => both map and amap must be locked by caller.
 */
void
amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
{
	int byanon, lcv, stop, curslot, ptr, slotend;
	struct vm_anon *anon;

	/*
	 * we can either traverse the amap by am_anon or by am_slots depending
	 * on which is cheaper.  decide now.
	 */

	if (slots < amap->am_nused) {
		byanon = TRUE;
		lcv = slotoff;
		stop = slotoff + slots;
	} else {
		byanon = FALSE;
		lcv = 0;
		stop = amap->am_nused;
		slotend = slotoff + slots;
	}

	while (lcv < stop) {
		int refs;

		if (byanon) {
			curslot = lcv++;	/* lcv advances here */
			if (amap->am_anon[curslot] == NULL)
				continue;
		} else {
			curslot = amap->am_slots[lcv];
			if (curslot < slotoff || curslot >= slotend) {
				lcv++;		/* lcv advances here */
				continue;
			}
			stop--;	/* drop stop, since anon will be removed */
		}
		anon = amap->am_anon[curslot];

		/*
		 * remove it from the amap
		 */
		amap->am_anon[curslot] = NULL;
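		/*
		 * swap-with-last removal: am_slots[] must stay dense, so
		 * move the last in-use entry into the hole left by curslot
		 * and fix up that entry's back pointer in am_bckptr[].
		 */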
		ptr = amap->am_bckptr[curslot];
		if (ptr != (amap->am_nused - 1)) {
			amap->am_slots[ptr] =
			    amap->am_slots[amap->am_nused - 1];
			amap->am_bckptr[amap->am_slots[ptr]] =
			    ptr;	/* back ptr. */
		}
		amap->am_nused--;

		/*
		 * drop anon reference count
		 */
		simple_lock(&anon->an_lock);
		refs = --anon->an_ref;
		simple_unlock(&anon->an_lock);
		if (refs == 0) {
			/*
			 * we just eliminated the last reference to an anon.
			 * free it.
			 */
			uvm_anfree(anon);
		}
	}
}

#endif

/*
 * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
 *
 * => called with swap_syscall_lock held.
 * => note that we don't always traverse all anons.
 *    e.g. amaps being wiped out, released anons.
 * => return TRUE if failed.
 */

boolean_t
amap_swap_off(int startslot, int endslot)
{
	struct vm_amap *am;
	struct vm_amap *am_next;
	struct vm_amap marker_prev;
	struct vm_amap marker_next;
	boolean_t rv = FALSE;

#if defined(DIAGNOSTIC)
	memset(&marker_prev, 0, sizeof(marker_prev));
	memset(&marker_next, 0, sizeof(marker_next));
#endif /* defined(DIAGNOSTIC) */

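	/*
	 * a note on the markers (inferred from the code below): since
	 * uvm_anon_pagein() can sleep, amaps can be freed or shuffled in
	 * amap_list while we work.  marker_prev/marker_next bracket the
	 * current amap so a valid am_next can be found even if the
	 * current amap goes away.
	 */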
	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
		int i;

		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
		LIST_INSERT_AFTER(am, &marker_next, am_list);

		if (am->am_nused <= 0) {
			goto next;
		}

		for (i = 0; i < am->am_nused; i++) {
			int slot;
			int swslot;
			struct vm_anon *anon;

			slot = am->am_slots[i];
			anon = am->am_anon[slot];
			simple_lock(&anon->an_lock);

			swslot = anon->an_swslot;
			if (swslot < startslot || endslot <= swslot) {
				simple_unlock(&anon->an_lock);
				continue;
			}

			am->am_flags |= AMAP_SWAPOFF;

			rv = uvm_anon_pagein(anon);

			am->am_flags &= ~AMAP_SWAPOFF;
			if (amap_refs(am) == 0) {
				amap_wipeout(am);
				am = NULL;
				break;
			}
			if (rv) {
				break;
			}
			i = 0;
		}

next:
		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
		    &marker_next);
		am_next = LIST_NEXT(&marker_next, am_list);
		LIST_REMOVE(&marker_prev, am_list);
		LIST_REMOVE(&marker_next, am_list);
	}

	return rv;
}