1 /* $OpenBSD: uvm_map.c,v 1.97 2007/07/18 17:00:20 art Exp $ */
2 /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */
3
4 /*
5 * Copyright (c) 1997 Charles D. Cranor and Washington University.
6 * Copyright (c) 1991, 1993, The Regents of the University of California.
7 *
8 * All rights reserved.
9 *
10 * This code is derived from software contributed to Berkeley by
11 * The Mach Operating System project at Carnegie-Mellon University.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 * must display the following acknowledgement:
23 * This product includes software developed by Charles D. Cranor,
24 * Washington University, the University of California, Berkeley and
25 * its contributors.
26 * 4. Neither the name of the University nor the names of its contributors
27 * may be used to endorse or promote products derived from this software
28 * without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * SUCH DAMAGE.
41 *
42 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94
43 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
44 *
45 *
46 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
47 * All rights reserved.
48 *
49 * Permission to use, copy, modify and distribute this software and
50 * its documentation is hereby granted, provided that both the copyright
51 * notice and this permission notice appear in all copies of the
52 * software, derivative works or modified versions, and any portions
53 * thereof, and that both notices appear in supporting documentation.
54 *
55 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
56 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
57 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
58 *
59 * Carnegie Mellon requests users of this software to return to
60 *
61 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
62 * School of Computer Science
63 * Carnegie Mellon University
64 * Pittsburgh PA 15213-3890
65 *
66 * any improvements or extensions that they make and grant Carnegie the
67 * rights to redistribute these changes.
68 */
69
70 /*
71 * uvm_map.c: uvm map operations
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/mman.h>
77 #include <sys/proc.h>
78 #include <sys/malloc.h>
79 #include <sys/pool.h>
80 #include <sys/kernel.h>
81
82 #include <dev/rndvar.h>
83
84 #ifdef SYSVSHM
85 #include <sys/shm.h>
86 #endif
87
88 #define UVM_MAP
89 #include <uvm/uvm.h>
90 #undef RB_AUGMENT
91 #define RB_AUGMENT(x) uvm_rb_augment(x)
92
93 #ifdef DDB
94 #include <uvm/uvm_ddb.h>
95 #endif
96
97 static struct timeval uvm_kmapent_last_warn_time;
98 static struct timeval uvm_kmapent_warn_rate = { 10, 0 };
99
100 struct uvm_cnt uvm_map_call, map_backmerge, map_forwmerge;
101 struct uvm_cnt uvm_mlk_call, uvm_mlk_hint;
102 const char vmmapbsy[] = "vmmapbsy";
103
104 /*
105 * Da history books
106 */
107 UVMHIST_DECL(maphist);
108 UVMHIST_DECL(pdhist);
109
110 /*
111 * pool for vmspace structures.
112 */
113
114 struct pool uvm_vmspace_pool;
115
116 /*
117 * pool for dynamically-allocated map entries.
118 */
119
120 struct pool uvm_map_entry_pool;
121 struct pool uvm_map_entry_kmem_pool;
122
123 #ifdef PMAP_GROWKERNEL
124 /*
125 * This global represents the end of the kernel virtual address
126 * space. If we want to exceed this, we must grow the kernel
127 * virtual address space dynamically.
128 *
129 * Note, this variable is locked by kernel_map's lock.
130 */
131 vaddr_t uvm_maxkaddr;
132 #endif
133
134 /*
135 * macros
136 */
137
138 /*
139 * uvm_map_entry_link: insert entry into a map
140 *
141 * => map must be locked
142 */
143 #define uvm_map_entry_link(map, after_where, entry) do { \
144 (map)->nentries++; \
145 (entry)->prev = (after_where); \
146 (entry)->next = (after_where)->next; \
147 (entry)->prev->next = (entry); \
148 (entry)->next->prev = (entry); \
149 uvm_rb_insert(map, entry); \
150 } while (0)
151
152 /*
153 * uvm_map_entry_unlink: remove entry from a map
154 *
155 * => map must be locked
156 */
157 #define uvm_map_entry_unlink(map, entry) do { \
158 (map)->nentries--; \
159 (entry)->next->prev = (entry)->prev; \
160 (entry)->prev->next = (entry)->next; \
161 uvm_rb_remove(map, entry); \
162 } while (0)
163
164 /*
165 * SAVE_HINT: saves the specified entry as the hint for future lookups.
166 *
167 * => map need not be locked (protected by hint_lock).
168 */
169 #define SAVE_HINT(map,check,value) do { \
170 simple_lock(&(map)->hint_lock); \
171 if ((map)->hint == (check)) \
172 (map)->hint = (value); \
173 simple_unlock(&(map)->hint_lock); \
174 } while (0)
175
176 /*
177 * VM_MAP_RANGE_CHECK: check and correct range
178 *
179 * => map must at least be read locked
180 */
181
182 #define VM_MAP_RANGE_CHECK(map, start, end) do { \
183 if (start < vm_map_min(map)) \
184 start = vm_map_min(map); \
185 if (end > vm_map_max(map)) \
186 end = vm_map_max(map); \
187 if (start > end) \
188 start = end; \
189 } while (0)
190
191 /*
192 * local prototypes
193 */
194
195 void uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *);
196 void uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *);
197 void uvm_map_reference_amap(struct vm_map_entry *, int);
198 void uvm_map_unreference_amap(struct vm_map_entry *, int);
199 int uvm_map_spacefits(struct vm_map *, vaddr_t *, vsize_t,
200 struct vm_map_entry *, voff_t, vsize_t);
201
202 struct vm_map_entry *uvm_mapent_alloc(struct vm_map *);
203 void uvm_mapent_free(struct vm_map_entry *);
204
205
206 /*
207 * Tree manipulation.
208 */
209 void uvm_rb_insert(struct vm_map *, struct vm_map_entry *);
210 void uvm_rb_remove(struct vm_map *, struct vm_map_entry *);
211 vsize_t uvm_rb_space(struct vm_map *, struct vm_map_entry *);
212
213 #ifdef DEBUG
214 int _uvm_tree_sanity(struct vm_map *map, const char *name);
215 #endif
216 vsize_t uvm_rb_subtree_space(struct vm_map_entry *);
217 void uvm_rb_fixup(struct vm_map *, struct vm_map_entry *);
218
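/*
 * uvm_compare: compare two map entries by start address; this is the
 * ordering function for the RB tree of map entries.
 */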
219 static __inline int
220 uvm_compare(struct vm_map_entry *a, struct vm_map_entry *b)
221 {
222 if (a->start < b->start)
223 return (-1);
224 else if (a->start > b->start)
225 return (1);
226
227 return (0);
228 }
229
230
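/*
 * uvm_rb_augment: recompute the cached largest-gap value for an entry's
 * subtree; hooked into tree rebalancing via RB_AUGMENT.
 */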
231 static __inline void
232 uvm_rb_augment(struct vm_map_entry *entry)
233 {
234 entry->space = uvm_rb_subtree_space(entry);
235 }
236
237 RB_PROTOTYPE(uvm_tree, vm_map_entry, rb_entry, uvm_compare);
238
239 RB_GENERATE(uvm_tree, vm_map_entry, rb_entry, uvm_compare);
240
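/*
 * uvm_rb_space: return the size of the free gap immediately following
 * an entry, i.e. up to the next entry or the end of the map.
 */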
241 vsize_t
242 uvm_rb_space(struct vm_map *map, struct vm_map_entry *entry)
243 {
244 struct vm_map_entry *next;
245 vaddr_t space;
246
247 if ((next = entry->next) == &map->header)
248 space = map->max_offset - entry->end;
249 else {
250 KASSERT(next);
251 space = next->start - entry->end;
252 }
253 return (space);
254 }
255
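/*
 * uvm_rb_subtree_space: return the largest gap available in the subtree
 * rooted at this entry (its own gap or the cached maximum of a child).
 */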
256 vsize_t
257 uvm_rb_subtree_space(struct vm_map_entry *entry)
258 {
259 vaddr_t space, tmp;
260
261 space = entry->ownspace;
262 if (RB_LEFT(entry, rb_entry)) {
263 tmp = RB_LEFT(entry, rb_entry)->space;
264 if (tmp > space)
265 space = tmp;
266 }
267
268 if (RB_RIGHT(entry, rb_entry)) {
269 tmp = RB_RIGHT(entry, rb_entry)->space;
270 if (tmp > space)
271 space = tmp;
272 }
273
274 return (space);
275 }
276
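/*
 * uvm_rb_fixup: recompute the cached gap sizes for an entry and
 * propagate the change up to the root of the tree.
 */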
277 void
278 uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry)
279 {
280 /* We need to traverse to the very top */
281 do {
282 entry->ownspace = uvm_rb_space(map, entry);
283 entry->space = uvm_rb_subtree_space(entry);
284 } while ((entry = RB_PARENT(entry, rb_entry)) != NULL);
285 }
286
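/*
 * uvm_rb_insert: insert an entry into the RB tree and update the
 * cached gap sizes it affects.
 */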
287 void
288 uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry)
289 {
290 vaddr_t space = uvm_rb_space(map, entry);
291 struct vm_map_entry *tmp;
292
293 entry->ownspace = entry->space = space;
294 tmp = RB_INSERT(uvm_tree, &(map)->rbhead, entry);
295 #ifdef DIAGNOSTIC
296 if (tmp != NULL)
297 panic("uvm_rb_insert: duplicate entry?");
298 #endif
299 uvm_rb_fixup(map, entry);
300 if (entry->prev != &map->header)
301 uvm_rb_fixup(map, entry->prev);
302 }
303
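/*
 * uvm_rb_remove: remove an entry from the RB tree and update the
 * cached gap sizes of its former neighbors.
 */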
304 void
305 uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry)
306 {
307 struct vm_map_entry *parent;
308
309 parent = RB_PARENT(entry, rb_entry);
310 RB_REMOVE(uvm_tree, &(map)->rbhead, entry);
311 if (entry->prev != &map->header)
312 uvm_rb_fixup(map, entry->prev);
313 if (parent)
314 uvm_rb_fixup(map, parent);
315 }
316
317 #ifdef DEBUG
318 #define uvm_tree_sanity(x,y) _uvm_tree_sanity(x,y)
319 #else
320 #define uvm_tree_sanity(x,y)
321 #endif
322
323 #ifdef DEBUG
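/*
 * _uvm_tree_sanity: verify that the RB tree and the entry list agree:
 * cached gap sizes are correct, entries are sorted by start address,
 * the entry count matches, and every list entry is found in the tree.
 */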
324 int
325 _uvm_tree_sanity(struct vm_map *map, const char *name)
326 {
327 struct vm_map_entry *tmp, *trtmp;
328 int n = 0, i = 1;
329
330 RB_FOREACH(tmp, uvm_tree, &map->rbhead) {
331 if (tmp->ownspace != uvm_rb_space(map, tmp)) {
332 printf("%s: %d/%d ownspace %x != %x %s\n",
333 name, n + 1, map->nentries,
334 tmp->ownspace, uvm_rb_space(map, tmp),
335 tmp->next == &map->header ? "(last)" : "");
336 goto error;
337 }
338 }
339 trtmp = NULL;
340 RB_FOREACH(tmp, uvm_tree, &map->rbhead) {
341 if (tmp->space != uvm_rb_subtree_space(tmp)) {
342 printf("%s: space %d != %d\n",
343 name, tmp->space, uvm_rb_subtree_space(tmp));
344 goto error;
345 }
346 if (trtmp != NULL && trtmp->start >= tmp->start) {
347 printf("%s: corrupt: 0x%lx >= 0x%lx\n",
348 name, trtmp->start, tmp->start);
349 goto error;
350 }
351 n++;
352
353 trtmp = tmp;
354 }
355
356 if (n != map->nentries) {
357 printf("%s: nentries: %d vs %d\n",
358 name, n, map->nentries);
359 goto error;
360 }
361
362 for (tmp = map->header.next; tmp && tmp != &map->header;
363 tmp = tmp->next, i++) {
364 trtmp = RB_FIND(uvm_tree, &map->rbhead, tmp);
365 if (trtmp != tmp) {
366 printf("%s: lookup: %d: %p - %p: %p\n",
367 name, i, tmp, trtmp,
368 RB_PARENT(tmp, rb_entry));
369 goto error;
370 }
371 }
372
373 return (0);
374 error:
375 #ifdef DDB
376 /* handy breakpoint location for error case */
377 __asm(".globl treesanity_label\ntreesanity_label:");
378 #endif
379 return (-1);
380 }
381 #endif
382
383 /*
384 * uvm_mapent_alloc: allocate a map entry
385 */
386
387 struct vm_map_entry *
388 uvm_mapent_alloc(struct vm_map *map)
389 {
390 struct vm_map_entry *me, *ne;
391 int s, i;
392 UVMHIST_FUNC("uvm_mapent_alloc"); UVMHIST_CALLED(maphist);
393
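	/*
	 * Interrupt-safe maps (and allocations made while still cold)
	 * take entries from the static kernel free list, refilling it
	 * a page at a time when it runs dry.
	 */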
394 if (map->flags & VM_MAP_INTRSAFE || cold) {
395 s = splvm();
396 simple_lock(&uvm.kentry_lock);
397 me = uvm.kentry_free;
398 if (me == NULL) {
399 ne = uvm_km_getpage(0);
400 if (ne == NULL)
401 panic("uvm_mapent_alloc: cannot allocate map "
402 "entry");
403 for (i = 0;
404 i < PAGE_SIZE / sizeof(struct vm_map_entry) - 1;
405 i++)
406 ne[i].next = &ne[i + 1];
407 ne[i].next = NULL;
408 me = ne;
409 if (ratecheck(&uvm_kmapent_last_warn_time,
410 &uvm_kmapent_warn_rate))
411 printf("uvm_mapent_alloc: out of static "
412 "map entries\n");
413 }
414 uvm.kentry_free = me->next;
415 uvmexp.kmapent++;
416 simple_unlock(&uvm.kentry_lock);
417 splx(s);
418 me->flags = UVM_MAP_STATIC;
419 } else if (map == kernel_map) {
420 splassert(IPL_NONE);
421 me = pool_get(&uvm_map_entry_kmem_pool, PR_WAITOK);
422 me->flags = UVM_MAP_KMEM;
423 } else {
424 splassert(IPL_NONE);
425 me = pool_get(&uvm_map_entry_pool, PR_WAITOK);
426 me->flags = 0;
427 }
428
429 UVMHIST_LOG(maphist, "<- new entry=%p [kentry=%ld]", me,
430 ((map->flags & VM_MAP_INTRSAFE) != 0 || map == kernel_map), 0, 0);
431 return(me);
432 }
433
434 /*
435 * uvm_mapent_free: free map entry
436 *
437 * => XXX: static pool for kernel map?
438 */
439
440 void
441 uvm_mapent_free(struct vm_map_entry *me)
442 {
443 int s;
444 UVMHIST_FUNC("uvm_mapent_free"); UVMHIST_CALLED(maphist);
445
446 UVMHIST_LOG(maphist,"<- freeing map entry=%p [flags=%ld]",
447 me, me->flags, 0, 0);
448 if (me->flags & UVM_MAP_STATIC) {
449 s = splvm();
450 simple_lock(&uvm.kentry_lock);
451 me->next = uvm.kentry_free;
452 uvm.kentry_free = me;
453 uvmexp.kmapent--;
454 simple_unlock(&uvm.kentry_lock);
455 splx(s);
456 } else if (me->flags & UVM_MAP_KMEM) {
457 splassert(IPL_NONE);
458 pool_put(&uvm_map_entry_kmem_pool, me);
459 } else {
460 splassert(IPL_NONE);
461 pool_put(&uvm_map_entry_pool, me);
462 }
463 }
464
465 /*
466 * uvm_mapent_copy: copy a map entry, preserving flags
467 */
468
469 void
470 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
471 {
472 memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) -
473 ((char *)src));
474 }
475
476 /*
477 * uvm_map_entry_unwire: unwire a map entry
478 *
479 * => map should be locked by caller
480 */
481 void
482 uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry)
483 {
484
485 entry->wired_count = 0;
486 uvm_fault_unwire_locked(map, entry->start, entry->end);
487 }
488
489
490 /*
491 * wrapper for calling amap_ref()
492 */
493 void
494 uvm_map_reference_amap(struct vm_map_entry *entry, int flags)
495 {
496 amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff,
497 (entry->end - entry->start) >> PAGE_SHIFT, flags);
498 }
499
500
501 /*
502 * wrapper for calling amap_unref()
503 */
504 void
505 uvm_map_unreference_amap(struct vm_map_entry *entry, int flags)
506 {
507 amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff,
508 (entry->end - entry->start) >> PAGE_SHIFT, flags);
509 }
510
511
512 /*
513 * uvm_map_init: init mapping system at boot time. note that we allocate
514 * and init the static pool of structs vm_map_entry for the kernel here.
515 */
516
517 void
518 uvm_map_init(void)
519 {
520 static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
521 #if defined(UVMHIST)
522 static struct uvm_history_ent maphistbuf[100];
523 static struct uvm_history_ent pdhistbuf[100];
524 #endif
525 int lcv;
526
527 /*
528 * first, init logging system.
529 */
530
531 UVMHIST_FUNC("uvm_map_init");
532 UVMHIST_INIT_STATIC(maphist, maphistbuf);
533 UVMHIST_INIT_STATIC(pdhist, pdhistbuf);
534 UVMHIST_CALLED(maphist);
535 UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0);
536 UVMCNT_INIT(uvm_map_call, UVMCNT_CNT, 0,
537 "# uvm_map() successful calls", 0);
538 UVMCNT_INIT(map_backmerge, UVMCNT_CNT, 0, "# uvm_map() back merges", 0);
539 UVMCNT_INIT(map_forwmerge, UVMCNT_CNT, 0, "# uvm_map() missed forward",
540 0);
541 UVMCNT_INIT(uvm_mlk_call, UVMCNT_CNT, 0, "# map lookup calls", 0);
542 UVMCNT_INIT(uvm_mlk_hint, UVMCNT_CNT, 0, "# map lookup hint hits", 0);
543
544 /*
545 * now set up static pool of kernel map entries ...
546 */
547
548 simple_lock_init(&uvm.kentry_lock);
549 uvm.kentry_free = NULL;
550 for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
551 kernel_map_entry[lcv].next = uvm.kentry_free;
552 uvm.kentry_free = &kernel_map_entry[lcv];
553 }
554
555 /*
556 * initialize the map-related pools.
557 */
558 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace),
559 0, 0, 0, "vmsppl", &pool_allocator_nointr);
560 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry),
561 0, 0, 0, "vmmpepl", &pool_allocator_nointr);
562 pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry),
563 0, 0, 0, "vmmpekpl", NULL);
564 pool_sethiwat(&uvm_map_entry_pool, 8192);
565 }
566
567 /*
568 * clippers
569 */
570
571 /*
572 * uvm_map_clip_start: ensure that the entry begins at or after
573 * the starting address; if it doesn't, we split the entry.
574 *
575 * => caller should use UVM_MAP_CLIP_START macro rather than calling
576 * this directly
577 * => map must be locked by caller
578 */
579
580 void
581 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry,
582 vaddr_t start)
583 {
584 struct vm_map_entry *new_entry;
585 vaddr_t new_adj;
586
587 /* uvm_map_simplify_entry(map, entry); */ /* XXX */
588
589 uvm_tree_sanity(map, "clip_start entry");
590
591 /*
592 * Split off the front portion. note that we must insert the new
593 * entry BEFORE this one, so that this entry has the specified
594 * starting address.
595 */
596
597 new_entry = uvm_mapent_alloc(map);
598 uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
599
600 new_entry->end = start;
601 new_adj = start - new_entry->start;
602 if (entry->object.uvm_obj)
603 entry->offset += new_adj; /* shift start over */
604
605 /* Does not change order for the RB tree */
606 entry->start = start;
607
608 if (new_entry->aref.ar_amap) {
609 amap_splitref(&new_entry->aref, &entry->aref, new_adj);
610 }
611
612 uvm_map_entry_link(map, entry->prev, new_entry);
613
614 if (UVM_ET_ISSUBMAP(entry)) {
615 /* ... unlikely to happen, but play it safe */
616 uvm_map_reference(new_entry->object.sub_map);
617 } else {
618 if (UVM_ET_ISOBJ(entry) &&
619 entry->object.uvm_obj->pgops &&
620 entry->object.uvm_obj->pgops->pgo_reference)
621 entry->object.uvm_obj->pgops->pgo_reference(
622 entry->object.uvm_obj);
623 }
624
625 uvm_tree_sanity(map, "clip_start leave");
626 }
627
628 /*
629 * uvm_map_clip_end: ensure that the entry ends at or before
630 * the ending address; if it doesn't, we split the entry.
631 *
632 * => caller should use UVM_MAP_CLIP_END macro rather than calling
633 * this directly
634 * => map must be locked by caller
635 */
636
637 void
638 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end)
639 {
640 struct vm_map_entry *new_entry;
641 vaddr_t new_adj; /* #bytes we move start forward */
642
643 uvm_tree_sanity(map, "clip_end entry");
644 /*
645 * Create a new entry and insert it
646 * AFTER the specified entry
647 */
648
649 new_entry = uvm_mapent_alloc(map);
650 uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
651
652 new_entry->start = entry->end = end;
653 new_adj = end - entry->start;
654 if (new_entry->object.uvm_obj)
655 new_entry->offset += new_adj;
656
657 if (entry->aref.ar_amap)
658 amap_splitref(&entry->aref, &new_entry->aref, new_adj);
659
660 uvm_rb_fixup(map, entry);
661
662 uvm_map_entry_link(map, entry, new_entry);
663
664 if (UVM_ET_ISSUBMAP(entry)) {
665 /* ... unlikely to happen, but play it safe */
666 uvm_map_reference(new_entry->object.sub_map);
667 } else {
668 if (UVM_ET_ISOBJ(entry) &&
669 entry->object.uvm_obj->pgops &&
670 entry->object.uvm_obj->pgops->pgo_reference)
671 entry->object.uvm_obj->pgops->pgo_reference(
672 entry->object.uvm_obj);
673 }
674 uvm_tree_sanity(map, "clip_end leave");
675 }
676
677
678 /*
679 * M A P - m a i n e n t r y p o i n t
680 */
681 /*
682 * uvm_map: establish a valid mapping in a map
683 *
684 * => assume startp is page aligned.
685 * => assume size is a multiple of PAGE_SIZE.
686 * => assume sys_mmap provides enough of a "hint" to have us skip
687 * over text/data/bss area.
688 * => map must be unlocked (we will lock it)
689 * => <uobj,uoffset> value meanings (4 cases):
690 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER
691 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER
692 * [3] <uobj,uoffset> == normal mapping
693 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA
694 *
695 * case [4] is for kernel mappings where we don't know the offset until
696 * we've found a virtual address. note that kernel object offsets are
697 * always relative to vm_map_min(kernel_map).
698 *
699 * => if `align' is non-zero, we try to align the virtual address to
700 * the specified alignment. this is only a hint; if we can't
701 * do it, the address will be unaligned. this is provided as
702 * a mechanism for large pages.
703 *
704 * => XXXCDC: need way to map in external amap?
705 */
706
707 int
708 uvm_map_p(struct vm_map *map, vaddr_t *startp, vsize_t size,
709 struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags,
710 struct proc *p)
711 {
712 struct vm_map_entry *prev_entry, *new_entry;
713 vm_prot_t prot = UVM_PROTECTION(flags), maxprot =
714 UVM_MAXPROTECTION(flags);
715 vm_inherit_t inherit = UVM_INHERIT(flags);
716 int advice = UVM_ADVICE(flags);
717 int error;
718 UVMHIST_FUNC("uvm_map");
719 UVMHIST_CALLED(maphist);
720
721 UVMHIST_LOG(maphist, "(map=%p, *startp=0x%lx, size=%ld, flags=0x%lx)",
722 map, *startp, size, flags);
723 UVMHIST_LOG(maphist, " uobj/offset %p/%ld", uobj, (u_long)uoffset,0,0);
724
725 uvm_tree_sanity(map, "map entry");
726
727 if ((map->flags & VM_MAP_INTRSAFE) == 0)
728 splassert(IPL_NONE);
729
730 /*
731 * step 0: sanity check of protection code
732 */
733
734 if ((prot & maxprot) != prot) {
735 UVMHIST_LOG(maphist, "<- prot. failure: prot=0x%lx, max=0x%lx",
736 prot, maxprot,0,0);
737 return (EACCES);
738 }
739
740 /*
741 * step 1: figure out where to put new VM range
742 */
743
744 if (vm_map_lock_try(map) == FALSE) {
745 if (flags & UVM_FLAG_TRYLOCK)
746 return (EFAULT);
747 vm_map_lock(map); /* could sleep here */
748 }
749 if ((prev_entry = uvm_map_findspace(map, *startp, size, startp,
750 uobj, uoffset, align, flags)) == NULL) {
751 UVMHIST_LOG(maphist,"<- uvm_map_findspace failed!",0,0,0,0);
752 vm_map_unlock(map);
753 return (ENOMEM);
754 }
755
756 #ifdef PMAP_GROWKERNEL
757 {
758 /*
759 * If the kernel pmap can't map the requested space,
760 * then allocate more resources for it.
761 */
762 if (map == kernel_map && uvm_maxkaddr < (*startp + size))
763 uvm_maxkaddr = pmap_growkernel(*startp + size);
764 }
765 #endif
766
767 UVMCNT_INCR(uvm_map_call);
768
769 /*
770 * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER
771 * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in
772 * either case we want to zero it before storing it in the map entry
773 * (because it looks strange and confusing when debugging...)
774 *
775 * if uobj is not null
776 * if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping
777 * and we do not need to change uoffset.
778 * if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset
779 * now (based on the starting address of the map). this case is
780 * for kernel object mappings where we don't know the offset until
781 * the virtual address is found (with uvm_map_findspace). the
782 * offset is the distance we are from the start of the map.
783 */
784
785 if (uobj == NULL) {
786 uoffset = 0;
787 } else {
788 if (uoffset == UVM_UNKNOWN_OFFSET) {
789 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
790 uoffset = *startp - vm_map_min(kernel_map);
791 }
792 }
793
794 /*
795 * step 2: try and insert in map by extending previous entry, if
796 * possible
797 * XXX: we don't try and pull back the next entry. might be useful
798 * for a stack, but we are currently allocating our stack in advance.
799 */
800
801 if ((flags & UVM_FLAG_NOMERGE) == 0 &&
802 prev_entry->end == *startp && prev_entry != &map->header &&
803 prev_entry->object.uvm_obj == uobj) {
804
805 if (uobj && prev_entry->offset +
806 (prev_entry->end - prev_entry->start) != uoffset)
807 goto step3;
808
809 if (UVM_ET_ISSUBMAP(prev_entry))
810 goto step3;
811
812 if (prev_entry->protection != prot ||
813 prev_entry->max_protection != maxprot)
814 goto step3;
815
816 if (prev_entry->inheritance != inherit ||
817 prev_entry->advice != advice)
818 goto step3;
819
820 /* wiring status must match (new area is unwired) */
821 if (VM_MAPENT_ISWIRED(prev_entry))
822 goto step3;
823
824 /*
825 * can't extend a shared amap. note: no need to lock amap to
826 * look at refs since we don't care about its exact value.
827 * if it is one (i.e. we have the only reference) it will stay there
828 */
829
830 if (prev_entry->aref.ar_amap &&
831 amap_refs(prev_entry->aref.ar_amap) != 1) {
832 goto step3;
833 }
834
835 if (prev_entry->aref.ar_amap) {
836 error = amap_extend(prev_entry, size);
837 if (error) {
838 vm_map_unlock(map);
839 return (error);
840 }
841 }
842
843 UVMCNT_INCR(map_backmerge);
844 UVMHIST_LOG(maphist," starting back merge", 0, 0, 0, 0);
845
846 /*
847 * drop our reference to uobj since we are extending a reference
848 * that we already have (the ref count can not drop to zero).
849 */
850
851 if (uobj && uobj->pgops->pgo_detach)
852 uobj->pgops->pgo_detach(uobj);
853
854 prev_entry->end += size;
855 uvm_rb_fixup(map, prev_entry);
856 map->size += size;
857 if (p && uobj == NULL)
858 p->p_vmspace->vm_dused += btoc(size);
859
860 uvm_tree_sanity(map, "map leave 2");
861
862 UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0);
863 vm_map_unlock(map);
864 return (0);
865
866 }
867 step3:
868 UVMHIST_LOG(maphist," allocating new map entry", 0, 0, 0, 0);
869
870 /*
871 * check for possible forward merge (which we don't do) and count
872 * the number of times we missed a *possible* chance to merge more
873 */
874
875 if ((flags & UVM_FLAG_NOMERGE) == 0 &&
876 prev_entry->next != &map->header &&
877 prev_entry->next->start == (*startp + size))
878 UVMCNT_INCR(map_forwmerge);
879
880 /*
881 * step 3: allocate new entry and link it in
882 */
883
884 new_entry = uvm_mapent_alloc(map);
885 new_entry->start = *startp;
886 new_entry->end = new_entry->start + size;
887 new_entry->object.uvm_obj = uobj;
888 new_entry->offset = uoffset;
889
890 if (uobj)
891 new_entry->etype = UVM_ET_OBJ;
892 else
893 new_entry->etype = 0;
894
895 if (flags & UVM_FLAG_COPYONW) {
896 new_entry->etype |= UVM_ET_COPYONWRITE;
897 if ((flags & UVM_FLAG_OVERLAY) == 0)
898 new_entry->etype |= UVM_ET_NEEDSCOPY;
899 }
900
901 new_entry->protection = prot;
902 new_entry->max_protection = maxprot;
903 new_entry->inheritance = inherit;
904 new_entry->wired_count = 0;
905 new_entry->advice = advice;
906 if (flags & UVM_FLAG_OVERLAY) {
907 /*
908 * to_add: for BSS we overallocate a little since we
909 * are likely to extend
910 */
911 vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ?
912 UVM_AMAP_CHUNK << PAGE_SHIFT : 0;
913 struct vm_amap *amap = amap_alloc(size, to_add, M_WAITOK);
914 new_entry->aref.ar_pageoff = 0;
915 new_entry->aref.ar_amap = amap;
916 } else {
917 new_entry->aref.ar_pageoff = 0;
918 new_entry->aref.ar_amap = NULL;
919 }
920
921 uvm_map_entry_link(map, prev_entry, new_entry);
922
923 map->size += size;
924 if (p && uobj == NULL)
925 p->p_vmspace->vm_dused += btoc(size);
926
927
928 /*
929 * Update the free space hint
930 */
931
932 if ((map->first_free == prev_entry) &&
933 (prev_entry->end >= new_entry->start))
934 map->first_free = new_entry;
935
936 uvm_tree_sanity(map, "map leave");
937
938 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
939 vm_map_unlock(map);
940 return (0);
941 }
942
943 /*
944 * uvm_map_lookup_entry: find map entry at or before an address
945 *
946 * => map must at least be read-locked by caller
947 * => entry is returned in "entry"
948 * => return value is true if address is in the returned entry
949 */
950
951 boolean_t
952 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
953 struct vm_map_entry **entry)
954 {
955 struct vm_map_entry *cur;
956 struct vm_map_entry *last;
957 int use_tree = 0;
958 UVMHIST_FUNC("uvm_map_lookup_entry");
959 UVMHIST_CALLED(maphist);
960
961 UVMHIST_LOG(maphist,"(map=%p,addr=0x%lx,ent=%p)",
962 map, address, entry, 0);
963
964 /*
965 * start looking either from the head of the
966 * list, or from the hint.
967 */
968
969 simple_lock(&map->hint_lock);
970 cur = map->hint;
971 simple_unlock(&map->hint_lock);
972
973 if (cur == &map->header)
974 cur = cur->next;
975
976 UVMCNT_INCR(uvm_mlk_call);
977 if (address >= cur->start) {
978 /*
979 * go from hint to end of list.
980 *
981 * but first, make a quick check to see if
982 * we are already looking at the entry we
983 * want (which is usually the case).
984 * note also that we don't need to save the hint
985 * here... it is the same hint (unless we are
986 * at the header, in which case the hint didn't
987 * buy us anything anyway).
988 */
989 last = &map->header;
990 if ((cur != last) && (cur->end > address)) {
991 UVMCNT_INCR(uvm_mlk_hint);
992 *entry = cur;
993 UVMHIST_LOG(maphist,"<- got it via hint (%p)",
994 cur, 0, 0, 0);
995 return (TRUE);
996 }
997
998 if (map->nentries > 30)
999 use_tree = 1;
1000 } else {
1001 /*
1002 * go from start to hint, *inclusively*
1003 */
1004 last = cur->next;
1005 cur = map->header.next;
1006 use_tree = 1;
1007 }
1008
1009 uvm_tree_sanity(map, __func__);
1010
1011 if (use_tree) {
1012 struct vm_map_entry *prev = &map->header;
1013 cur = RB_ROOT(&map->rbhead);
1014
1015 /*
1016 * Simple lookup in the tree. Happens when the hint is
1017 * invalid, or nentries reaches a threshold.
1018 */
1019 while (cur) {
1020 if (address >= cur->start) {
1021 if (address < cur->end) {
1022 *entry = cur;
1023 SAVE_HINT(map, map->hint, cur);
1024 return (TRUE);
1025 }
1026 prev = cur;
1027 cur = RB_RIGHT(cur, rb_entry);
1028 } else
1029 cur = RB_LEFT(cur, rb_entry);
1030 }
1031 *entry = prev;
1032 UVMHIST_LOG(maphist,"<- failed!",0,0,0,0);
1033 return (FALSE);
1034 }
1035
1036 /*
1037 * search linearly
1038 */
1039
1040 while (cur != last) {
1041 if (cur->end > address) {
1042 if (address >= cur->start) {
1043 /*
1044 * save this lookup for future
1045 * hints, and return
1046 */
1047
1048 *entry = cur;
1049 SAVE_HINT(map, map->hint, cur);
1050 UVMHIST_LOG(maphist,"<- search got it (%p)",
1051 cur, 0, 0, 0);
1052 return (TRUE);
1053 }
1054 break;
1055 }
1056 cur = cur->next;
1057 }
1058
1059 *entry = cur->prev;
1060 SAVE_HINT(map, map->hint, *entry);
1061 UVMHIST_LOG(maphist,"<- failed!",0,0,0,0);
1062 return (FALSE);
1063 }
1064
1065 /*
1066 * Checks if the address pointed to by phint fits into the empty
1067 * space before the vm_map_entry "after". Takes alignment and
1068 * offset into consideration.
1069 */
1070
1071 int
1072 uvm_map_spacefits(struct vm_map *map, vaddr_t *phint, vsize_t length,
1073 struct vm_map_entry *after, voff_t uoffset, vsize_t align)
1074 {
1075 vaddr_t hint = *phint;
1076 vaddr_t end;
1077
1078 #ifdef PMAP_PREFER
1079 /*
1080 * push hint forward as needed to avoid VAC alias problems.
1081 * we only do this if a valid offset is specified.
1082 */
1083 if (uoffset != UVM_UNKNOWN_OFFSET)
1084 PMAP_PREFER(uoffset, &hint);
1085 #endif
1086 if (align != 0)
1087 if ((hint & (align - 1)) != 0)
1088 hint = roundup(hint, align);
1089 *phint = hint;
1090
1091 end = hint + length;
1092 if (end > map->max_offset || end < hint)
1093 return (FALSE);
1094 if (after != NULL && after != &map->header && after->start < end)
1095 return (FALSE);
1096
1097 return (TRUE);
1098 }
1099
1100 /*
1101 * uvm_map_hint: return the beginning of the best area suitable for
1102 * creating a new mapping with "prot" protection.
1103 */
1104 vaddr_t
1105 uvm_map_hint(struct proc *p, vm_prot_t prot)
1106 {
1107 vaddr_t addr;
1108
1109 #ifdef __i386__
1110 /*
1111 * If executable, skip the first two pages; otherwise start
1112 * after the data + heap region.
1113 */
1114 if ((prot & VM_PROT_EXECUTE) &&
1115 ((vaddr_t)p->p_vmspace->vm_daddr >= I386_MAX_EXE_ADDR)) {
1116 addr = (PAGE_SIZE*2) +
1117 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1));
1118 return (round_page(addr));
1119 }
1120 #endif
1121 addr = (vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ;
1122 #if !defined(__vax__)
1123 addr += arc4random() & (MIN((256 * 1024 * 1024), MAXDSIZ) - 1);
1124 #else
1125 /* start malloc/mmap after the brk */
1126 addr = (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ;
1127 #endif
1128 return (round_page(addr));
1129 }
1130
1131 /*
1132 * uvm_map_findspace: find "length" sized space in "map".
1133 *
1134 * => "hint" is a hint about where we want it, unless UVM_FLAG_FIXED is
1135 * set (in which case we insist on using "hint").
1136 * => "result" is VA returned
1137 * => uobj/uoffset are to be used to handle VAC alignment, if required
1138 * => if `align' is non-zero, we attempt to align to that value.
1139 * => caller must at least have read-locked map
1140 * => returns NULL on failure, or pointer to prev. map entry if success
1141 * => note this is a cross between the old vm_map_findspace and vm_map_find
1142 */
1143
1144 struct vm_map_entry *
1145 uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length,
1146 vaddr_t *result, struct uvm_object *uobj, voff_t uoffset, vsize_t align,
1147 int flags)
1148 {
1149 struct vm_map_entry *entry, *next, *tmp;
1150 struct vm_map_entry *child, *prev = NULL;
1151
1152 vaddr_t end, orig_hint;
1153 UVMHIST_FUNC("uvm_map_findspace");
1154 UVMHIST_CALLED(maphist);
1155
1156 UVMHIST_LOG(maphist, "(map=%p, hint=0x%lx, len=%ld, flags=0x%lx)",
1157 map, hint, length, flags);
1158 KASSERT((align & (align - 1)) == 0);
1159 KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0);
1160
1161 uvm_tree_sanity(map, "map_findspace entry");
1162
1163 /*
1164 * remember the original hint. if we are aligning, then we
1165 * may have to try again with no alignment constraint if
1166 * we fail the first time.
1167 */
1168
1169 orig_hint = hint;
1170 if (hint < map->min_offset) { /* check ranges ... */
1171 if (flags & UVM_FLAG_FIXED) {
1172 UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0);
1173 return(NULL);
1174 }
1175 hint = map->min_offset;
1176 }
1177 if (hint > map->max_offset) {
1178 UVMHIST_LOG(maphist,"<- VA 0x%lx > range [0x%lx->0x%lx]",
1179 hint, map->min_offset, map->max_offset, 0);
1180 return(NULL);
1181 }
1182
1183 /*
1184 * Look for the first possible address; if there's already
1185 * something at this address, we have to start after it.
1186 */
1187
1188 if ((flags & UVM_FLAG_FIXED) == 0 && hint == map->min_offset) {
1189 if ((entry = map->first_free) != &map->header)
1190 hint = entry->end;
1191 } else {
1192 if (uvm_map_lookup_entry(map, hint, &tmp)) {
1193 /* "hint" address already in use ... */
1194 if (flags & UVM_FLAG_FIXED) {
1195 UVMHIST_LOG(maphist,"<- fixed & VA in use",
1196 0, 0, 0, 0);
1197 return(NULL);
1198 }
1199 hint = tmp->end;
1200 }
1201 entry = tmp;
1202 }
1203
1204 if (flags & UVM_FLAG_FIXED) {
1205 end = hint + length;
1206 if (end > map->max_offset || end < hint) {
1207 UVMHIST_LOG(maphist,"<- failed (off end)", 0,0,0,0);
1208 goto error;
1209 }
1210 next = entry->next;
1211 if (next == &map->header || next->start >= end)
1212 goto found;
1213 UVMHIST_LOG(maphist,"<- fixed mapping failed", 0,0,0,0);
1214 return(NULL); /* only one shot at it ... */
1215 }
1216
1217 /* Try to find the space in the red-black tree */
1218
1219 /* Check slot before any entry */
1220 if (uvm_map_spacefits(map, &hint, length, entry->next, uoffset, align))
1221 goto found;
1222
1223 /* If there is not enough space in the whole tree, we fail */
1224 tmp = RB_ROOT(&map->rbhead);
1225 if (tmp == NULL || tmp->space < length)
1226 goto error;
1227
1228 /* Find an entry close to hint that has enough space */
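	/*
	 * Walk down from the root toward "hint".  "ownspace" is the gap
	 * directly after an entry, "space" the largest gap in its subtree.
	 * Remember in "prev" the lowest-ending entry at or past the hint
	 * whose own gap, or whose right subtree, could hold the request.
	 */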
1229 for (; tmp;) {
1230 if (tmp->end >= hint &&
1231 (prev == NULL || tmp->end < prev->end)) {
1232 if (tmp->ownspace >= length)
1233 prev = tmp;
1234 else if ((child = RB_RIGHT(tmp, rb_entry)) != NULL &&
1235 child->space >= length)
1236 prev = tmp;
1237 }
1238 if (tmp->end < hint)
1239 child = RB_RIGHT(tmp, rb_entry);
1240 else if (tmp->end > hint)
1241 child = RB_LEFT(tmp, rb_entry);
1242 else {
1243 if (tmp->ownspace >= length)
1244 break;
1245 child = RB_RIGHT(tmp, rb_entry);
1246 }
1247 if (child == NULL || child->space < length)
1248 break;
1249 tmp = child;
1250 }
1251
1252 if (tmp != NULL && hint < tmp->end + tmp->ownspace) {
1253 /*
1254 * Check if the entry that we found satisfies the
1255 * space requirement
1256 */
1257 if (hint < tmp->end)
1258 hint = tmp->end;
1259 if (uvm_map_spacefits(map, &hint, length, tmp->next, uoffset,
1260 align)) {
1261 entry = tmp;
1262 goto found;
1263 } else if (tmp->ownspace >= length)
1264 goto listsearch;
1265 }
1266 if (prev == NULL)
1267 goto error;
1268
1269 hint = prev->end;
1270 if (uvm_map_spacefits(map, &hint, length, prev->next, uoffset,
1271 align)) {
1272 entry = prev;
1273 goto found;
1274 } else if (prev->ownspace >= length)
1275 goto listsearch;
1276
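	/*
	 * prev's own gap did not fit, but its right subtree is known to
	 * hold a gap that does; descend to the lowest-addressed entry in
	 * that subtree whose own gap is large enough.
	 */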
1277 tmp = RB_RIGHT(prev, rb_entry);
1278 for (;;) {
1279 KASSERT(tmp && tmp->space >= length);
1280 child = RB_LEFT(tmp, rb_entry);
1281 if (child && child->space >= length) {
1282 tmp = child;
1283 continue;
1284 }
1285 if (tmp->ownspace >= length)
1286 break;
1287 tmp = RB_RIGHT(tmp, rb_entry);
1288 }
1289
1290 hint = tmp->end;
1291 if (uvm_map_spacefits(map, &hint, length, tmp->next, uoffset, align)) {
1292 entry = tmp;
1293 goto found;
1294 }
1295
1296 /*
1297 * The tree fails to find an entry because of offset or alignment
1298 * restrictions. Search the list instead.
1299 */
1300 listsearch:
1301 /*
1302 * Look through the rest of the map, trying to fit a new region in
1303 * the gap between existing regions, or after the very last region.
1304 * note: entry->end = base VA of current gap,
1305 * next->start = VA of end of current gap
1306 */
1307 for (;; hint = (entry = next)->end) {
1308 /*
1309 * Find the end of the proposed new region. Be sure we didn't
1310 * go beyond the end of the map, or wrap around the address;
1311 * if so, we lose. Otherwise, if this is the last entry, or
1312 * if the proposed new region fits before the next entry, we
1313 * win.
1314 */
1315
1316 #ifdef PMAP_PREFER
1317 /*
1318 * push hint forward as needed to avoid VAC alias problems.
1319 * we only do this if a valid offset is specified.
1320 */
1321 if (uoffset != UVM_UNKNOWN_OFFSET)
1322 PMAP_PREFER(uoffset, &hint);
1323 #endif
1324 if (align != 0) {
1325 if ((hint & (align - 1)) != 0)
1326 hint = roundup(hint, align);
1327 /*
1328 * XXX Should we PMAP_PREFER() here again?
1329 */
1330 }
1331 end = hint + length;
1332 if (end > map->max_offset || end < hint) {
1333 UVMHIST_LOG(maphist,"<- failed (off end)", 0,0,0,0);
1334 goto error;
1335 }
1336 next = entry->next;
1337 if (next == &map->header || next->start >= end)
1338 break;
1339 }
1340 found:
1341 SAVE_HINT(map, map->hint, entry);
1342 *result = hint;
1343 UVMHIST_LOG(maphist,"<- got it! (result=0x%lx)", hint, 0,0,0);
1344 return (entry);
1345
1346 error:
1347 if (align != 0) {
1348 UVMHIST_LOG(maphist,
1349 "calling recursively, no align",
1350 0,0,0,0);
1351 return (uvm_map_findspace(map, orig_hint,
1352 length, result, uobj, uoffset, 0, flags));
1353 }
1354 return (NULL);
1355 }
1356
1357 /*
1358 * U N M A P - m a i n h e l p e r f u n c t i o n s
1359 */
1360
1361 /*
1362 * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "end")
1363 *
1364 * => caller must check alignment and size
1365 * => map must be locked by caller
1366 * => we return a list of map entries that we've removed from the map
1367 * in "entry_list"
1368 */
1369
1370 void
1371 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
1372 struct vm_map_entry **entry_list, struct proc *p)
1373 {
1374 struct vm_map_entry *entry, *first_entry, *next;
1375 vaddr_t len;
1376 UVMHIST_FUNC("uvm_unmap_remove");
1377 UVMHIST_CALLED(maphist);
1378
1379 UVMHIST_LOG(maphist,"(map=%p, start=0x%lx, end=0x%lx)",
1380 map, start, end, 0);
1381
1382 VM_MAP_RANGE_CHECK(map, start, end);
1383
1384 uvm_tree_sanity(map, "unmap_remove entry");
1385
1386 if ((map->flags & VM_MAP_INTRSAFE) == 0)
1387 splassert(IPL_NONE);
1388
1389 /*
1390 * find first entry
1391 */
1392 if (uvm_map_lookup_entry(map, start, &first_entry) == TRUE) {
1393 /* clip and go... */
1394 entry = first_entry;
1395 UVM_MAP_CLIP_START(map, entry, start);
1396 /* critical! prevents stale hint */
1397 SAVE_HINT(map, entry, entry->prev);
1398
1399 } else {
1400 entry = first_entry->next;
1401 }
1402
1403 /*
1404 * Save the free space hint
1405 */
1406
1407 if (map->first_free->start >= start)
1408 map->first_free = entry->prev;
1409
1410 /*
1411 * note: we now re-use first_entry for a different task. we remove
1412 * a number of map entries from the map and save them in a linked
1413 * list headed by "first_entry". once we remove them from the map
1414 * the caller should unlock the map and drop the references to the
1415 * backing objects [c.f. uvm_unmap_detach]. the object is to
1416 * separate unmapping from reference dropping. why?
1417 * [1] the map has to be locked for unmapping
1418 * [2] the map need not be locked for reference dropping
1419 * [3] dropping references may trigger pager I/O, and if we hit
1420 * a pager that does synchronous I/O we may have to wait for it.
1421 * [4] we would like all waiting for I/O to occur with maps unlocked
1422 * so that we don't block other threads.
1423 */
1424 first_entry = NULL;
1425 *entry_list = NULL; /* to be safe */
1426
1427 /*
1428 * break up the area into map entry sized regions and unmap. note
1429 * that all mappings have to be removed before we can even consider
1430 * dropping references to amaps or VM objects (otherwise we could end
1431 * up with a mapping to a page on the free list which would be very bad)
1432 */
1433
1434 while ((entry != &map->header) && (entry->start < end)) {
1435
1436 UVM_MAP_CLIP_END(map, entry, end);
1437 next = entry->next;
1438 len = entry->end - entry->start;
1439 if (p && entry->object.uvm_obj == NULL)
1440 p->p_vmspace->vm_dused -= btoc(len);
1441
1442 /*
1443 * unwire before removing addresses from the pmap; otherwise
1444 * unwiring will put the entries back into the pmap (XXX).
1445 */
1446
1447 if (VM_MAPENT_ISWIRED(entry))
1448 uvm_map_entry_unwire(map, entry);
1449
1450 /*
1451 * special case: handle mappings to anonymous kernel objects.
1452 * we want to free these pages right away...
1453 */
1454 if (map->flags & VM_MAP_INTRSAFE) {
1455 uvm_km_pgremove_intrsafe(entry->start, entry->end);
1456 pmap_kremove(entry->start, len);
1457 } else if (UVM_ET_ISOBJ(entry) &&
1458 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
1459 KASSERT(vm_map_pmap(map) == pmap_kernel());
1460
1461 /*
1462 * note: kernel object mappings are currently used in
1463 * two ways:
1464 * [1] "normal" mappings of pages in the kernel object
1465 * [2] uvm_km_valloc'd allocations in which we
1466 * pmap_enter in some non-kernel-object page
1467 * (e.g. vmapbuf).
1468 *
1469 * for case [1], we need to remove the mapping from
1470 * the pmap and then remove the page from the kernel
1471 * object (because, once pages in a kernel object are
1472 * unmapped they are no longer needed, unlike, say,
1473 * a vnode where you might want the data to persist
1474 * until flushed out of a queue).
1475 *
1476 * for case [2], we need to remove the mapping from
1477 * the pmap. there shouldn't be any pages at the
1478 * specified offset in the kernel object [but it
1479 * doesn't hurt to call uvm_km_pgremove just to be
1480 * safe?]
1481 *
1482 * uvm_km_pgremove currently does the following:
1483 * for pages in the kernel object in range:
1484 * - drops the swap slot
1485 * - uvm_pagefree the page
1486 *
1487 * note there is a version of uvm_km_pgremove() that
1488 * is used for "intrsafe" objects.
1489 */
1490
1491 /*
1492 * remove mappings from pmap and drop the pages
1493 * from the object. offsets are always relative
1494 * to vm_map_min(kernel_map).
1495 */
1496 pmap_remove(pmap_kernel(), entry->start, entry->end);
1497 uvm_km_pgremove(entry->object.uvm_obj,
1498 entry->start - vm_map_min(kernel_map),
1499 entry->end - vm_map_min(kernel_map));
1500
1501 /*
1502 * null out kernel_object reference, we've just
1503 * dropped it
1504 */
1505 entry->etype &= ~UVM_ET_OBJ;
1506 entry->object.uvm_obj = NULL; /* to be safe */
1507
1508 } else {
1509 /*
1510 * remove mappings the standard way.
1511 */
1512 pmap_remove(map->pmap, entry->start, entry->end);
1513 }
1514
1515 /*
1516 * remove entry from map and put it on our list of entries
1517 * that we've nuked. then go do next entry.
1518 */
1519 UVMHIST_LOG(maphist, " removed map entry %p", entry, 0, 0,0);
1520
1521 /* critical! prevents stale hint */
1522 SAVE_HINT(map, entry, entry->prev);
1523
1524 uvm_map_entry_unlink(map, entry);
1525 map->size -= len;
1526 entry->next = first_entry;
1527 first_entry = entry;
1528 entry = next; /* next entry, please */
1529 }
1530 /* if ((map->flags & VM_MAP_DYING) == 0) { */
1531 pmap_update(vm_map_pmap(map));
1532 /* } */
1533
1534
1535 uvm_tree_sanity(map, "unmap_remove leave");
1536
1537 /*
1538 * now we've cleaned up the map and are ready for the caller to drop
1539 * references to the mapped objects.
1540 */
1541
1542 *entry_list = first_entry;
1543 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
1544 }
1545
1546 /*
1547 * uvm_unmap_detach: drop references in a chain of map entries
1548 *
1549 * => we will free the map entries as we traverse the list.
1550 */
1551
1552 void
1553 uvm_unmap_detach(struct vm_map_entry *first_entry, int flags)
1554 {
1555 struct vm_map_entry *next_entry;
1556 UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist);
1557
1558 while (first_entry) {
1559 KASSERT(!VM_MAPENT_ISWIRED(first_entry));
1560 UVMHIST_LOG(maphist,
1561 " detach 0x%lx: amap=%p, obj=%p, submap?=%ld",
1562 first_entry, first_entry->aref.ar_amap,
1563 first_entry->object.uvm_obj,
1564 UVM_ET_ISSUBMAP(first_entry));
1565
1566 /*
1567 * drop reference to amap, if we've got one
1568 */
1569
1570 if (first_entry->aref.ar_amap)
1571 uvm_map_unreference_amap(first_entry, flags);
1572
1573 /*
1574 * drop reference to our backing object, if we've got one
1575 */
1576
1577 if (UVM_ET_ISSUBMAP(first_entry)) {
1578 /* ... unlikely to happen, but play it safe */
1579 uvm_map_deallocate(first_entry->object.sub_map);
1580 } else {
1581 if (UVM_ET_ISOBJ(first_entry) &&
1582 first_entry->object.uvm_obj->pgops->pgo_detach)
1583 first_entry->object.uvm_obj->pgops->
1584 pgo_detach(first_entry->object.uvm_obj);
1585 }
1586
1587 next_entry = first_entry->next;
1588 uvm_mapent_free(first_entry);
1589 first_entry = next_entry;
1590 }
1591 UVMHIST_LOG(maphist, "<- done", 0,0,0,0);
1592 }
1593
1594 /*
1595 * E X T R A C T I O N F U N C T I O N S
1596 */
1597
1598 /*
1599 * uvm_map_reserve: reserve space in a vm_map for future use.
1600 *
1601 * => we reserve space in a map by putting a dummy map entry in the
1602 * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE)
1603 * => map should be unlocked (we will write lock it)
1604 * => we return true if we were able to reserve space
1605 * => XXXCDC: should be inline?
1606 */
1607
1608 int
1609 uvm_map_reserve(struct vm_map *map, vsize_t size, vaddr_t offset,
1610 vsize_t align, vaddr_t *raddr)
1611 {
1612 UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist);
1613
1614 UVMHIST_LOG(maphist, "(map=%p, size=0x%lx, offset=0x%lx,addr=0x%lx)",
1615 map,size,offset,raddr);
1616
1617 size = round_page(size);
1618 if (*raddr < vm_map_min(map))
1619 *raddr = vm_map_min(map); /* hint */
1620
1621 /*
1622 * reserve some virtual space.
1623 */
1624
1625 if (uvm_map(map, raddr, size, NULL, offset, 0,
1626 UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
1627 UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) {
1628 UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0);
1629 return (FALSE);
1630 }
1631
1632 UVMHIST_LOG(maphist, "<- done (*raddr=0x%lx)", *raddr,0,0,0);
1633 return (TRUE);
1634 }
1635
1636 /*
1637 * uvm_map_replace: replace a reserved (blank) area of memory with
1638 * real mappings.
1639 *
1640 * => caller must WRITE-LOCK the map
1641 * => we return TRUE if replacement was a success
1642 * => we expect the newents chain to have nnewents entries on it and
1643 * we expect newents->prev to point to the last entry on the list
1644 * => note newents is allowed to be NULL
1645 */
1646
1647 int
1648 uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end,
1649 struct vm_map_entry *newents, int nnewents)
1650 {
1651 struct vm_map_entry *oldent, *last;
1652
1653 uvm_tree_sanity(map, "map_replace entry");
1654
1655 /*
1656 * first find the blank map entry at the specified address
1657 */
1658
1659 if (!uvm_map_lookup_entry(map, start, &oldent)) {
1660 return(FALSE);
1661 }
1662
1663 /*
1664 * check to make sure we have a proper blank entry
1665 */
1666
1667 if (oldent->start != start || oldent->end != end ||
1668 oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) {
1669 return (FALSE);
1670 }
1671
1672 #ifdef DIAGNOSTIC
1673 /*
1674 * sanity check the newents chain
1675 */
1676 {
1677 struct vm_map_entry *tmpent = newents;
1678 int nent = 0;
1679 vaddr_t cur = start;
1680
1681 while (tmpent) {
1682 nent++;
1683 if (tmpent->start < cur)
1684 panic("uvm_map_replace1");
1685 if (tmpent->start > tmpent->end || tmpent->end > end) {
1686 printf("tmpent->start=0x%lx, tmpent->end=0x%lx, end=0x%lx\n",
1687 tmpent->start, tmpent->end, end);
1688 panic("uvm_map_replace2");
1689 }
1690 cur = tmpent->end;
1691 if (tmpent->next) {
1692 if (tmpent->next->prev != tmpent)
1693 panic("uvm_map_replace3");
1694 } else {
1695 if (newents->prev != tmpent)
1696 panic("uvm_map_replace4");
1697 }
1698 tmpent = tmpent->next;
1699 }
1700 if (nent != nnewents)
1701 panic("uvm_map_replace5");
1702 }
1703 #endif
1704
1705 /*
1706 * map entry is a valid blank! replace it. (this does all the
1707 * work of map entry link/unlink...).
1708 */
1709
1710 if (newents) {
1711 last = newents->prev; /* we expect this */
1712
1713 /* critical: flush stale hints out of map */
1714 SAVE_HINT(map, map->hint, newents);
1715 if (map->first_free == oldent)
1716 map->first_free = last;
1717
1718 last->next = oldent->next;
1719 last->next->prev = last;
1720
1721 /* Fix RB tree */
1722 uvm_rb_remove(map, oldent);
1723
1724 newents->prev = oldent->prev;
1725 newents->prev->next = newents;
1726 map->nentries = map->nentries + (nnewents - 1);
1727
1728 /* Fixup the RB tree */
1729 {
1730 int i;
1731 struct vm_map_entry *tmp;
1732
1733 tmp = newents;
1734 for (i = 0; i < nnewents && tmp; i++) {
1735 uvm_rb_insert(map, tmp);
1736 tmp = tmp->next;
1737 }
1738 }
1739 } else {
1740
1741 /* critical: flush stale hints out of map */
1742 SAVE_HINT(map, map->hint, oldent->prev);
1743 if (map->first_free == oldent)
1744 map->first_free = oldent->prev;
1745
1746 /* NULL list of new entries: just remove the old one */
1747 uvm_map_entry_unlink(map, oldent);
1748 }
1749
1750
1751 uvm_tree_sanity(map, "map_replace leave");
1752
1753 /*
1754 * now we can free the old blank entry, unlock the map and return.
1755 */
1756
1757 uvm_mapent_free(oldent);
1758 return(TRUE);
1759 }
1760
1761 /*
1762 * uvm_map_extract: extract a mapping from a map and put it somewhere
1763 * (maybe removing the old mapping)
1764 *
1765 * => maps should be unlocked (we will write lock them)
1766 * => returns 0 on success, error code otherwise
1767 * => start must be page aligned
1768 * => len must be page sized
1769 * => flags:
1770 * UVM_EXTRACT_REMOVE: remove mappings from srcmap
1771 * UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only)
1772 * UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs
1773 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
1774 * >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<<
1775 * >>>NOTE: QREF's must be unmapped via the QREF path, thus should only
1776 * be used from within the kernel in a kernel level map <<<
1777 */
1778
1779 int
1780 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
1781 struct vm_map *dstmap, vaddr_t *dstaddrp, int flags)
1782 {
1783 vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge,
1784 oldstart;
1785 struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry;
1786 struct vm_map_entry *deadentry, *oldentry;
1787 vsize_t elen;
1788 int nchain, error, copy_ok;
1789 UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist);
1790
1791 UVMHIST_LOG(maphist,"(srcmap=%p,start=0x%lx, len=0x%lx", srcmap, start,
1792 len,0);
1793 UVMHIST_LOG(maphist," ...,dstmap=%p, flags=0x%lx)", dstmap,flags,0,0);
1794
1795 uvm_tree_sanity(srcmap, "map_extract src enter");
1796 uvm_tree_sanity(dstmap, "map_extract dst enter");
1797
1798 /*
1799 * step 0: sanity check: start must be on a page boundary, length
1800 * must be page sized. can't ask for CONTIG/QREF if you asked for
1801 * REMOVE.
1802 */
1803
1804 KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0);
1805 KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 ||
1806 (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0);
1807
1808 /*
1809 * step 1: reserve space in the target map for the extracted area
1810 */
1811
1812 dstaddr = vm_map_min(dstmap);
1813 if (uvm_map_reserve(dstmap, len, start, 0, &dstaddr) == FALSE)
1814 return(ENOMEM);
1815 *dstaddrp = dstaddr; /* pass address back to caller */
1816 UVMHIST_LOG(maphist, " dstaddr=0x%lx", dstaddr,0,0,0);
1817
1818 /*
1819 * step 2: setup for the extraction process loop by init'ing the
1820 * map entry chain, locking src map, and looking up the first useful
1821 * entry in the map.
1822 */
1823
1824 end = start + len;
1825 newend = dstaddr + len;
1826 chain = endchain = NULL;
1827 nchain = 0;
1828 vm_map_lock(srcmap);
1829
1830 if (uvm_map_lookup_entry(srcmap, start, &entry)) {
1831
1832 /* "start" is within an entry */
1833 if (flags & UVM_EXTRACT_QREF) {
1834
1835 /*
1836 * for quick references we don't clip the entry, so
1837 * the entry may map space "before" the starting
1838 * virtual address... this is the "fudge" factor
1839 * (which can be non-zero only the first time
1840 * through the "while" loop in step 3).
1841 */
1842
1843 fudge = start - entry->start;
1844 } else {
1845
1846 /*
1847 * normal reference: we clip the map entry to fit (thus
1848 * fudge is zero)
1849 */
1850
1851 UVM_MAP_CLIP_START(srcmap, entry, start);
1852 SAVE_HINT(srcmap, srcmap->hint, entry->prev);
1853 fudge = 0;
1854 }
1855 } else {
1856
1857 /* "start" is not within an entry ... skip to next entry */
1858 if (flags & UVM_EXTRACT_CONTIG) {
1859 error = EINVAL;
1860 goto bad; /* definite hole here ... */
1861 }
1862
1863 entry = entry->next;
1864 fudge = 0;
1865 }
1866
1867 /* save values from srcmap for step 6 */
1868 orig_entry = entry;
1869 orig_fudge = fudge;
1870
1871 /*
1872 * step 3: now start looping through the map entries, extracting
1873 * as we go.
1874 */
1875
1876 while (entry->start < end && entry != &srcmap->header) {
1877
1878 /* if we are not doing a quick reference, clip it */
1879 if ((flags & UVM_EXTRACT_QREF) == 0)
1880 UVM_MAP_CLIP_END(srcmap, entry, end);
1881
1882 /* clear needs_copy (allow chunking) */
1883 if (UVM_ET_ISNEEDSCOPY(entry)) {
1884 if (fudge)
1885 oldstart = entry->start;
1886 else
1887 oldstart = 0; /* XXX: gcc */
1888 amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end);
1889 if (UVM_ET_ISNEEDSCOPY(entry)) { /* failed? */
1890 error = ENOMEM;
1891 goto bad;
1892 }
1893
1894 /* amap_copy could clip (during chunk)! update fudge */
1895 if (fudge) {
1896 fudge = fudge - (entry->start - oldstart);
1897 orig_fudge = fudge;
1898 }
1899 }
1900
1901 /* calculate the offset of this from "start" */
1902 oldoffset = (entry->start + fudge) - start;
1903
1904 /* allocate a new map entry */
1905 newentry = uvm_mapent_alloc(dstmap);
1906 if (newentry == NULL) {
1907 error = ENOMEM;
1908 goto bad;
1909 }
1910
1911 /* set up new map entry */
1912 newentry->next = NULL;
1913 newentry->prev = endchain;
1914 newentry->start = dstaddr + oldoffset;
1915 newentry->end =
1916 newentry->start + (entry->end - (entry->start + fudge));
1917 if (newentry->end > newend || newentry->end < newentry->start)
1918 newentry->end = newend;
1919 newentry->object.uvm_obj = entry->object.uvm_obj;
1920 if (newentry->object.uvm_obj) {
1921 if (newentry->object.uvm_obj->pgops->pgo_reference)
1922 newentry->object.uvm_obj->pgops->
1923 pgo_reference(newentry->object.uvm_obj);
1924 newentry->offset = entry->offset + fudge;
1925 } else {
1926 newentry->offset = 0;
1927 }
1928 newentry->etype = entry->etype;
1929 newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ?
1930 entry->max_protection : entry->protection;
1931 newentry->max_protection = entry->max_protection;
1932 newentry->inheritance = entry->inheritance;
1933 newentry->wired_count = 0;
1934 newentry->aref.ar_amap = entry->aref.ar_amap;
1935 if (newentry->aref.ar_amap) {
1936 newentry->aref.ar_pageoff =
1937 entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT);
1938 uvm_map_reference_amap(newentry, AMAP_SHARED |
1939 ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0));
1940 } else {
1941 newentry->aref.ar_pageoff = 0;
1942 }
1943 newentry->advice = entry->advice;
1944
1945 /* now link it on the chain */
1946 nchain++;
1947 if (endchain == NULL) {
1948 chain = endchain = newentry;
1949 } else {
1950 endchain->next = newentry;
1951 endchain = newentry;
1952 }
1953
1954 /* end of 'while' loop! */
1955 if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end &&
1956 (entry->next == &srcmap->header ||
1957 entry->next->start != entry->end)) {
1958 error = EINVAL;
1959 goto bad;
1960 }
1961 entry = entry->next;
1962 fudge = 0;
1963 }
1964
1965 /*
1966 * step 4: close off chain (in format expected by uvm_map_replace)
1967 */
1968
1969 if (chain)
1970 chain->prev = endchain;
1971
1972 /*
1973 * step 5: attempt to lock the dest map so we can pmap_copy.
1974 * note usage of copy_ok:
1975 * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5)
1976 * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7
1977 */
1978
1979 if (srcmap == dstmap || vm_map_lock_try(dstmap) == TRUE) {
1980 copy_ok = 1;
1981 if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
1982 nchain)) {
1983 if (srcmap != dstmap)
1984 vm_map_unlock(dstmap);
1985 error = EIO;
1986 goto bad;
1987 }
1988 } else {
1989 copy_ok = 0;
1990 		/* replace deferred until step 7 */
1991 }
1992
1993 /*
1994 * step 6: traverse the srcmap a second time to do the following:
1995 * - if we got a lock on the dstmap do pmap_copy
1996 * - if UVM_EXTRACT_REMOVE remove the entries
1997 * we make use of orig_entry and orig_fudge (saved in step 2)
1998 */
1999
2000 if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) {
2001
2002 /* purge possible stale hints from srcmap */
2003 if (flags & UVM_EXTRACT_REMOVE) {
2004 SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev);
2005 if (srcmap->first_free->start >= start)
2006 srcmap->first_free = orig_entry->prev;
2007 }
2008
2009 entry = orig_entry;
2010 fudge = orig_fudge;
2011 deadentry = NULL; /* for UVM_EXTRACT_REMOVE */
2012
2013 while (entry->start < end && entry != &srcmap->header) {
2014 if (copy_ok) {
2015 oldoffset = (entry->start + fudge) - start;
2016 elen = MIN(end, entry->end) -
2017 (entry->start + fudge);
2018 pmap_copy(dstmap->pmap, srcmap->pmap,
2019 dstaddr + oldoffset, elen,
2020 entry->start + fudge);
2021 }
2022
2023 /* we advance "entry" in the following if statement */
2024 if (flags & UVM_EXTRACT_REMOVE) {
2025 pmap_remove(srcmap->pmap, entry->start,
2026 entry->end);
2027 oldentry = entry; /* save entry */
2028 entry = entry->next; /* advance */
2029 uvm_map_entry_unlink(srcmap, oldentry);
2030 /* add to dead list */
2031 oldentry->next = deadentry;
2032 deadentry = oldentry;
2033 } else {
2034 entry = entry->next; /* advance */
2035 }
2036
2037 /* end of 'while' loop */
2038 fudge = 0;
2039 }
2040 pmap_update(srcmap->pmap);
2041
2042 /*
2043 * unlock dstmap. we will dispose of deadentry in
2044 * step 7 if needed
2045 */
2046
2047 if (copy_ok && srcmap != dstmap)
2048 vm_map_unlock(dstmap);
2049
2050 	} else
2052 deadentry = NULL; /* XXX: gcc */
2053
2054 /*
2055 * step 7: we are done with the source map, unlock. if copy_ok
2056 * is 0 then we have not replaced the dummy mapping in dstmap yet
2057 * and we need to do so now.
2058 */
2059
2060 vm_map_unlock(srcmap);
2061 if ((flags & UVM_EXTRACT_REMOVE) && deadentry)
2062 uvm_unmap_detach(deadentry, 0); /* dispose of old entries */
2063
2064 /* now do the replacement if we didn't do it in step 5 */
2065 if (copy_ok == 0) {
2066 vm_map_lock(dstmap);
2067 error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
2068 nchain);
2069 vm_map_unlock(dstmap);
2070
2071 if (error == FALSE) {
2072 error = EIO;
2073 goto bad2;
2074 }
2075 }
2076
2077 uvm_tree_sanity(srcmap, "map_extract src leave");
2078 uvm_tree_sanity(dstmap, "map_extract dst leave");
2079
2080 return(0);
2081
2082 /*
2083 * bad: failure recovery
2084 */
2085 bad:
2086 vm_map_unlock(srcmap);
2087 bad2: /* src already unlocked */
2088 if (chain)
2089 uvm_unmap_detach(chain,
2090 (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0);
2091
2092 uvm_tree_sanity(srcmap, "map_extract src err leave");
2093 uvm_tree_sanity(dstmap, "map_extract dst err leave");
2094
2095 uvm_unmap(dstmap, dstaddr, dstaddr+len); /* ??? */
2096 return(error);
2097 }
2098
2099 /* end of extraction functions */
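
/*
 * Example (illustrative sketch, not part of the build): a caller in the
 * spirit of uvm_io() extracts a window of another process' address space
 * into kernel_map, accesses it from the kernel, and then removes the
 * window again.  The uvm_map_extract() prototype assumed here is
 * (srcmap, start, len, dstmap, &dstaddr, flags); "start", "len", "kva"
 * and "error" are hypothetical locals.
 *
 *	vaddr_t kva;
 *	int error;
 *
 *	error = uvm_map_extract(&p->p_vmspace->vm_map, start, len,
 *	    kernel_map, &kva, UVM_EXTRACT_CONTIG | UVM_EXTRACT_FIXPROT);
 *	if (error)
 *		return (error);
 *	(access the window at [kva, kva + len) from the kernel)
 *	uvm_unmap(kernel_map, kva, kva + len);
 */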
2100
2101 /*
2102 * uvm_map_submap: punch down part of a map into a submap
2103 *
2104 * => only the kernel_map is allowed to be submapped
2105 * => the purpose of submapping is to break up the locking granularity
2106 * of a larger map
2107 * => the range specified must have been mapped previously with a uvm_map()
2108 * call [with uobj==NULL] to create a blank map entry in the main map.
2109 * [And it had better still be blank!]
2110 * => maps which contain submaps should never be copied or forked.
2111 * => to remove a submap, use uvm_unmap() on the main map
2112 * and then uvm_map_deallocate() the submap.
2113 * => main map must be unlocked.
2114 * => submap must have been init'd and have a zero reference count.
2115 * [need not be locked as we don't actually reference it]
2116 */
2117
2118 int
2119 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
2120 struct vm_map *submap)
2121 {
2122 struct vm_map_entry *entry;
2123 int result;
2124
2125 vm_map_lock(map);
2126
2127 VM_MAP_RANGE_CHECK(map, start, end);
2128
2129 if (uvm_map_lookup_entry(map, start, &entry)) {
2130 UVM_MAP_CLIP_START(map, entry, start);
2131 UVM_MAP_CLIP_END(map, entry, end); /* to be safe */
2132 } else {
2133 entry = NULL;
2134 }
2135
2136 if (entry != NULL &&
2137 entry->start == start && entry->end == end &&
2138 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
2139 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
2140 entry->etype |= UVM_ET_SUBMAP;
2141 entry->object.sub_map = submap;
2142 entry->offset = 0;
2143 uvm_map_reference(submap);
2144 result = 0;
2145 } else {
2146 result = EINVAL;
2147 }
2148 vm_map_unlock(map);
2149 return(result);
2150 }
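
/*
 * Example (illustrative sketch, not part of the build): a subsystem
 * normally reserves a blank range in kernel_map first (a uvm_map() call
 * with uobj == NULL, as required above) and then punches its submap in.
 * "start", "end" and "mysubmap" are hypothetical, and uvm_map_create()
 * is assumed here as the allocator that returns an initialized,
 * unreferenced map.
 *
 *	struct vm_map *mysubmap;
 *
 *	mysubmap = uvm_map_create(pmap_kernel(), start, end, 0);
 *	if (uvm_map_submap(kernel_map, start, end, mysubmap) != 0)
 *		panic("example: uvm_map_submap failed");
 */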
2151
2152
2153 /*
2154 * uvm_map_protect: change map protection
2155 *
2156 * => set_max means set max_protection.
2157 * => map must be unlocked.
2158 */
2159
2160 #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \
2161 ~VM_PROT_WRITE : VM_PROT_ALL)
2162 #define max(a,b) ((a) > (b) ? (a) : (b))
2163
2164 int
2165 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
2166 vm_prot_t new_prot, boolean_t set_max)
2167 {
2168 struct vm_map_entry *current, *entry;
2169 int error = 0;
2170 UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist);
2171 UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,new_prot=0x%lx)",
2172 map, start, end, new_prot);
2173
2174 vm_map_lock(map);
2175
2176 VM_MAP_RANGE_CHECK(map, start, end);
2177
2178 if (uvm_map_lookup_entry(map, start, &entry)) {
2179 UVM_MAP_CLIP_START(map, entry, start);
2180 } else {
2181 entry = entry->next;
2182 }
2183
2184 /*
2185 * make a first pass to check for protection violations.
2186 */
2187
2188 current = entry;
2189 while ((current != &map->header) && (current->start < end)) {
2190 if (UVM_ET_ISSUBMAP(current)) {
2191 error = EINVAL;
2192 goto out;
2193 }
2194 if ((new_prot & current->max_protection) != new_prot) {
2195 error = EACCES;
2196 goto out;
2197 }
2198 current = current->next;
2199 }
2200
2201 /* go back and fix up protections (no need to clip this time). */
2202
2203 current = entry;
2204
2205 while ((current != &map->header) && (current->start < end)) {
2206 vm_prot_t old_prot;
2207
2208 UVM_MAP_CLIP_END(map, current, end);
2209
2210 old_prot = current->protection;
2211 if (set_max)
2212 current->protection =
2213 (current->max_protection = new_prot) & old_prot;
2214 else
2215 current->protection = new_prot;
2216
2217 /*
2218 * update physical map if necessary. worry about copy-on-write
2219 * here -- CHECK THIS XXX
2220 */
2221
2222 if (current->protection != old_prot) {
2223 /* update pmap! */
2224 if ((current->protection & MASK(entry)) == PROT_NONE &&
2225 VM_MAPENT_ISWIRED(entry))
2226 current->wired_count--;
2227 pmap_protect(map->pmap, current->start, current->end,
2228 current->protection & MASK(entry));
2229 }
2230
2231 /*
2232 * If the map is configured to lock any future mappings,
2233 * wire this entry now if the old protection was VM_PROT_NONE
2234 * and the new protection is not VM_PROT_NONE.
2235 */
2236
2237 if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
2238 VM_MAPENT_ISWIRED(entry) == 0 &&
2239 old_prot == VM_PROT_NONE &&
2240 new_prot != VM_PROT_NONE) {
2241 if (uvm_map_pageable(map, entry->start, entry->end,
2242 FALSE, UVM_LK_ENTER|UVM_LK_EXIT) != 0) {
2243 /*
2244 * If locking the entry fails, remember the
2245 * error if it's the first one. Note we
2246 * still continue setting the protection in
2247 * the map, but will return the resource
2248 * shortage condition regardless.
2249 *
2250 * XXX Ignore what the actual error is,
2251 * XXX just call it a resource shortage
2252 * XXX so that it doesn't get confused
2253 * XXX what uvm_map_protect() itself would
2254 * XXX normally return.
2255 */
2256 error = ENOMEM;
2257 }
2258 }
2259
2260 current = current->next;
2261 }
2262 pmap_update(map->pmap);
2263
2264 out:
2265 vm_map_unlock(map);
2266 UVMHIST_LOG(maphist, "<- done, rv=%ld",error,0,0,0);
2267 return (error);
2268 }
2269
2270 #undef max
2271 #undef MASK
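
/*
 * Example (illustrative sketch, not part of the build): an mprotect(2)-
 * style caller drops a range to read-only by changing the current
 * protection (set_max == FALSE) while leaving max_protection alone, so
 * the pages can be made writable again later.  "addr" and "size" are
 * hypothetical page-aligned values.
 *
 *	error = uvm_map_protect(&p->p_vmspace->vm_map, addr, addr + size,
 *	    VM_PROT_READ, FALSE);
 */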
2272
2273 /*
2274 * uvm_map_inherit: set inheritance code for range of addrs in map.
2275 *
2276 * => map must be unlocked
2277 * => note that the inherit code is used during a "fork". see fork
2278 * code for details.
2279 */
2280
2281 int
2282 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
2283 vm_inherit_t new_inheritance)
2284 {
2285 struct vm_map_entry *entry, *temp_entry;
2286 UVMHIST_FUNC("uvm_map_inherit"); UVMHIST_CALLED(maphist);
2287 UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,new_inh=0x%lx)",
2288 map, start, end, new_inheritance);
2289
2290 switch (new_inheritance) {
2291 case MAP_INHERIT_NONE:
2292 case MAP_INHERIT_COPY:
2293 case MAP_INHERIT_SHARE:
2294 break;
2295 default:
2296 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
2297 return (EINVAL);
2298 }
2299
2300 vm_map_lock(map);
2301
2302 VM_MAP_RANGE_CHECK(map, start, end);
2303
2304 if (uvm_map_lookup_entry(map, start, &temp_entry)) {
2305 entry = temp_entry;
2306 UVM_MAP_CLIP_START(map, entry, start);
2307 } else {
2308 entry = temp_entry->next;
2309 }
2310
2311 while ((entry != &map->header) && (entry->start < end)) {
2312 UVM_MAP_CLIP_END(map, entry, end);
2313 entry->inheritance = new_inheritance;
2314 entry = entry->next;
2315 }
2316
2317 vm_map_unlock(map);
2318 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
2319 return (0);
2320 }
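
/*
 * Example (illustrative sketch, not part of the build): a minherit(2)-
 * style caller marks a range so that a future fork shares it with the
 * child instead of handing the child a copy-on-write copy.  "addr" and
 * "size" are hypothetical page-aligned values.
 *
 *	error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
 *	    MAP_INHERIT_SHARE);
 */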
2321
2322 /*
2323 * uvm_map_advice: set advice code for range of addrs in map.
2324 *
2325 * => map must be unlocked
2326 */
2327
2328 int
2329 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
2330 {
2331 struct vm_map_entry *entry, *temp_entry;
2332 UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist);
2333 UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,new_adv=0x%lx)",
2334 map, start, end, new_advice);
2335
2336 vm_map_lock(map);
2337 VM_MAP_RANGE_CHECK(map, start, end);
2338 if (uvm_map_lookup_entry(map, start, &temp_entry)) {
2339 entry = temp_entry;
2340 UVM_MAP_CLIP_START(map, entry, start);
2341 } else {
2342 entry = temp_entry->next;
2343 }
2344
2345 /*
2346 * XXXJRT: disallow holes?
2347 */
2348
2349 while ((entry != &map->header) && (entry->start < end)) {
2350 UVM_MAP_CLIP_END(map, entry, end);
2351
2352 switch (new_advice) {
2353 case MADV_NORMAL:
2354 case MADV_RANDOM:
2355 case MADV_SEQUENTIAL:
2356 /* nothing special here */
2357 break;
2358
2359 default:
2360 vm_map_unlock(map);
2361 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
2362 return (EINVAL);
2363 }
2364 entry->advice = new_advice;
2365 entry = entry->next;
2366 }
2367
2368 vm_map_unlock(map);
2369 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
2370 return (0);
2371 }
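
/*
 * Example (illustrative sketch, not part of the build): a madvise(2)-
 * style caller records a sequential-access hint for a range; the advice
 * is stored in the map entries for the fault path to consult.  "addr"
 * and "size" are hypothetical page-aligned values.
 *
 *	error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
 *	    MADV_SEQUENTIAL);
 */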
2372
2373 /*
2374 * uvm_map_pageable: sets the pageability of a range in a map.
2375 *
2376 * => wires map entries. should not be used for transient page locking.
2377 * for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()).
2378  * => regions specified as not pageable require lock-down (wired) memory
2379 * and page tables.
2380 * => map must never be read-locked
2381  * => if UVM_LK_ENTER is set in lockflags, the map is already write-locked
2382  * => unless UVM_LK_EXIT is set in lockflags, we unlock the map on return,
2383  * 	since we must downgrade to a read-lock to call uvm_fault_wire()
2384 * => XXXCDC: check this and try and clean it up.
2385 */
2386
2387 int
2388 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
2389 boolean_t new_pageable, int lockflags)
2390 {
2391 struct vm_map_entry *entry, *start_entry, *failed_entry;
2392 int rv;
2393 #ifdef DIAGNOSTIC
2394 u_int timestamp_save;
2395 #endif
2396 UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist);
2397 UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,new_pageable=0x%lx)",
2398 map, start, end, new_pageable);
2399 KASSERT(map->flags & VM_MAP_PAGEABLE);
2400
2401 if ((lockflags & UVM_LK_ENTER) == 0)
2402 vm_map_lock(map);
2403
2404 VM_MAP_RANGE_CHECK(map, start, end);
2405
2406 /*
2407 * only one pageability change may take place at one time, since
2408 * uvm_fault_wire assumes it will be called only once for each
2409 * wiring/unwiring. therefore, we have to make sure we're actually
2410 * changing the pageability for the entire region. we do so before
2411 * making any changes.
2412 */
2413
2414 if (uvm_map_lookup_entry(map, start, &start_entry) == FALSE) {
2415 if ((lockflags & UVM_LK_EXIT) == 0)
2416 vm_map_unlock(map);
2417
2418 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
2419 return (EFAULT);
2420 }
2421 entry = start_entry;
2422
2423 /*
2424 * handle wiring and unwiring separately.
2425 */
2426
2427 if (new_pageable) { /* unwire */
2428 UVM_MAP_CLIP_START(map, entry, start);
2429
2430 /*
2431 * unwiring. first ensure that the range to be unwired is
2432 * really wired down and that there are no holes.
2433 */
2434
2435 while ((entry != &map->header) && (entry->start < end)) {
2436 if (entry->wired_count == 0 ||
2437 (entry->end < end &&
2438 (entry->next == &map->header ||
2439 entry->next->start > entry->end))) {
2440 if ((lockflags & UVM_LK_EXIT) == 0)
2441 vm_map_unlock(map);
2442 UVMHIST_LOG(maphist,
2443 "<- done (INVALID UNWIRE ARG)",0,0,0,0);
2444 return (EINVAL);
2445 }
2446 entry = entry->next;
2447 }
2448
2449 /*
2450 * POSIX 1003.1b - a single munlock call unlocks a region,
2451 * regardless of the number of mlock calls made on that
2452 * region.
2453 */
2454
2455 entry = start_entry;
2456 while ((entry != &map->header) && (entry->start < end)) {
2457 UVM_MAP_CLIP_END(map, entry, end);
2458 if (VM_MAPENT_ISWIRED(entry))
2459 uvm_map_entry_unwire(map, entry);
2460 entry = entry->next;
2461 }
2462 if ((lockflags & UVM_LK_EXIT) == 0)
2463 vm_map_unlock(map);
2464 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0);
2465 return (0);
2466 }
2467
2468 /*
2469 * wire case: in two passes [XXXCDC: ugly block of code here]
2470 *
2471 * 1: holding the write lock, we create any anonymous maps that need
2472 * to be created. then we clip each map entry to the region to
2473 * be wired and increment its wiring count.
2474 *
2475 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
2476 * in the pages for any newly wired area (wired_count == 1).
2477 *
2478 * downgrading to a read lock for uvm_fault_wire avoids a possible
2479 * deadlock with another thread that may have faulted on one of
2480 * the pages to be wired (it would mark the page busy, blocking
2481 * us, then in turn block on the map lock that we hold). because
2482 * of problems in the recursive lock package, we cannot upgrade
2483 * to a write lock in vm_map_lookup. thus, any actions that
2484 * require the write lock must be done beforehand. because we
2485 * keep the read lock on the map, the copy-on-write status of the
2486 * entries we modify here cannot change.
2487 */
2488
2489 while ((entry != &map->header) && (entry->start < end)) {
2490 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
2491
2492 /*
2493 * perform actions of vm_map_lookup that need the
2494 * write lock on the map: create an anonymous map
2495 * for a copy-on-write region, or an anonymous map
2496 * for a zero-fill region. (XXXCDC: submap case
2497 * ok?)
2498 */
2499
2500 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */
2501 if (UVM_ET_ISNEEDSCOPY(entry) &&
2502 ((entry->protection & VM_PROT_WRITE) ||
2503 (entry->object.uvm_obj == NULL))) {
2504 amap_copy(map, entry, M_WAITOK, TRUE,
2505 start, end);
2506 /* XXXCDC: wait OK? */
2507 }
2508 }
2509 }
2510 UVM_MAP_CLIP_START(map, entry, start);
2511 UVM_MAP_CLIP_END(map, entry, end);
2512 entry->wired_count++;
2513
2514 /*
2515 * Check for holes
2516 */
2517
2518 if (entry->protection == VM_PROT_NONE ||
2519 (entry->end < end &&
2520 (entry->next == &map->header ||
2521 entry->next->start > entry->end))) {
2522
2523 /*
2524 * found one. amap creation actions do not need to
2525 * be undone, but the wired counts need to be restored.
2526 */
2527
2528 while (entry != &map->header && entry->end > start) {
2529 entry->wired_count--;
2530 entry = entry->prev;
2531 }
2532 if ((lockflags & UVM_LK_EXIT) == 0)
2533 vm_map_unlock(map);
2534 UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0);
2535 return (EINVAL);
2536 }
2537 entry = entry->next;
2538 }
2539
2540 /*
2541 * Pass 2.
2542 */
2543
2544 #ifdef DIAGNOSTIC
2545 timestamp_save = map->timestamp;
2546 #endif
2547 vm_map_busy(map);
2548 vm_map_downgrade(map);
2549
2550 rv = 0;
2551 entry = start_entry;
2552 while (entry != &map->header && entry->start < end) {
2553 if (entry->wired_count == 1) {
2554 rv = uvm_fault_wire(map, entry->start, entry->end,
2555 entry->protection);
2556 if (rv) {
2557 /*
2558 * wiring failed. break out of the loop.
2559 * we'll clean up the map below, once we
2560 * have a write lock again.
2561 */
2562 break;
2563 }
2564 }
2565 entry = entry->next;
2566 }
2567
2568 if (rv) { /* failed? */
2569
2570 /*
2571 * Get back to an exclusive (write) lock.
2572 */
2573
2574 vm_map_upgrade(map);
2575 vm_map_unbusy(map);
2576
2577 #ifdef DIAGNOSTIC
2578 if (timestamp_save != map->timestamp)
2579 panic("uvm_map_pageable: stale map");
2580 #endif
2581
2582 /*
2583 * first drop the wiring count on all the entries
2584 * which haven't actually been wired yet.
2585 */
2586
2587 failed_entry = entry;
2588 while (entry != &map->header && entry->start < end) {
2589 entry->wired_count--;
2590 entry = entry->next;
2591 }
2592
2593 /*
2594 * now, unwire all the entries that were successfully
2595 * wired above.
2596 */
2597
2598 entry = start_entry;
2599 while (entry != failed_entry) {
2600 entry->wired_count--;
2601 if (VM_MAPENT_ISWIRED(entry) == 0)
2602 uvm_map_entry_unwire(map, entry);
2603 entry = entry->next;
2604 }
2605 if ((lockflags & UVM_LK_EXIT) == 0)
2606 vm_map_unlock(map);
2607 UVMHIST_LOG(maphist, "<- done (RV=%ld)", rv,0,0,0);
2608 return(rv);
2609 }
2610
2611 /* We are holding a read lock here. */
2612 if ((lockflags & UVM_LK_EXIT) == 0) {
2613 vm_map_unbusy(map);
2614 vm_map_unlock_read(map);
2615 } else {
2616
2617 /*
2618 * Get back to an exclusive (write) lock.
2619 */
2620
2621 vm_map_upgrade(map);
2622 vm_map_unbusy(map);
2623 }
2624
2625 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0);
2626 return (0);
2627 }
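
/*
 * Example (illustrative sketch, not part of the build): an mlock(2)-
 * style caller wires a range by making it non-pageable, and an
 * munlock(2)-style caller reverses that; with lockflags == 0 the map is
 * locked and unlocked internally.  "addr" and "size" are hypothetical
 * page-aligned values.
 *
 *	error = uvm_map_pageable(&p->p_vmspace->vm_map,
 *	    addr, addr + size, FALSE, 0);	(wire, as for mlock)
 *	...
 *	error = uvm_map_pageable(&p->p_vmspace->vm_map,
 *	    addr, addr + size, TRUE, 0);	(unwire, as for munlock)
 */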
2628
2629 /*
2630 * uvm_map_pageable_all: special case of uvm_map_pageable - affects
2631 * all mapped regions.
2632 *
2633 * => map must not be locked.
2634 * => if no flags are specified, all regions are unwired.
2635 * => XXXJRT: has some of the same problems as uvm_map_pageable() above.
2636 */
2637
2638 int
2639 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
2640 {
2641 struct vm_map_entry *entry, *failed_entry;
2642 vsize_t size;
2643 int error;
2644 #ifdef DIAGNOSTIC
2645 u_int timestamp_save;
2646 #endif
2647 UVMHIST_FUNC("uvm_map_pageable_all"); UVMHIST_CALLED(maphist);
2648 UVMHIST_LOG(maphist,"(map=%p,flags=0x%lx)", map, flags, 0, 0);
2649
2650 KASSERT(map->flags & VM_MAP_PAGEABLE);
2651
2652 vm_map_lock(map);
2653
2654 /*
2655 * handle wiring and unwiring separately.
2656 */
2657
2658 if (flags == 0) { /* unwire */
2659 /*
2660 * POSIX 1003.1b -- munlockall unlocks all regions,
2661 * regardless of how many times mlockall has been called.
2662 */
2663 for (entry = map->header.next; entry != &map->header;
2664 entry = entry->next) {
2665 if (VM_MAPENT_ISWIRED(entry))
2666 uvm_map_entry_unwire(map, entry);
2667 }
2668 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
2669 vm_map_unlock(map);
2670 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0);
2671 return (0);
2672
2673 /*
2674 * end of unwire case!
2675 */
2676 }
2677
2678 if (flags & MCL_FUTURE) {
2679 /*
2680 * must wire all future mappings; remember this.
2681 */
2682 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0);
2683 }
2684
2685 if ((flags & MCL_CURRENT) == 0) {
2686 /*
2687 * no more work to do!
2688 */
2689 UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0);
2690 vm_map_unlock(map);
2691 return (0);
2692 }
2693
2694 /*
2695 * wire case: in three passes [XXXCDC: ugly block of code here]
2696 *
2697 * 1: holding the write lock, count all pages mapped by non-wired
2698 * entries. if this would cause us to go over our limit, we fail.
2699 *
2700 * 2: still holding the write lock, we create any anonymous maps that
2701  * 	need to be created.  then we increment the wiring count of each entry.
2702 *
2703 * 3: we downgrade to a read lock, and call uvm_fault_wire to fault
2704 * in the pages for any newly wired area (wired_count == 1).
2705 *
2706 * downgrading to a read lock for uvm_fault_wire avoids a possible
2707 * deadlock with another thread that may have faulted on one of
2708 * the pages to be wired (it would mark the page busy, blocking
2709 * us, then in turn block on the map lock that we hold). because
2710 * of problems in the recursive lock package, we cannot upgrade
2711 * to a write lock in vm_map_lookup. thus, any actions that
2712 * require the write lock must be done beforehand. because we
2713 * keep the read lock on the map, the copy-on-write status of the
2714 * entries we modify here cannot change.
2715 */
2716
2717 for (size = 0, entry = map->header.next; entry != &map->header;
2718 entry = entry->next) {
2719 if (entry->protection != VM_PROT_NONE &&
2720 VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
2721 size += entry->end - entry->start;
2722 }
2723 }
2724
2725 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
2726 vm_map_unlock(map);
2727 return (ENOMEM); /* XXX overloaded */
2728 }
2729
2730 /* XXX non-pmap_wired_count case must be handled by caller */
2731 #ifdef pmap_wired_count
2732 if (limit != 0 &&
2733 (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) {
2734 vm_map_unlock(map);
2735 return (ENOMEM); /* XXX overloaded */
2736 }
2737 #endif
2738
2739 /*
2740 * Pass 2.
2741 */
2742
2743 for (entry = map->header.next; entry != &map->header;
2744 entry = entry->next) {
2745 if (entry->protection == VM_PROT_NONE)
2746 continue;
2747 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
2748 /*
2749 * perform actions of vm_map_lookup that need the
2750 * write lock on the map: create an anonymous map
2751 * for a copy-on-write region, or an anonymous map
2752 * for a zero-fill region. (XXXCDC: submap case
2753 * ok?)
2754 */
2755 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */
2756 if (UVM_ET_ISNEEDSCOPY(entry) &&
2757 ((entry->protection & VM_PROT_WRITE) ||
2758 (entry->object.uvm_obj == NULL))) {
2759 amap_copy(map, entry, M_WAITOK, TRUE,
2760 entry->start, entry->end);
2761 /* XXXCDC: wait OK? */
2762 }
2763 }
2764 }
2765 entry->wired_count++;
2766 }
2767
2768 /*
2769 * Pass 3.
2770 */
2771
2772 #ifdef DIAGNOSTIC
2773 timestamp_save = map->timestamp;
2774 #endif
2775 vm_map_busy(map);
2776 vm_map_downgrade(map);
2777
2778 for (error = 0, entry = map->header.next;
2779 entry != &map->header && error == 0;
2780 entry = entry->next) {
2781 if (entry->wired_count == 1) {
2782 error = uvm_fault_wire(map, entry->start, entry->end,
2783 entry->protection);
2784 }
2785 }
2786
2787 if (error) { /* failed? */
2788 /*
2789 * Get back an exclusive (write) lock.
2790 */
2791 vm_map_upgrade(map);
2792 vm_map_unbusy(map);
2793
2794 #ifdef DIAGNOSTIC
2795 if (timestamp_save != map->timestamp)
2796 panic("uvm_map_pageable_all: stale map");
2797 #endif
2798
2799 /*
2800 * first drop the wiring count on all the entries
2801 * which haven't actually been wired yet.
2802 *
2803 * Skip VM_PROT_NONE entries like we did above.
2804 */
2805 failed_entry = entry;
2806 for (/* nothing */; entry != &map->header;
2807 entry = entry->next) {
2808 if (entry->protection == VM_PROT_NONE)
2809 continue;
2810 entry->wired_count--;
2811 }
2812
2813 /*
2814 * now, unwire all the entries that were successfully
2815 * wired above.
2816 *
2817 * Skip VM_PROT_NONE entries like we did above.
2818 */
2819 for (entry = map->header.next; entry != failed_entry;
2820 entry = entry->next) {
2821 if (entry->protection == VM_PROT_NONE)
2822 continue;
2823 entry->wired_count--;
2824 if (VM_MAPENT_ISWIRED(entry))
2825 uvm_map_entry_unwire(map, entry);
2826 }
2827 vm_map_unlock(map);
2828 UVMHIST_LOG(maphist,"<- done (RV=%ld)", error,0,0,0);
2829 return (error);
2830 }
2831
2832 /* We are holding a read lock here. */
2833 vm_map_unbusy(map);
2834 vm_map_unlock_read(map);
2835
2836 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0);
2837 return (0);
2838 }
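
/*
 * Example (illustrative sketch, not part of the build): an mlockall(2)-
 * style caller wires everything currently mapped and asks that future
 * mappings be wired as well, bounded by the locked-memory resource
 * limit.  "p" stands for the calling process; the limit shown is the
 * usual RLIMIT_MEMLOCK soft limit.
 *
 *	error = uvm_map_pageable_all(&p->p_vmspace->vm_map,
 *	    MCL_CURRENT | MCL_FUTURE,
 *	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
 */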
2839
2840 /*
2841 * uvm_map_clean: clean out a map range
2842 *
2843 * => valid flags:
2844 * if (flags & PGO_CLEANIT): dirty pages are cleaned first
2845 * if (flags & PGO_SYNCIO): dirty pages are written synchronously
2846 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
2847 * if (flags & PGO_FREE): any cached pages are freed after clean
2848 * => returns an error if any part of the specified range isn't mapped
2849 * => never a need to flush amap layer since the anonymous memory has
2850 * no permanent home, but may deactivate pages there
2851 * => called from sys_msync() and sys_madvise()
2852 * => caller must not write-lock map (read OK).
2853 * => we may sleep while cleaning if SYNCIO [with map read-locked]
2854 */
2855
2856 int amap_clean_works = 1; /* XXX for now, just in case... */
2857
2858 int
2859 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
2860 {
2861 struct vm_map_entry *current, *entry;
2862 struct uvm_object *uobj;
2863 struct vm_amap *amap;
2864 struct vm_anon *anon;
2865 struct vm_page *pg;
2866 vaddr_t offset;
2867 vsize_t size;
2868 int rv, error, refs;
2869 UVMHIST_FUNC("uvm_map_clean"); UVMHIST_CALLED(maphist);
2870
2871 UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,flags=0x%lx)",
2872 map, start, end, flags);
2873 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
2874 (PGO_FREE|PGO_DEACTIVATE));
2875
2876 vm_map_lock_read(map);
2877 VM_MAP_RANGE_CHECK(map, start, end);
2878 if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
2879 vm_map_unlock_read(map);
2880 return (EFAULT);
2881 }
2882
2883 /*
2884 * Make a first pass to check for holes.
2885 */
2886
2887 for (current = entry; current->start < end; current = current->next) {
2888 if (UVM_ET_ISSUBMAP(current)) {
2889 vm_map_unlock_read(map);
2890 return (EINVAL);
2891 }
2892 if (end > current->end && (current->next == &map->header ||
2893 current->end != current->next->start)) {
2894 vm_map_unlock_read(map);
2895 return (EFAULT);
2896 }
2897 }
2898
2899 error = 0;
2900
2901 for (current = entry; current->start < end; current = current->next) {
2902 amap = current->aref.ar_amap; /* top layer */
2903 uobj = current->object.uvm_obj; /* bottom layer */
2904 KASSERT(start >= current->start);
2905
2906 /*
2907 * No amap cleaning necessary if:
2908 *
2909 * (1) There's no amap.
2910 *
2911 * (2) We're not deactivating or freeing pages.
2912 */
2913
2914 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
2915 goto flush_object;
2916
2917 /* XXX for now, just in case... */
2918 if (amap_clean_works == 0)
2919 goto flush_object;
2920
2921 offset = start - current->start;
2922 size = MIN(end, current->end) - start;
2923 for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) {
2924 			anon = amap_lookup(&current->aref, offset);
2925 if (anon == NULL)
2926 continue;
2927
2928 simple_lock(&anon->an_lock);
2929
2930 pg = anon->an_page;
2931 if (pg == NULL) {
2932 simple_unlock(&anon->an_lock);
2933 continue;
2934 }
2935
2936 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
2937
2938 /*
2939 * XXX In these first 3 cases, we always just
2940 * XXX deactivate the page. We may want to
2941 * XXX handle the different cases more
2942 * XXX specifically, in the future.
2943 */
2944
2945 case PGO_CLEANIT|PGO_FREE:
2946 case PGO_CLEANIT|PGO_DEACTIVATE:
2947 case PGO_DEACTIVATE:
2948 deactivate_it:
2949 /* skip the page if it's loaned or wired */
2950 if (pg->loan_count != 0 ||
2951 pg->wire_count != 0) {
2952 simple_unlock(&anon->an_lock);
2953 continue;
2954 }
2955
2956 uvm_lock_pageq();
2957
2958 /*
2959 * skip the page if it's not actually owned
2960 * by the anon (may simply be loaned to the
2961 * anon).
2962 */
2963
2964 if ((pg->pg_flags & PQ_ANON) == 0) {
2965 KASSERT(pg->uobject == NULL);
2966 uvm_unlock_pageq();
2967 simple_unlock(&anon->an_lock);
2968 continue;
2969 }
2970 KASSERT(pg->uanon == anon);
2971
2972 #ifdef UBC
2973 /* ...and deactivate the page. */
2974 pmap_clear_reference(pg);
2975 #else
2976 /* zap all mappings for the page. */
2977 pmap_page_protect(pg, VM_PROT_NONE);
2978
2979 /* ...and deactivate the page. */
2980 #endif
2981 uvm_pagedeactivate(pg);
2982
2983 uvm_unlock_pageq();
2984 simple_unlock(&anon->an_lock);
2985 continue;
2986
2987 case PGO_FREE:
2988
2989 /*
2990 * If there are multiple references to
2991 * the amap, just deactivate the page.
2992 */
2993
2994 if (amap_refs(amap) > 1)
2995 goto deactivate_it;
2996
2997 /* XXX skip the page if it's wired */
2998 if (pg->wire_count != 0) {
2999 simple_unlock(&anon->an_lock);
3000 continue;
3001 }
3002 			amap_unadd(&current->aref, offset);
3003 refs = --anon->an_ref;
3004 simple_unlock(&anon->an_lock);
3005 if (refs == 0)
3006 uvm_anfree(anon);
3007 continue;
3008
3009 default:
3010 panic("uvm_map_clean: weird flags");
3011 }
3012 }
3013
3014 flush_object:
3015 /*
3016 * flush pages if we've got a valid backing object.
3017 *
3018 * Don't PGO_FREE if we don't have write permission
3019 * and don't flush if this is a copy-on-write object
3020 * since we can't know our permissions on it.
3021 */
3022
3023 offset = current->offset + (start - current->start);
3024 size = MIN(end, current->end) - start;
3025 if (uobj != NULL &&
3026 ((flags & PGO_FREE) == 0 ||
3027 ((entry->max_protection & VM_PROT_WRITE) != 0 &&
3028 (entry->etype & UVM_ET_COPYONWRITE) == 0))) {
3029 simple_lock(&uobj->vmobjlock);
3030 rv = uobj->pgops->pgo_flush(uobj, offset,
3031 offset + size, flags);
3032 simple_unlock(&uobj->vmobjlock);
3033
3034 if (rv == FALSE)
3035 error = EFAULT;
3036 }
3037 start += size;
3038 }
3039 vm_map_unlock_read(map);
3040 return (error);
3041 }
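
/*
 * Example (illustrative sketch, not part of the build): an msync(2)-
 * style caller with MS_SYNC semantics writes dirty pages in a range back
 * synchronously; either PGO_DEACTIVATE or PGO_FREE (never both) may be
 * or'd in to also age or drop the cached pages.  "addr" and "size" are
 * hypothetical page-aligned values.
 *
 *	error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
 *	    PGO_CLEANIT | PGO_SYNCIO);
 */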
3042
3043
3044 /*
3045 * uvm_map_checkprot: check protection in map
3046 *
3047 * => must allow specified protection in a fully allocated region.
3048 * => map must be read or write locked by caller.
3049 */
3050
3051 boolean_t
3052 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
3053 vm_prot_t protection)
3054 {
3055 struct vm_map_entry *entry;
3056 struct vm_map_entry *tmp_entry;
3057
3058 if (!uvm_map_lookup_entry(map, start, &tmp_entry)) {
3059 return(FALSE);
3060 }
3061 entry = tmp_entry;
3062 while (start < end) {
3063 if (entry == &map->header) {
3064 return(FALSE);
3065 }
3066
3067 /*
3068 * no holes allowed
3069 */
3070
3071 if (start < entry->start) {
3072 return(FALSE);
3073 }
3074
3075 /*
3076 * check protection associated with entry
3077 */
3078
3079 if ((entry->protection & protection) != protection) {
3080 return(FALSE);
3081 }
3082
3083 /* go to next entry */
3084
3085 start = entry->end;
3086 entry = entry->next;
3087 }
3088 return(TRUE);
3089 }
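
/*
 * Example (illustrative sketch, not part of the build): a caller about
 * to write into a range on someone's behalf can hold the map read-locked
 * and verify that the whole range is mapped with the needed permission
 * first.  "map", "addr" and "size" are hypothetical.
 *
 *	vm_map_lock_read(map);
 *	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_WRITE)) {
 *		vm_map_unlock_read(map);
 *		return (EFAULT);
 *	}
 *	(access the range while the read lock is held)
 *	vm_map_unlock_read(map);
 */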
3090
3091 /*
3092 * uvmspace_alloc: allocate a vmspace structure.
3093 *
3094 * - structure includes vm_map and pmap
3095 * - XXX: no locking on this structure
3096 * - refcnt set to 1, rest must be init'd by caller
3097 */
3098 struct vmspace *
3099 uvmspace_alloc(vaddr_t min, vaddr_t max, int pageable)
3100 {
3101 struct vmspace *vm;
3102 UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist);
3103
3104 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK);
3105 uvmspace_init(vm, NULL, min, max, pageable);
3106 UVMHIST_LOG(maphist,"<- done (vm=%p)", vm,0,0,0);
3107 return (vm);
3108 }
3109
3110 /*
3111 * uvmspace_init: initialize a vmspace structure.
3112 *
3113 * - XXX: no locking on this structure
3114 * - refcnt set to 1, rest must be init'd by caller
3115 */
3116 void
3117 uvmspace_init(vm, pmap, min, max, pageable)
3118 struct vmspace *vm;
3119 struct pmap *pmap;
3120 vaddr_t min, max;
3121 boolean_t pageable;
3122 {
3123 UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist);
3124
3125 memset(vm, 0, sizeof(*vm));
3126
3127 uvm_map_setup(&vm->vm_map, min, max, pageable ? VM_MAP_PAGEABLE : 0);
3128
3129 if (pmap)
3130 pmap_reference(pmap);
3131 else
3132 pmap = pmap_create();
3133 vm->vm_map.pmap = pmap;
3134
3135 vm->vm_refcnt = 1;
3136 UVMHIST_LOG(maphist,"<- done",0,0,0,0);
3137 }
3138
3139 /*
3140  * uvmspace_share: share a vmspace between two processes
3141 *
3142 * - XXX: no locking on vmspace
3143 * - used for vfork, threads(?)
3144 */
3145
3146 void
3147 uvmspace_share(p1, p2)
3148 struct proc *p1, *p2;
3149 {
3150 p2->p_vmspace = p1->p_vmspace;
3151 p1->p_vmspace->vm_refcnt++;
3152 }
3153
3154 /*
3155 * uvmspace_unshare: ensure that process "p" has its own, unshared, vmspace
3156 *
3157 * - XXX: no locking on vmspace
3158 */
3159
3160 void
3161 uvmspace_unshare(p)
3162 struct proc *p;
3163 {
3164 struct vmspace *nvm, *ovm = p->p_vmspace;
3165
3166 if (ovm->vm_refcnt == 1)
3167 /* nothing to do: vmspace isn't shared in the first place */
3168 return;
3169
3170 /* make a new vmspace, still holding old one */
3171 nvm = uvmspace_fork(ovm);
3172
3173 pmap_deactivate(p); /* unbind old vmspace */
3174 p->p_vmspace = nvm;
3175 pmap_activate(p); /* switch to new vmspace */
3176
3177 uvmspace_free(ovm); /* drop reference to old vmspace */
3178 }
3179
3180 /*
3181 * uvmspace_exec: the process wants to exec a new program
3182 *
3183 * - XXX: no locking on vmspace
3184 */
3185
3186 void
3187 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
3188 {
3189 struct vmspace *nvm, *ovm = p->p_vmspace;
3190 struct vm_map *map = &ovm->vm_map;
3191
3192 pmap_unuse_final(p); /* before stack addresses go away */
3193
3194 /*
3195 * see if more than one process is using this vmspace...
3196 */
3197
3198 if (ovm->vm_refcnt == 1) {
3199
3200 /*
3201 * if p is the only process using its vmspace then we can safely
3202 * recycle that vmspace for the program that is being exec'd.
3203 */
3204
3205 #ifdef SYSVSHM
3206 /*
3207 * SYSV SHM semantics require us to kill all segments on an exec
3208 */
3209 if (ovm->vm_shm)
3210 shmexit(ovm);
3211 #endif
3212
3213 /*
3214 * POSIX 1003.1b -- "lock future mappings" is revoked
3215 * when a process execs another program image.
3216 */
3217 vm_map_lock(map);
3218 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
3219 vm_map_unlock(map);
3220
3221 /*
3222 * now unmap the old program
3223 */
3224 uvm_unmap(map, map->min_offset, map->max_offset);
3225
3226 /*
3227 * resize the map
3228 */
3229 vm_map_lock(map);
3230 map->min_offset = start;
3231 uvm_tree_sanity(map, "resize enter");
3232 map->max_offset = end;
3233 if (map->header.prev != &map->header)
3234 uvm_rb_fixup(map, map->header.prev);
3235 uvm_tree_sanity(map, "resize leave");
3236 vm_map_unlock(map);
3237
3239 } else {
3240
3241 /*
3242 * p's vmspace is being shared, so we can't reuse it for p since
3243 * it is still being used for others. allocate a new vmspace
3244 * for p
3245 */
3246 nvm = uvmspace_alloc(start, end,
3247 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE);
3248
3249 /*
3250 * install new vmspace and drop our ref to the old one.
3251 */
3252
3253 pmap_deactivate(p);
3254 p->p_vmspace = nvm;
3255 pmap_activate(p);
3256
3257 uvmspace_free(ovm);
3258 }
3259 }
3260
3261 /*
3262 * uvmspace_free: free a vmspace data structure
3263 *
3264 * - XXX: no locking on vmspace
3265 */
3266
3267 void
3268 uvmspace_free(struct vmspace *vm)
3269 {
3270 struct vm_map_entry *dead_entries;
3271 UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist);
3272
3273 UVMHIST_LOG(maphist,"(vm=%p) ref=%ld", vm, vm->vm_refcnt,0,0);
3274 if (--vm->vm_refcnt == 0) {
3275 /*
3276 * lock the map, to wait out all other references to it. delete
3277 * all of the mappings and pages they hold, then call the pmap
3278 * module to reclaim anything left.
3279 */
3280 #ifdef SYSVSHM
3281 /* Get rid of any SYSV shared memory segments. */
3282 if (vm->vm_shm != NULL)
3283 shmexit(vm);
3284 #endif
3285 vm_map_lock(&vm->vm_map);
3286 if (vm->vm_map.nentries) {
3287 uvm_unmap_remove(&vm->vm_map,
3288 vm->vm_map.min_offset, vm->vm_map.max_offset,
3289 &dead_entries, NULL);
3290 if (dead_entries != NULL)
3291 uvm_unmap_detach(dead_entries, 0);
3292 }
3293 pmap_destroy(vm->vm_map.pmap);
3294 vm->vm_map.pmap = NULL;
3295 pool_put(&uvm_vmspace_pool, vm);
3296 }
3297 UVMHIST_LOG(maphist,"<- done", 0,0,0,0);
3298 }
3299
3300 /*
3301 * F O R K - m a i n e n t r y p o i n t
3302 */
3303 /*
3304 * uvmspace_fork: fork a process' main map
3305 *
3306 * => create a new vmspace for child process from parent.
3307 * => parent's map must not be locked.
3308 */
3309
3310 struct vmspace *
3311 uvmspace_fork(struct vmspace *vm1)
3312 {
3313 struct vmspace *vm2;
3314 struct vm_map *old_map = &vm1->vm_map;
3315 struct vm_map *new_map;
3316 struct vm_map_entry *old_entry;
3317 struct vm_map_entry *new_entry;
3318 pmap_t new_pmap;
3319 boolean_t protect_child;
3320 UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist);
3321
3322 vm_map_lock(old_map);
3323
3324 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
3325 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE);
3326 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
3327 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
3328 new_map = &vm2->vm_map; /* XXX */
3329 new_pmap = new_map->pmap;
3330
3331 old_entry = old_map->header.next;
3332
3333 /*
3334 * go entry-by-entry
3335 */
3336
3337 while (old_entry != &old_map->header) {
3338
3339 /*
3340 * first, some sanity checks on the old entry
3341 */
3342 if (UVM_ET_ISSUBMAP(old_entry))
3343 panic("fork: encountered a submap during fork (illegal)");
3344
3345 if (!UVM_ET_ISCOPYONWRITE(old_entry) &&
3346 UVM_ET_ISNEEDSCOPY(old_entry))
3347 panic("fork: non-copy_on_write map entry marked needs_copy (illegal)");
3348
3349
3350 switch (old_entry->inheritance) {
3351 case MAP_INHERIT_NONE:
3352 /*
3353 * drop the mapping
3354 */
3355 break;
3356
3357 case MAP_INHERIT_SHARE:
3358 /*
3359 * share the mapping: this means we want the old and
3360 * new entries to share amaps and backing objects.
3361 */
3362
3363 /*
3364 * if the old_entry needs a new amap (due to prev fork)
3365 * then we need to allocate it now so that we have
3366 * something we own to share with the new_entry. [in
3367 * other words, we need to clear needs_copy]
3368 */
3369
3370 if (UVM_ET_ISNEEDSCOPY(old_entry)) {
3371 /* get our own amap, clears needs_copy */
3372 amap_copy(old_map, old_entry, M_WAITOK, FALSE,
3373 0, 0);
3374 /* XXXCDC: WAITOK??? */
3375 }
3376
3377 new_entry = uvm_mapent_alloc(new_map);
3378 /* old_entry -> new_entry */
3379 uvm_mapent_copy(old_entry, new_entry);
3380
3381 /* new pmap has nothing wired in it */
3382 new_entry->wired_count = 0;
3383
3384 /*
3385 * gain reference to object backing the map (can't
3386 * be a submap, already checked this case).
3387 */
3388 if (new_entry->aref.ar_amap)
3389 /* share reference */
3390 uvm_map_reference_amap(new_entry, AMAP_SHARED);
3391
3392 if (new_entry->object.uvm_obj &&
3393 new_entry->object.uvm_obj->pgops->pgo_reference)
3394 new_entry->object.uvm_obj->
3395 pgops->pgo_reference(
3396 new_entry->object.uvm_obj);
3397
3398 /* insert entry at end of new_map's entry list */
3399 uvm_map_entry_link(new_map, new_map->header.prev,
3400 new_entry);
3401
3402 /*
3403 * pmap_copy the mappings: this routine is optional
3404 * but if it is there it will reduce the number of
3405 * page faults in the new proc.
3406 */
3407
3408 pmap_copy(new_pmap, old_map->pmap, new_entry->start,
3409 (old_entry->end - old_entry->start),
3410 old_entry->start);
3411
3412 break;
3413
3414 case MAP_INHERIT_COPY:
3415
3416 /*
3417 * copy-on-write the mapping (using mmap's
3418 * MAP_PRIVATE semantics)
3419 *
3420 * allocate new_entry, adjust reference counts.
3421 * (note that new references are read-only).
3422 */
3423
3424 new_entry = uvm_mapent_alloc(new_map);
3425 /* old_entry -> new_entry */
3426 uvm_mapent_copy(old_entry, new_entry);
3427
3428 if (new_entry->aref.ar_amap)
3429 uvm_map_reference_amap(new_entry, 0);
3430
3431 if (new_entry->object.uvm_obj &&
3432 new_entry->object.uvm_obj->pgops->pgo_reference)
3433 new_entry->object.uvm_obj->pgops->pgo_reference
3434 (new_entry->object.uvm_obj);
3435
3436 /* new pmap has nothing wired in it */
3437 new_entry->wired_count = 0;
3438
3439 new_entry->etype |=
3440 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3441 uvm_map_entry_link(new_map, new_map->header.prev,
3442 new_entry);
3443
3444 /*
3445 * the new entry will need an amap. it will either
3446 * need to be copied from the old entry or created
3447 * from scratch (if the old entry does not have an
3448 * amap). can we defer this process until later
3449 * (by setting "needs_copy") or do we need to copy
3450 * the amap now?
3451 *
3452 * we must copy the amap now if any of the following
3453 * conditions hold:
3454 * 1. the old entry has an amap and that amap is
3455 * being shared. this means that the old (parent)
3456 * process is sharing the amap with another
3457 * process. if we do not clear needs_copy here
3458 * we will end up in a situation where both the
3459 * parent and child process are referring to the
3460 * same amap with "needs_copy" set. if the
3461 * parent write-faults, the fault routine will
3462 * clear "needs_copy" in the parent by allocating
3463 * a new amap. this is wrong because the
3464 * parent is supposed to be sharing the old amap
3465 * and the new amap will break that.
3466 *
3467 * 2. if the old entry has an amap and a non-zero
3468 * wire count then we are going to have to call
3469 * amap_cow_now to avoid page faults in the
3470 * parent process. since amap_cow_now requires
3471 * "needs_copy" to be clear we might as well
3472 * clear it here as well.
3473 *
3474 */
3475
3476 if (old_entry->aref.ar_amap != NULL) {
3477
3478 if ((amap_flags(old_entry->aref.ar_amap) &
3479 AMAP_SHARED) != 0 ||
3480 VM_MAPENT_ISWIRED(old_entry)) {
3481
3482 amap_copy(new_map, new_entry, M_WAITOK, FALSE,
3483 0, 0);
3484 /* XXXCDC: M_WAITOK ... ok? */
3485 }
3486 }
3487
3488 /*
3489 * if the parent's entry is wired down, then the
3490 * parent process does not want page faults on
3491 * access to that memory. this means that we
3492 * cannot do copy-on-write because we can't write
3493 * protect the old entry. in this case we
3494 * resolve all copy-on-write faults now, using
3495 * amap_cow_now. note that we have already
3496 * allocated any needed amap (above).
3497 */
3498
3499 if (VM_MAPENT_ISWIRED(old_entry)) {
3500
3501 /*
3502 * resolve all copy-on-write faults now
3503 * (note that there is nothing to do if
3504 * the old mapping does not have an amap).
3505 * XXX: is it worthwhile to bother with pmap_copy
3506 * in this case?
3507 */
3508 if (old_entry->aref.ar_amap)
3509 amap_cow_now(new_map, new_entry);
3510
3511 } else {
3512
3513 /*
3514 * setup mappings to trigger copy-on-write faults
3515 * we must write-protect the parent if it has
3516 * an amap and it is not already "needs_copy"...
3517 * if it is already "needs_copy" then the parent
3518 * has already been write-protected by a previous
3519 * fork operation.
3520 *
3521 * if we do not write-protect the parent, then
3522 * we must be sure to write-protect the child
3523 * after the pmap_copy() operation.
3524 *
3525 * XXX: pmap_copy should have some way of telling
3526 * us that it didn't do anything so we can avoid
3527 * calling pmap_protect needlessly.
3528 */
3529
3530 if (old_entry->aref.ar_amap) {
3531
3532 if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
3533 if (old_entry->max_protection & VM_PROT_WRITE) {
3534 pmap_protect(old_map->pmap,
3535 old_entry->start,
3536 old_entry->end,
3537 old_entry->protection &
3538 ~VM_PROT_WRITE);
3539 pmap_update(old_map->pmap);
3540
3541 }
3542 old_entry->etype |= UVM_ET_NEEDSCOPY;
3543 }
3544
3545 /*
3546 * parent must now be write-protected
3547 */
3548 protect_child = FALSE;
3549 } else {
3550
3551 /*
3552 * we only need to protect the child if the
3553 * parent has write access.
3554 */
3555 if (old_entry->max_protection & VM_PROT_WRITE)
3556 protect_child = TRUE;
3557 else
3558 protect_child = FALSE;
3559
3560 }
3561
3562 /*
3563 * copy the mappings
3564 * XXX: need a way to tell if this does anything
3565 */
3566
3567 pmap_copy(new_pmap, old_map->pmap,
3568 new_entry->start,
3569 (old_entry->end - old_entry->start),
3570 old_entry->start);
3571
3572 /*
3573 * protect the child's mappings if necessary
3574 */
3575 if (protect_child) {
3576 pmap_protect(new_pmap, new_entry->start,
3577 new_entry->end,
3578 new_entry->protection &
3579 ~VM_PROT_WRITE);
3580 }
3581
3582 }
3583 break;
3584 } /* end of switch statement */
3585 old_entry = old_entry->next;
3586 }
3587
3588 new_map->size = old_map->size;
3589 vm_map_unlock(old_map);
3590
3591 #ifdef SYSVSHM
3592 if (vm1->vm_shm)
3593 shmfork(vm1, vm2);
3594 #endif
3595
3596 #ifdef PMAP_FORK
3597 pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap);
3598 #endif
3599
3600 UVMHIST_LOG(maphist,"<- done",0,0,0,0);
3601 return(vm2);
3602 }
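
/*
 * Example (illustrative sketch, not part of the build): the fork path
 * gives the child either a shared reference to the parent's vmspace
 * (vfork-style) or a copy-on-write clone built by uvmspace_fork().
 * "p1" is the parent, "p2" the child and "shared" is a hypothetical
 * flag standing in for the real fork flags.
 *
 *	if (shared)
 *		uvmspace_share(p1, p2);
 *	else
 *		p2->p_vmspace = uvmspace_fork(p1->p_vmspace);
 */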
3603
3604 #if defined(DDB)
3605
3606 /*
3607 * DDB hooks
3608 */
3609
3610 /*
3611 * uvm_map_printit: actually prints the map
3612 */
3613
3614 void
3615 uvm_map_printit(struct vm_map *map, boolean_t full,
3616 int (*pr)(const char *, ...))
3617 {
3618 struct vm_map_entry *entry;
3619
3620 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
3621 (*pr)("\t#ent=%d, sz=%u, ref=%d, version=%u, flags=0x%x\n",
3622 map->nentries, map->size, map->ref_count, map->timestamp,
3623 map->flags);
3624 #ifdef pmap_resident_count
3625 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
3626 pmap_resident_count(map->pmap));
3627 #else
3628 /* XXXCDC: this should be required ... */
3629 (*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap);
3630 #endif
3631 if (!full)
3632 return;
3633 for (entry = map->header.next; entry != &map->header;
3634 entry = entry->next) {
3635 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
3636 entry, entry->start, entry->end, entry->object.uvm_obj,
3637 (long long)entry->offset, entry->aref.ar_amap,
3638 entry->aref.ar_pageoff);
3639 (*pr)(
3640 "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, "
3641 "wc=%d, adv=%d\n",
3642 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
3643 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
3644 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
3645 entry->protection, entry->max_protection,
3646 entry->inheritance, entry->wired_count, entry->advice);
3647 }
3648 }
3649
3650 /*
3651 * uvm_object_printit: actually prints the object
3652 */
3653
3654 void
3655 uvm_object_printit(uobj, full, pr)
3656 struct uvm_object *uobj;
3657 boolean_t full;
3658 int (*pr)(const char *, ...);
3659 {
3660 struct vm_page *pg;
3661 int cnt = 0;
3662
3663 (*pr)("OBJECT %p: locked=%d, pgops=%p, npages=%d, ",
3664 uobj, uobj->vmobjlock.lock_data, uobj->pgops, uobj->uo_npages);
3665 if (UVM_OBJ_IS_KERN_OBJECT(uobj))
3666 (*pr)("refs=<SYSTEM>\n");
3667 else
3668 (*pr)("refs=%d\n", uobj->uo_refs);
3669
3670 if (!full) {
3671 return;
3672 }
3673 (*pr)(" PAGES <pg,offset>:\n ");
3674 for (pg = TAILQ_FIRST(&uobj->memq);
3675 pg != NULL;
3676 pg = TAILQ_NEXT(pg, listq), cnt++) {
3677 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
3678 if ((cnt % 3) == 2) {
3679 (*pr)("\n ");
3680 }
3681 }
3682 if ((cnt % 3) != 2) {
3683 (*pr)("\n");
3684 }
3685 }
3686
3687 /*
3688 * uvm_page_printit: actually print the page
3689 */
3690
3691 static const char page_flagbits[] =
3692 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY"
3693 "\11ZERO\15PAGER1";
3694 static const char page_pqflagbits[] =
3695 "\20\1FREE\2INACTIVE\3ACTIVE\4LAUNDRY\5ANON\6AOBJ";
3696
3697 void
3698 uvm_page_printit(pg, full, pr)
3699 struct vm_page *pg;
3700 boolean_t full;
3701 int (*pr)(const char *, ...);
3702 {
3703 struct vm_page *tpg;
3704 struct uvm_object *uobj;
3705 struct pglist *pgl;
3706 char pgbuf[128];
3707 char pqbuf[128];
3708
3709 (*pr)("PAGE %p:\n", pg);
3710 snprintf(pgbuf, sizeof(pgbuf), "%b", pg->pg_flags, page_flagbits);
3711 snprintf(pqbuf, sizeof(pqbuf), "%b", pg->pg_flags, page_pqflagbits);
3712 (*pr)(" flags=%s, pg_flags=%s, vers=%d, wire_count=%d, pa=0x%llx\n",
3713 pgbuf, pqbuf, pg->pg_version, pg->wire_count,
3714 (long long)pg->phys_addr);
3715 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n",
3716 pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count);
3717 #if defined(UVM_PAGE_TRKOWN)
3718 if (pg->pg_flags & PG_BUSY)
3719 (*pr)(" owning process = %d, tag=%s\n",
3720 pg->owner, pg->owner_tag);
3721 else
3722 (*pr)(" page not busy, no owner\n");
3723 #else
3724 (*pr)(" [page ownership tracking disabled]\n");
3725 #endif
3726
3727 if (!full)
3728 return;
3729
3730 /* cross-verify object/anon */
3731 if ((pg->pg_flags & PQ_FREE) == 0) {
3732 if (pg->pg_flags & PQ_ANON) {
3733 if (pg->uanon == NULL || pg->uanon->an_page != pg)
3734 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n",
3735 (pg->uanon) ? pg->uanon->an_page : NULL);
3736 else
3737 (*pr)(" anon backpointer is OK\n");
3738 } else {
3739 uobj = pg->uobject;
3740 if (uobj) {
3741 (*pr)(" checking object list\n");
3742 TAILQ_FOREACH(tpg, &uobj->memq, listq) {
3743 if (tpg == pg) {
3744 break;
3745 }
3746 }
3747 if (tpg)
3748 (*pr)(" page found on object list\n");
3749 else
3750 (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n");
3751 }
3752 }
3753 }
3754
3755 /* cross-verify page queue */
3756 if (pg->pg_flags & PQ_FREE) {
3757 int fl = uvm_page_lookup_freelist(pg);
3758 pgl = &uvm.page_free[fl].pgfl_queues[((pg)->pg_flags & PG_ZERO) ?
3759 PGFL_ZEROS : PGFL_UNKNOWN];
3760 } else if (pg->pg_flags & PQ_INACTIVE) {
3761 pgl = (pg->pg_flags & PQ_SWAPBACKED) ?
3762 &uvm.page_inactive_swp : &uvm.page_inactive_obj;
3763 } else if (pg->pg_flags & PQ_ACTIVE) {
3764 pgl = &uvm.page_active;
3765 } else {
3766 pgl = NULL;
3767 }
3768
3769 if (pgl) {
3770 (*pr)(" checking pageq list\n");
3771 TAILQ_FOREACH(tpg, pgl, pageq) {
3772 if (tpg == pg) {
3773 break;
3774 }
3775 }
3776 if (tpg)
3777 (*pr)(" page found on pageq list\n");
3778 else
3779 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
3780 }
3781 }
3782 #endif