1 /* $OpenBSD: pmap.c,v 1.119 2007/06/27 16:16:53 art Exp $ */
2 /* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */
3
4 /*
5 *
6 * Copyright (c) 1997 Charles D. Cranor and Washington University.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by Charles D. Cranor and
20 * Washington University.
21 * 4. The name of the author may not be used to endorse or promote products
22 * derived from this software without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
29 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 /*
37 * pmap.c: i386 pmap module rewrite
38 * Chuck Cranor <chuck@ccrc.wustl.edu>
39 * 11-Aug-97
40 *
41 * history of this pmap module: in addition to my own input, i used
42 * the following references for this rewrite of the i386 pmap:
43 *
44 * [1] the NetBSD i386 pmap. this pmap appears to be based on the
45 * BSD hp300 pmap done by Mike Hibler at University of Utah.
46 * it was then ported to the i386 by William Jolitz of UUNET
47 * Technologies, Inc. Then Charles M. Hannum of the NetBSD
48 * project fixed some bugs and provided some speed ups.
49 *
50 * [2] the FreeBSD i386 pmap. this pmap seems to be the
51 * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson
52 * and David Greenman.
53 *
54 * [3] the Mach pmap. this pmap, from CMU, seems to have migrated
55 * between several processors. the VAX version was done by
56 * Avadis Tevanian, Jr., and Michael Wayne Young. the i386
57 * version was done by Lance Berc, Mike Kupfer, Bob Baron,
58 * David Golub, and Richard Draves. the alpha version was
59 * done by Alessandro Forin (CMU/Mach) and Chris Demetriou
60 * (NetBSD/alpha).
61 */
62
63 #include <sys/param.h>
64 #include <sys/systm.h>
65 #include <sys/proc.h>
66 #include <sys/malloc.h>
67 #include <sys/pool.h>
68 #include <sys/user.h>
69 #include <sys/kernel.h>
70 #include <sys/mutex.h>
71
72 #include <uvm/uvm.h>
73
74 #include <machine/atomic.h>
75 #include <machine/cpu.h>
76 #include <machine/specialreg.h>
77 #include <machine/gdt.h>
78
79 #include <dev/isa/isareg.h>
80 #include <sys/msgbuf.h>
81 #include <stand/boot/bootarg.h>
82
83 /*
84 * general info:
85 *
86 * - for an explanation of how the i386 MMU hardware works see
87 * the comments in <machine/pte.h>.
88 *
89 * - for an explanation of the general memory structure used by
90 * this pmap (including the recursive mapping), see the comments
91 * in <machine/pmap.h>.
92 *
93 * this file contains the code for the "pmap module." the module's
94 * job is to manage the hardware's virtual to physical address mappings.
95 * note that there are two levels of mapping in the VM system:
96 *
97 * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's
98 * to map ranges of virtual address space to objects/files. for
99 * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only
100 * to the file /bin/ls starting at offset zero." note that
101 * the upper layer mapping is not concerned with how individual
102 * vm_pages are mapped.
103 *
104 * [2] the lower layer of the VM system (the pmap) maintains the mappings
105 * from virtual addresses. it is concerned with which vm_page is
106 * mapped where. for example, when you run /bin/ls and start
107 * at page 0x1000 the fault routine may lookup the correct page
108 * of the /bin/ls file and then ask the pmap layer to establish
109 * a mapping for it.
110 *
111 * note that information in the lower layer of the VM system can be
112 * thrown away since it can easily be reconstructed from the info
113 * in the upper layer.
114 *
115 * data structures we use include:
116 *
117 * - struct pmap: describes the address space of one thread
118 * - struct pv_entry: describes one <PMAP,VA> mapping of a PA
119 * - struct pv_head: there is one pv_head per managed page of
120 * physical memory. the pv_head points to a list of pv_entry
121 * structures which describe all the <PMAP,VA> pairs that this
122 * page is mapped in. this is critical for page based operations
123 * such as pmap_page_protect() [change protection on _all_ mappings
124 * of a page]
125 * - pv_page/pv_page_info: pv_entry's are allocated out of pv_page's.
126 * if we run out of pv_entry's we allocate a new pv_page and free
127 * its pv_entrys.
128 */
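/*
 * illustrative example (not from the original code): a managed
 * physical page mapped at va1 by pmap A and at va2 by pmap B ends up
 * with a two element pv chain hanging off its vm_page (the
 * pg->mdpage.pv_list used by pmap_enter_pv/pmap_remove_pv below):
 *
 *	vm_page --> [pmap A, va1] --> [pmap B, va2] --> NULL
 *
 * page based operations such as pmap_page_protect() walk this chain
 * to reach every mapping of the page.
 */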
129 /*
130 * memory allocation
131 *
132 * - there are three data structures that we must dynamically allocate:
133 *
134 * [A] new process' page directory page (PDP)
135 * - plan 1: done at pmap_create() we use
136 * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this
137 * allocation.
138 *
139 * if we are low in free physical memory then we sleep in
140 * uvm_km_alloc -- in this case this is ok since we are creating
141 * a new pmap and should not be holding any locks.
142 *
143 * if the kernel is totally out of virtual space
144 * (i.e. uvm_km_alloc returns NULL), then we panic.
145 *
146 * XXX: the fork code currently has no way to return an "out of
147 * memory, try again" error code since uvm_fork [fka vm_fork]
148 * is a void function.
149 *
150 * [B] new page tables pages (PTP)
151 * call uvm_pagealloc()
152 * => success: zero page, add to pm_pdir
153 * => failure: we are out of free vm_pages, let pmap_enter()
154 * tell UVM about it.
155 *
156 * note: for kernel PTPs, we start with NKPTP of them. as we map
157 * kernel memory (at uvm_map time) we check to see if we've grown
158 * the kernel pmap. if so, we call the optional function
159 * pmap_growkernel() to grow the kernel PTPs in advance.
160 *
161 * [C] pv_entry structures
162 * - plan 1: try to allocate one off the free list
163 * => success: done!
164 * => failure: no more free pv_entrys on the list
165 * - plan 2: try to allocate a new pv_page to add a chunk of
166 * pv_entrys to the free list
167 * [a] obtain a free, unmapped, VA in kmem_map. either
168 * we have one saved from a previous call, or we allocate
169 * one now using a "vm_map_lock_try" in uvm_map
170 * => success: we have an unmapped VA, continue to [b]
171 * => failure: unable to lock kmem_map or out of VA in it.
172 * move on to plan 3.
173 * [b] allocate a page for the VA
174 * => success: map it in, free the pv_entry's, DONE!
175 * => failure: no free vm_pages, etc.
176 * save VA for later call to [a], go to plan 3.
177 * If we fail, we simply let pmap_enter() tell UVM about it.
178 */
179 /*
180 * locking
181 *
182 * we have the following locks that we must contend with:
183 *
184 * "simple" locks:
185 *
186 * - pmap lock (per pmap, part of uvm_object)
187 * this lock protects the fields in the pmap structure including
188 * the non-kernel PDEs in the PDP, and the PTEs. it also locks
189 * in the alternate PTE space (since that is determined by the
190 * entry in the PDP).
191 *
192 * - pvalloc_lock
193 * this lock protects the data structures which are used to manage
194 * the free list of pv_entry structures.
195 *
196 * - pmaps_lock
197 * this lock protects the list of active pmaps (headed by "pmaps").
198 * we lock it when adding or removing pmaps from this list.
199 *
200 */
201
202 /*
203 * locking data structures
204 */
205
206 struct simplelock pvalloc_lock;
207 struct simplelock pmaps_lock;
208
209 #define PMAP_MAP_TO_HEAD_LOCK() /* null */
210 #define PMAP_MAP_TO_HEAD_UNLOCK() /* null */
211
212 #define PMAP_HEAD_TO_MAP_LOCK() /* null */
213 #define PMAP_HEAD_TO_MAP_UNLOCK() /* null */
214
215 /*
216 * global data structures
217 */
218
219 struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */
220
221 /*
222 * nkpde is the number of kernel PTPs allocated for the kernel at
223 * boot time (NKPTP is a compile time override). this number can
224 * grow dynamically as needed (but once allocated, we never free
225 * kernel PTPs).
226 */
227
228 int nkpde = NKPTP;
229 #ifdef NKPDE
230 #error "obsolete NKPDE: use NKPTP"
231 #endif
232
233 /*
234 * pmap_pg_g: if our processor supports PG_G in the PTE then we
235 * set pmap_pg_g to PG_G (otherwise it is zero).
236 */
237
238 int pmap_pg_g = 0;
239
240 /*
241 * i386 physical memory comes in a big contig chunk with a small
 * hole toward the front of it... the following 3 paddr_t's
243 * (shared with machdep.c) describe the physical address space
244 * of this machine.
245 */
246 paddr_t avail_start; /* PA of first available physical page */
247 paddr_t hole_start; /* PA of start of "hole" */
248 paddr_t hole_end; /* PA of end of "hole" */
249
250 /*
251 * other data structures
252 */
253
254 static pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */
255 static boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */
256
257 /*
258 * the following two vaddr_t's are used during system startup
259 * to keep track of how much of the kernel's VM space we have used.
260 * once the system is started, the management of the remaining kernel
261 * VM space is turned over to the kernel_map vm_map.
262 */
263
264 static vaddr_t virtual_avail; /* VA of first free KVA */
265 static vaddr_t virtual_end; /* VA of last free KVA */
266
267 /*
268 * pv_page management structures: locked by pvalloc_lock
269 */
270
271 TAILQ_HEAD(pv_pagelist, pv_page);
272 static struct pv_pagelist pv_freepages; /* list of pv_pages with free entries */
273 static struct pv_pagelist pv_unusedpgs; /* list of unused pv_pages */
274 static int pv_nfpvents; /* # of free pv entries */
275 static struct pv_page *pv_initpage; /* bootstrap page from kernel_map */
276 static vaddr_t pv_cachedva; /* cached VA for later use */
277
278 #define PVE_LOWAT (PVE_PER_PVPAGE / 2) /* free pv_entry low water mark */
279 #define PVE_HIWAT (PVE_LOWAT + (PVE_PER_PVPAGE * 2))
280 /* high water mark */
281
282 /*
283 * linked list of all non-kernel pmaps
284 */
285
286 struct pmap_head pmaps;
287
288 /*
289 * pool that pmap structures are allocated from
290 */
291
292 struct pool pmap_pmap_pool;
293
294 /*
 * MULTIPROCESSOR: special VA's/PTE's are actually allocated inside an
296 * I386_MAXPROCS*NPTECL array of PTE's, to avoid cache line thrashing
297 * due to false sharing.
298 */
299
300 #ifdef MULTIPROCESSOR
301 #define PTESLEW(pte, id) ((pte)+(id)*NPTECL)
302 #define VASLEW(va,id) ((va)+(id)*NPTECL*NBPG)
303 #else
304 #define PTESLEW(pte, id) (pte)
305 #define VASLEW(va,id) (va)
306 #endif
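
/*
 * e.g. (illustrative, MULTIPROCESSOR case): with NPTECL sized to cover
 * one cache line worth of PTEs, CPU "id" uses zero_pte + id*NPTECL and
 * the VA zerop + id*NPTECL*NBPG, so no two CPUs ever write PTEs that
 * share a cache line.
 */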
307
308 /*
309 * special VAs and the PTEs that map them
310 */
311
312 static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte;
313 static caddr_t csrcp, cdstp, zerop, ptpp;
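/*
 * (csrc/cdst are the source/destination windows used by
 * pmap_copy_page, zero is used by pmap_zero_phys and friends, and
 * ptp is used by pmap_tmpmap_pa.)
 */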
314 caddr_t vmmap; /* XXX: used by mem.c... it should really uvm_map_reserve it */
315
316 #if defined(I586_CPU)
317 /* stuff to fix the pentium f00f bug */
318 extern vaddr_t pentium_idt_vaddr;
319 #endif
320
321
322 /*
323 * local prototypes
324 */
325
326 struct pv_entry *pmap_add_pvpage(struct pv_page *, boolean_t);
327 struct vm_page *pmap_alloc_ptp(struct pmap *, int, boolean_t, pt_entry_t);
328 struct pv_entry *pmap_alloc_pv(struct pmap *, int); /* see codes below */
329 #define ALLOCPV_NEED 0 /* need PV now */
330 #define ALLOCPV_TRY 1 /* just try to allocate */
331 #define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */
332 struct pv_entry *pmap_alloc_pvpage(struct pmap *, int);
333 void pmap_enter_pv(struct vm_page *, struct pv_entry *,
334 struct pmap *, vaddr_t, struct vm_page *);
335 void pmap_free_pv(struct pmap *, struct pv_entry *);
336 void pmap_free_pvs(struct pmap *, struct pv_entry *);
337 void pmap_free_pv_doit(struct pv_entry *);
338 void pmap_free_pvpage(void);
339 struct vm_page *pmap_get_ptp(struct pmap *, int, boolean_t);
340 boolean_t pmap_is_curpmap(struct pmap *);
341 boolean_t pmap_is_active(struct pmap *, int);
342 void pmap_sync_flags_pte(struct vm_page *, u_long);
343 pt_entry_t *pmap_map_ptes(struct pmap *);
344 struct pv_entry *pmap_remove_pv(struct vm_page *, struct pmap *, vaddr_t);
345 void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int);
346 boolean_t pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
347 vaddr_t, int);
348 void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t,
349 vaddr_t, vaddr_t, int);
350
351 #define PMAP_REMOVE_ALL 0
352 #define PMAP_REMOVE_SKIPWIRED 1
353
354 vaddr_t pmap_tmpmap_pa(paddr_t);
355 pt_entry_t *pmap_tmpmap_pvepte(struct pv_entry *);
356 void pmap_tmpunmap_pa(void);
357 void pmap_tmpunmap_pvepte(struct pv_entry *);
358 void pmap_apte_flush(struct pmap *);
359 void pmap_unmap_ptes(struct pmap *);
360 void pmap_exec_account(struct pmap *, vaddr_t, pt_entry_t,
361 pt_entry_t);
362
363 void pmap_pinit(pmap_t);
364 void pmap_release(pmap_t);
365
366 void pmap_zero_phys(paddr_t);
367
368 void setcslimit(struct pmap *, struct trapframe *, struct pcb *, vaddr_t);
369
370 /*
371 * p m a p i n l i n e h e l p e r f u n c t i o n s
372 */
373
374 /*
375 * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]?
376 * of course the kernel is always loaded
377 */
378
379 boolean_t
pmap_is_curpmap(struct pmap *pmap)
382 {
383 return((pmap == pmap_kernel()) ||
384 (pmap->pm_pdirpa == (paddr_t) rcr3()));
385 }
386
387 /*
388 * pmap_is_active: is this pmap loaded into the specified processor's %cr3?
389 */
390
391 boolean_t
pmap_is_active(struct pmap *pmap, int cpu_id)
395 {
396
397 return (pmap == pmap_kernel() ||
398 (pmap->pm_cpus & (1U << cpu_id)) != 0);
399 }
400
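/*
 * pmap_pte2flags/pmap_flags2pte: convert between the hardware
 * referenced/modified bits of a PTE (PG_U/PG_M) and the MI pg_flags
 * bits kept in the vm_page (PG_PMAP_REF/PG_PMAP_MOD).
 */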
401 static __inline u_int
402 pmap_pte2flags(u_long pte)
403 {
404 return (((pte & PG_U) ? PG_PMAP_REF : 0) |
405 ((pte & PG_M) ? PG_PMAP_MOD : 0));
406 }
407
408 static __inline u_int
409 pmap_flags2pte(u_long pte)
410 {
411 return (((pte & PG_PMAP_REF) ? PG_U : 0) |
412 ((pte & PG_PMAP_MOD) ? PG_M : 0));
413 }
414
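/*
 * pmap_sync_flags_pte: fold the referenced/modified bits of a PTE
 * into the vm_page's pg_flags.
 */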
415 void
416 pmap_sync_flags_pte(struct vm_page *pg, u_long pte)
417 {
418 if (pte & (PG_U|PG_M)) {
419 atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(pte));
420 }
421 }
422
423 /*
424 * pmap_tmpmap_pa: map a page in for tmp usage
425 */
426
427 vaddr_t
428 pmap_tmpmap_pa(paddr_t pa)
429 {
430 #ifdef MULTIPROCESSOR
431 int id = cpu_number();
432 #endif
433 pt_entry_t *ptpte = PTESLEW(ptp_pte, id);
434 caddr_t ptpva = VASLEW(ptpp, id);
435 #if defined(DIAGNOSTIC)
436 if (*ptpte)
437 panic("pmap_tmpmap_pa: ptp_pte in use?");
438 #endif
439 *ptpte = PG_V | PG_RW | pa; /* always a new mapping */
440 return((vaddr_t)ptpva);
441 }
442
443 /*
444 * pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa)
445 */
446
447 void
pmap_tmpunmap_pa(void)
449 {
450 #ifdef MULTIPROCESSOR
451 int id = cpu_number();
452 #endif
453 pt_entry_t *ptpte = PTESLEW(ptp_pte, id);
454 caddr_t ptpva = VASLEW(ptpp, id);
455 #if defined(DIAGNOSTIC)
456 if (!pmap_valid_entry(*ptpte))
457 panic("pmap_tmpunmap_pa: our pte invalid?");
458 #endif
459 *ptpte = 0; /* zap! */
460 pmap_update_pg((vaddr_t)ptpva);
461 #ifdef MULTIPROCESSOR
462 /*
463 * No need for tlb shootdown here, since ptp_pte is per-CPU.
464 */
465 #endif
466 }
467
468 /*
469 * pmap_tmpmap_pvepte: get a quick mapping of a PTE for a pv_entry
470 *
471 * => do NOT use this on kernel mappings [why? because pv_ptp may be NULL]
472 */
473
474 pt_entry_t *
475 pmap_tmpmap_pvepte(struct pv_entry *pve)
476 {
477 #ifdef DIAGNOSTIC
478 if (pve->pv_pmap == pmap_kernel())
479 panic("pmap_tmpmap_pvepte: attempt to map kernel");
480 #endif
481
482 /* is it current pmap? use direct mapping... */
483 if (pmap_is_curpmap(pve->pv_pmap))
484 return(vtopte(pve->pv_va));
485
486 return(((pt_entry_t *)pmap_tmpmap_pa(VM_PAGE_TO_PHYS(pve->pv_ptp)))
487 + ptei((unsigned)pve->pv_va));
488 }
489
490 /*
491 * pmap_tmpunmap_pvepte: release a mapping obtained with pmap_tmpmap_pvepte
492 */
493
494 void
495 pmap_tmpunmap_pvepte(struct pv_entry *pve)
496 {
497 /* was it current pmap? if so, return */
498 if (pmap_is_curpmap(pve->pv_pmap))
499 return;
500
501 pmap_tmpunmap_pa();
502 }
503
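/*
 * pmap_apte_flush: flush stale TLB entries (including on other CPUs)
 * after the alternate PTE space mapping (APDP_PDE) has been changed.
 */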
504 void
505 pmap_apte_flush(struct pmap *pmap)
506 {
507 pmap_tlb_shoottlb();
508 pmap_tlb_shootwait();
509 }
510
511 /*
512 * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
513 *
514 * => we lock enough pmaps to keep things locked in
515 * => must be undone with pmap_unmap_ptes before returning
516 */
517
518 pt_entry_t *
519 pmap_map_ptes(struct pmap *pmap)
520 {
521 pd_entry_t opde;
522
523 /* the kernel's pmap is always accessible */
524 if (pmap == pmap_kernel()) {
525 return(PTE_BASE);
526 }
527
528 /* if curpmap then we are always mapped */
529 if (pmap_is_curpmap(pmap)) {
530 simple_lock(&pmap->pm_obj.vmobjlock);
531 return(PTE_BASE);
532 }
533
534 /* need to lock both curpmap and pmap: use ordered locking */
535 if ((unsigned) pmap < (unsigned) curpcb->pcb_pmap) {
536 simple_lock(&pmap->pm_obj.vmobjlock);
537 simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
538 } else {
539 simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
540 simple_lock(&pmap->pm_obj.vmobjlock);
541 }
542
543 /* need to load a new alternate pt space into curpmap? */
544 opde = *APDP_PDE;
545 if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) {
546 *APDP_PDE = (pd_entry_t) (pmap->pm_pdirpa | PG_RW | PG_V |
547 PG_U | PG_M);
548 if (pmap_valid_entry(opde))
549 pmap_apte_flush(curpcb->pcb_pmap);
550 }
551 return(APTE_BASE);
552 }
553
554 /*
555 * pmap_unmap_ptes: unlock the PTE mapping of "pmap"
556 */
557
558 void
559 pmap_unmap_ptes(struct pmap *pmap)
560 {
561 if (pmap == pmap_kernel())
562 return;
563
564 if (pmap_is_curpmap(pmap)) {
565 simple_unlock(&pmap->pm_obj.vmobjlock);
566 } else {
567 #if defined(MULTIPROCESSOR)
568 *APDP_PDE = 0;
569 pmap_apte_flush(curpcb->pcb_pmap);
570 #endif
571 simple_unlock(&pmap->pm_obj.vmobjlock);
572 simple_unlock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
573 }
574 }
575
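/*
 * pmap_exec_account: account for a change in the executability of a
 * mapping in the current pmap.  flushes the TLB entry when PG_X
 * changes and, if the highest executable mapping just lost its
 * execute permission, pulls the floating code segment limit back down.
 */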
576 void
577 pmap_exec_account(struct pmap *pm, vaddr_t va,
578 pt_entry_t opte, pt_entry_t npte)
579 {
580 if (pm == pmap_kernel())
581 return;
582
583 if (curproc == NULL || curproc->p_vmspace == NULL ||
584 pm != vm_map_pmap(&curproc->p_vmspace->vm_map))
585 return;
586
587 if ((opte ^ npte) & PG_X)
588 pmap_tlb_shootpage(pm, va);
589
590 /*
591 * Executability was removed on the last executable change.
592 * Reset the code segment to something conservative and
593 * let the trap handler deal with setting the right limit.
	 * let the trap handler deal with setting the right limit;
	 * we can't recompute the right limit here because of locking
	 * constraints on the vm map.
596 * XXX - floating cs - set this _really_ low.
597 */
598 if ((opte & PG_X) && (npte & PG_X) == 0 && va == pm->pm_hiexec) {
599 struct trapframe *tf = curproc->p_md.md_regs;
600 struct pcb *pcb = &curproc->p_addr->u_pcb;
601
602 pm->pm_hiexec = I386_MAX_EXE_ADDR;
603 setcslimit(pm, tf, pcb, I386_MAX_EXE_ADDR);
604 }
605 }
606
607 /*
608 * Fixup the code segment to cover all potential executable mappings.
609 * Called by kernel SEGV trap handler.
610 * returns 0 if no changes to the code segment were made.
611 */
612 int
613 pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb)
614 {
615 struct vm_map_entry *ent;
616 struct pmap *pm = vm_map_pmap(map);
617 vaddr_t va = 0;
618
619 vm_map_lock(map);
620 for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) {
621 /*
622 * This entry has greater va than the entries before.
623 * We need to make it point to the last page, not past it.
624 */
625 if (ent->protection & VM_PROT_EXECUTE)
626 va = trunc_page(ent->end - 1);
627 }
628 vm_map_unlock(map);
629
630 if (va <= pm->pm_hiexec) {
631 return (0);
632 }
633
634 pm->pm_hiexec = va;
635
636 /*
637 * We have a new 'highest executable' va, so we need to update
638 * the value for the code segment limit, which is stored in the
639 * PCB.
640 */
641 setcslimit(pm, tf, pcb, va);
642
643 return (1);
644 }
645
646 void
647 setcslimit(struct pmap *pm, struct trapframe *tf, struct pcb *pcb,
648 vaddr_t limit)
649 {
650 /*
651 * Called when we have a new 'highest executable' va, so we need
652 * to update the value for the code segment limit, which is stored
653 * in the PCB.
654 *
655 * There are no caching issues to be concerned with: the
656 * processor reads the whole descriptor from the GDT when the
657 * appropriate selector is loaded into a segment register, and
658 * this only happens on the return to userland.
659 *
660 * This also works in the MP case, since whichever CPU gets to
661 * run the process will pick up the right descriptor value from
662 * the PCB.
663 */
664 limit = min(limit, VM_MAXUSER_ADDRESS - 1);
665
666 setsegment(&pm->pm_codeseg, 0, atop(limit),
667 SDT_MEMERA, SEL_UPL, 1, 1);
668
669 /* And update the GDT and LDT since we may be called by the
670 * trap handler (cpu_switch won't get a chance).
671 */
672 curcpu()->ci_gdt[GUCODE_SEL].sd = pcb->pcb_ldt[LUCODE_SEL].sd =
673 pm->pm_codeseg;
674
675 pcb->pcb_cs = tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
676 }
677
678 /*
679 * p m a p k e n t e r f u n c t i o n s
680 *
681 * functions to quickly enter/remove pages from the kernel address
682 * space. pmap_kremove is exported to MI kernel. we make use of
683 * the recursive PTE mappings.
684 */
685
686 /*
687 * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking
688 *
689 * => no need to lock anything, assume va is already allocated
690 * => should be faster than normal pmap enter function
691 */
692
693 void
694 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
695 {
696 pt_entry_t *pte, opte, npte;
697
698 pte = vtopte(va);
699 npte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | PG_V |
700 pmap_pg_g | PG_U | PG_M;
701 opte = i386_atomic_testset_ul(pte, npte); /* zap! */
702 if (pmap_valid_entry(opte)) {
703 /* NB. - this should not happen. */
704 pmap_tlb_shootpage(pmap_kernel(), va);
705 pmap_tlb_shootwait();
706 }
707 }
708
709 /*
710 * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking
711 *
712 * => no need to lock anything
713 * => caller must dispose of any vm_page mapped in the va range
714 * => note: not an inline function
715 * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE
716 */
717
718 void
719 pmap_kremove(vaddr_t sva, vsize_t len)
720 {
721 pt_entry_t *pte, opte;
722 vaddr_t va, eva;
723
724 eva = sva + len;
725
726 for (va = sva; va != eva; va += PAGE_SIZE) {
727 pte = kvtopte(va);
728 opte = i386_atomic_testset_ul(pte, 0);
729 #ifdef DIAGNOSTIC
730 if (opte & PG_PVLIST)
731 panic("pmap_kremove: PG_PVLIST mapping for 0x%lx", va);
732 #endif
733 }
734 pmap_tlb_shootrange(pmap_kernel(), sva, eva);
735 pmap_tlb_shootwait();
736 }
737
738 /*
739 * p m a p i n i t f u n c t i o n s
740 *
741 * pmap_bootstrap and pmap_init are called during system startup
742 * to init the pmap module. pmap_bootstrap() does a low level
743 * init just to get things rolling. pmap_init() finishes the job.
744 */
745
746 /*
747 * pmap_bootstrap: get the system in a state where it can run with VM
748 * properly enabled (called before main()). the VM system is
749 * fully init'd later...
750 *
751 * => on i386, locore.s has already enabled the MMU by allocating
752 * a PDP for the kernel, and nkpde PTP's for the kernel.
753 * => kva_start is the first free virtual address in kernel space
754 */
755
756 void
757 pmap_bootstrap(vaddr_t kva_start)
758 {
759 extern paddr_t avail_end;
760 struct pmap *kpm;
761 vaddr_t kva;
762 pt_entry_t *pte;
763
764 /*
765 * set the page size (default value is 4K which is ok)
766 */
767
768 uvm_setpagesize();
769
770 /*
771 * a quick sanity check
772 */
773
774 if (PAGE_SIZE != NBPG)
775 panic("pmap_bootstrap: PAGE_SIZE != NBPG");
776
777 /*
778 * use the very last page of physical memory for the message buffer
779 */
780
781 avail_end -= round_page(MSGBUFSIZE);
782 /*
	 * The arguments passed in from /boot need space too.
784 */
785 avail_end -= round_page(bootargc);
786
787 /*
788 * set up our local static global vars that keep track of the
789 * usage of KVM before kernel_map is set up
790 */
791
792 virtual_avail = kva_start; /* first free KVA */
793 virtual_end = VM_MAX_KERNEL_ADDRESS; /* last KVA */
794
795 /*
796 * set up protection_codes: we need to be able to convert from
797 * a MI protection code (some combo of VM_PROT...) to something
798 * we can jam into a i386 PTE.
799 */
800
801 protection_codes[UVM_PROT_NONE] = 0; /* --- */
802 protection_codes[UVM_PROT_EXEC] = PG_X; /* --x */
803 protection_codes[UVM_PROT_READ] = PG_RO; /* -r- */
804 protection_codes[UVM_PROT_RX] = PG_X; /* -rx */
805 protection_codes[UVM_PROT_WRITE] = PG_RW; /* w-- */
806 protection_codes[UVM_PROT_WX] = PG_RW|PG_X; /* w-x */
807 protection_codes[UVM_PROT_RW] = PG_RW; /* wr- */
808 protection_codes[UVM_PROT_RWX] = PG_RW|PG_X; /* wrx */
809
810 /*
811 * now we init the kernel's pmap
812 *
813 * the kernel pmap's pm_obj is not used for much. however, in
814 * user pmaps the pm_obj contains the list of active PTPs.
815 * the pm_obj currently does not have a pager. it might be possible
816 * to add a pager that would allow a process to read-only mmap its
817 * own page tables (fast user level vtophys?). this may or may not
818 * be useful.
819 */
820
821 kpm = pmap_kernel();
822 simple_lock_init(&kpm->pm_obj.vmobjlock);
823 kpm->pm_obj.pgops = NULL;
824 TAILQ_INIT(&kpm->pm_obj.memq);
825 kpm->pm_obj.uo_npages = 0;
826 kpm->pm_obj.uo_refs = 1;
827 bzero(&kpm->pm_list, sizeof(kpm->pm_list)); /* pm_list not used */
828 kpm->pm_pdir = (pd_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE);
829 kpm->pm_pdirpa = (u_int32_t) proc0.p_addr->u_pcb.pcb_cr3;
830 kpm->pm_stats.wired_count = kpm->pm_stats.resident_count =
831 atop(kva_start - VM_MIN_KERNEL_ADDRESS);
832
833 /*
834 * the above is just a rough estimate and not critical to the proper
835 * operation of the system.
836 */
837
838 /*
839 * enable global TLB entries if they are supported
840 */
841
842 if (cpu_feature & CPUID_PGE) {
843 lcr4(rcr4() | CR4_PGE); /* enable hardware (via %cr4) */
844 pmap_pg_g = PG_G; /* enable software */
845
846 /* add PG_G attribute to already mapped kernel pages */
847 for (kva = VM_MIN_KERNEL_ADDRESS ; kva < virtual_avail ;
848 kva += PAGE_SIZE)
849 if (pmap_valid_entry(PTE_BASE[atop(kva)]))
850 PTE_BASE[atop(kva)] |= PG_G;
851 }
852
853 /*
854 * now we allocate the "special" VAs which are used for tmp mappings
855 * by the pmap (and other modules). we allocate the VAs by advancing
856 * virtual_avail (note that there are no pages mapped at these VAs).
857 * we find the PTE that maps the allocated VA via the linear PTE
858 * mapping.
859 */
860
861 pte = PTE_BASE + atop(virtual_avail);
862
863 #ifdef MULTIPROCESSOR
864 /*
865 * Waste some VA space to avoid false sharing of cache lines
866 * for page table pages: Give each possible CPU a cache line
867 * of PTE's (8) to play with, though we only need 4. We could
868 * recycle some of this waste by putting the idle stacks here
869 * as well; we could waste less space if we knew the largest
870 * CPU ID beforehand.
871 */
872 csrcp = (caddr_t) virtual_avail; csrc_pte = pte;
873
874 cdstp = (caddr_t) virtual_avail+PAGE_SIZE; cdst_pte = pte+1;
875
876 zerop = (caddr_t) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2;
877
878 ptpp = (caddr_t) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3;
879
880 virtual_avail += PAGE_SIZE * I386_MAXPROCS * NPTECL;
881 pte += I386_MAXPROCS * NPTECL;
882 #else
883 csrcp = (caddr_t) virtual_avail; csrc_pte = pte; /* allocate */
884 virtual_avail += PAGE_SIZE; pte++; /* advance */
885
886 cdstp = (caddr_t) virtual_avail; cdst_pte = pte;
887 virtual_avail += PAGE_SIZE; pte++;
888
889 zerop = (caddr_t) virtual_avail; zero_pte = pte;
890 virtual_avail += PAGE_SIZE; pte++;
891
892 ptpp = (caddr_t) virtual_avail; ptp_pte = pte;
893 virtual_avail += PAGE_SIZE; pte++;
894 #endif
895
896 /* XXX: vmmap used by mem.c... should be uvm_map_reserve */
897 vmmap = (char *)virtual_avail; /* don't need pte */
898 virtual_avail += PAGE_SIZE;
899
900 msgbufp = (struct msgbuf *)virtual_avail; /* don't need pte */
901 virtual_avail += round_page(MSGBUFSIZE); pte++;
902
903 bootargp = (bootarg_t *)virtual_avail;
904 virtual_avail += round_page(bootargc); pte++;
905
906 /*
907 * now we reserve some VM for mapping pages when doing a crash dump
908 */
909
910 virtual_avail = reserve_dumppages(virtual_avail);
911
912 /*
913 * init the static-global locks and global lists.
914 */
915
916 simple_lock_init(&pvalloc_lock);
917 simple_lock_init(&pmaps_lock);
918 LIST_INIT(&pmaps);
919 TAILQ_INIT(&pv_freepages);
920 TAILQ_INIT(&pv_unusedpgs);
921
922 /*
923 * initialize the pmap pool.
924 */
925
926 pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl",
927 &pool_allocator_nointr);
928
929 /*
930 * ensure the TLB is sync'd with reality by flushing it...
931 */
932
933 tlbflush();
934 }
935
936 /*
937 * pmap_init: called from uvm_init, our job is to get the pmap
938 * system ready to manage mappings... this mainly means initing
939 * the pv_entry stuff.
940 */
941
942 void
943 pmap_init(void)
944 {
945 /*
946 * now we need to free enough pv_entry structures to allow us to get
947 * the kmem_map allocated and inited (done after this function is
948 * finished). to do this we allocate one bootstrap page out of
949 * kernel_map and use it to provide an initial pool of pv_entry
950 * structures. we never free this page.
951 */
952
953 pv_initpage = (struct pv_page *) uvm_km_alloc(kernel_map, PAGE_SIZE);
954 if (pv_initpage == NULL)
955 panic("pmap_init: pv_initpage");
956 pv_cachedva = 0; /* a VA we have allocated but not used yet */
957 pv_nfpvents = 0;
958 (void) pmap_add_pvpage(pv_initpage, FALSE);
959
960 /*
961 * done: pmap module is up (and ready for business)
962 */
963
964 pmap_initialized = TRUE;
965 }
966
967 /*
968 * p v _ e n t r y f u n c t i o n s
969 */
970
971 /*
972 * pv_entry allocation functions:
973 * the main pv_entry allocation functions are:
974 * pmap_alloc_pv: allocate a pv_entry structure
975 * pmap_free_pv: free one pv_entry
976 * pmap_free_pvs: free a list of pv_entrys
977 *
978 * the rest are helper functions
979 */
980
981 /*
982 * pmap_alloc_pv: inline function to allocate a pv_entry structure
983 * => we lock pvalloc_lock
984 * => if we fail, we call out to pmap_alloc_pvpage
985 * => 3 modes:
986 * ALLOCPV_NEED = we really need a pv_entry
987 * ALLOCPV_TRY = we want a pv_entry
988 * ALLOCPV_NONEED = we are trying to grow our free list, don't really need
989 * one now
990 *
991 * "try" is for optional functions like pmap_copy().
992 */
993
994 struct pv_entry *
995 pmap_alloc_pv(struct pmap *pmap, int mode)
996 {
997 struct pv_page *pvpage;
998 struct pv_entry *pv;
999
1000 simple_lock(&pvalloc_lock);
1001
1002 if (!TAILQ_EMPTY(&pv_freepages)) {
1003 pvpage = TAILQ_FIRST(&pv_freepages);
1004 pvpage->pvinfo.pvpi_nfree--;
1005 if (pvpage->pvinfo.pvpi_nfree == 0) {
1006 /* nothing left in this one? */
1007 TAILQ_REMOVE(&pv_freepages, pvpage, pvinfo.pvpi_list);
1008 }
1009 pv = pvpage->pvinfo.pvpi_pvfree;
1010 #ifdef DIAGNOSTIC
1011 if (pv == NULL)
1012 panic("pmap_alloc_pv: pvpi_nfree off");
1013 #endif
1014 pvpage->pvinfo.pvpi_pvfree = pv->pv_next;
1015 pv_nfpvents--; /* took one from pool */
1016 } else {
1017 pv = NULL; /* need more of them */
1018 }
1019
1020 /*
1021 * if below low water mark or we didn't get a pv_entry we try and
1022 * create more pv_entrys ...
1023 */
1024
1025 if (pv_nfpvents < PVE_LOWAT || pv == NULL) {
1026 if (pv == NULL)
1027 pv = pmap_alloc_pvpage(pmap, (mode == ALLOCPV_TRY) ?
1028 mode : ALLOCPV_NEED);
1029 else
1030 (void) pmap_alloc_pvpage(pmap, ALLOCPV_NONEED);
1031 }
1032
1033 simple_unlock(&pvalloc_lock);
1034 return(pv);
1035 }
1036
1037 /*
1038 * pmap_alloc_pvpage: maybe allocate a new pvpage
1039 *
 * if mode is ALLOCPV_NONEED: just try and allocate a new pv_page
 * if mode is ALLOCPV_NEED or ALLOCPV_TRY: try and allocate a new
 *	pv_page and return a new pv_entry from it.
1043 *
1044 * => we assume that the caller holds pvalloc_lock
1045 */
1046
1047 struct pv_entry *
1048 pmap_alloc_pvpage(struct pmap *pmap, int mode)
1049 {
1050 struct vm_page *pg;
1051 struct pv_page *pvpage;
1052 struct pv_entry *pv;
1053 int s;
1054
1055 /*
1056 * if we need_entry and we've got unused pv_pages, allocate from there
1057 */
1058
1059 if (mode != ALLOCPV_NONEED && !TAILQ_EMPTY(&pv_unusedpgs)) {
1060
1061 /* move it to pv_freepages list */
1062 pvpage = TAILQ_FIRST(&pv_unusedpgs);
1063 TAILQ_REMOVE(&pv_unusedpgs, pvpage, pvinfo.pvpi_list);
1064 TAILQ_INSERT_HEAD(&pv_freepages, pvpage, pvinfo.pvpi_list);
1065
1066 /* allocate a pv_entry */
1067 pvpage->pvinfo.pvpi_nfree--; /* can't go to zero */
1068 pv = pvpage->pvinfo.pvpi_pvfree;
1069 #ifdef DIAGNOSTIC
1070 if (pv == NULL)
1071 panic("pmap_alloc_pvpage: pvpi_nfree off");
1072 #endif
1073 pvpage->pvinfo.pvpi_pvfree = pv->pv_next;
1074
1075 pv_nfpvents--; /* took one from pool */
1076 return(pv);
1077 }
1078
1079 /*
1080 * see if we've got a cached unmapped VA that we can map a page in.
1081 * if not, try to allocate one.
1082 */
1083
1084 s = splvm(); /* must protect kmem_map with splvm! */
1085 if (pv_cachedva == 0) {
1086 pv_cachedva = uvm_km_kmemalloc(kmem_map, NULL,
1087 NBPG, UVM_KMF_TRYLOCK|UVM_KMF_VALLOC);
1088 }
1089 splx(s);
1090 if (pv_cachedva == 0)
1091 return (NULL);
1092
1093 pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE);
1094 if (pg == NULL)
1095 return (NULL);
1096
1097 atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
1098
1099 /*
1100 * add a mapping for our new pv_page and free its entries (save one!)
1101 *
1102 * NOTE: If we are allocating a PV page for the kernel pmap, the
1103 * pmap is already locked! (...but entering the mapping is safe...)
1104 */
1105
1106 pmap_kenter_pa(pv_cachedva, VM_PAGE_TO_PHYS(pg),
1107 VM_PROT_READ|VM_PROT_WRITE);
1108 pvpage = (struct pv_page *) pv_cachedva;
1109 pv_cachedva = 0;
1110 return (pmap_add_pvpage(pvpage, mode != ALLOCPV_NONEED));
1111 }
1112
1113 /*
1114 * pmap_add_pvpage: add a pv_page's pv_entrys to the free list
1115 *
1116 * => caller must hold pvalloc_lock
1117 * => if need_entry is true, we allocate and return one pv_entry
1118 */
1119
1120 struct pv_entry *
1121 pmap_add_pvpage(struct pv_page *pvp, boolean_t need_entry)
1122 {
1123 int tofree, lcv;
1124
1125 /* do we need to return one? */
1126 tofree = (need_entry) ? PVE_PER_PVPAGE - 1 : PVE_PER_PVPAGE;
1127
1128 pvp->pvinfo.pvpi_pvfree = NULL;
1129 pvp->pvinfo.pvpi_nfree = tofree;
1130 for (lcv = 0 ; lcv < tofree ; lcv++) {
1131 pvp->pvents[lcv].pv_next = pvp->pvinfo.pvpi_pvfree;
1132 pvp->pvinfo.pvpi_pvfree = &pvp->pvents[lcv];
1133 }
1134 if (need_entry)
1135 TAILQ_INSERT_TAIL(&pv_freepages, pvp, pvinfo.pvpi_list);
1136 else
1137 TAILQ_INSERT_TAIL(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
1138 pv_nfpvents += tofree;
1139 return((need_entry) ? &pvp->pvents[lcv] : NULL);
1140 }
1141
1142 /*
1143 * pmap_free_pv_doit: actually free a pv_entry
1144 *
1145 * => do not call this directly! instead use either
1146 * 1. pmap_free_pv ==> free a single pv_entry
1147 * 2. pmap_free_pvs => free a list of pv_entrys
1148 * => we must be holding pvalloc_lock
1149 */
1150
1151 void
1152 pmap_free_pv_doit(struct pv_entry *pv)
1153 {
1154 struct pv_page *pvp;
1155
1156 pvp = (struct pv_page*)trunc_page((vaddr_t)pv);
1157 pv_nfpvents++;
1158 pvp->pvinfo.pvpi_nfree++;
1159
1160 /* nfree == 1 => fully allocated page just became partly allocated */
1161 if (pvp->pvinfo.pvpi_nfree == 1) {
1162 TAILQ_INSERT_HEAD(&pv_freepages, pvp, pvinfo.pvpi_list);
1163 }
1164
1165 /* free it */
1166 pv->pv_next = pvp->pvinfo.pvpi_pvfree;
1167 pvp->pvinfo.pvpi_pvfree = pv;
1168
1169 /*
1170 * are all pv_page's pv_entry's free? move it to unused queue.
1171 */
1172
1173 if (pvp->pvinfo.pvpi_nfree == PVE_PER_PVPAGE) {
1174 TAILQ_REMOVE(&pv_freepages, pvp, pvinfo.pvpi_list);
1175 TAILQ_INSERT_HEAD(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
1176 }
1177 }
1178
1179 /*
1180 * pmap_free_pv: free a single pv_entry
1181 *
1182 * => we gain the pvalloc_lock
1183 */
1184
1185 void
1186 pmap_free_pv(struct pmap *pmap, struct pv_entry *pv)
1187 {
1188 simple_lock(&pvalloc_lock);
1189 pmap_free_pv_doit(pv);
1190
1191 /*
1192 * Can't free the PV page if the PV entries were associated with
1193 * the kernel pmap; the pmap is already locked.
1194 */
1195 if (pv_nfpvents > PVE_HIWAT && TAILQ_FIRST(&pv_unusedpgs) != NULL &&
1196 pmap != pmap_kernel())
1197 pmap_free_pvpage();
1198
1199 simple_unlock(&pvalloc_lock);
1200 }
1201
1202 /*
1203 * pmap_free_pvs: free a list of pv_entrys
1204 *
1205 * => we gain the pvalloc_lock
1206 */
1207
1208 void
1209 pmap_free_pvs(struct pmap *pmap, struct pv_entry *pvs)
1210 {
1211 struct pv_entry *nextpv;
1212
1213 simple_lock(&pvalloc_lock);
1214
1215 for ( /* null */ ; pvs != NULL ; pvs = nextpv) {
1216 nextpv = pvs->pv_next;
1217 pmap_free_pv_doit(pvs);
1218 }
1219
1220 /*
1221 * Can't free the PV page if the PV entries were associated with
1222 * the kernel pmap; the pmap is already locked.
1223 */
1224 if (pv_nfpvents > PVE_HIWAT && TAILQ_FIRST(&pv_unusedpgs) != NULL &&
1225 pmap != pmap_kernel())
1226 pmap_free_pvpage();
1227
1228 simple_unlock(&pvalloc_lock);
1229 }
1230
1231
1232 /*
1233 * pmap_free_pvpage: try and free an unused pv_page structure
1234 *
1235 * => assume caller is holding the pvalloc_lock and that
1236 * there is a page on the pv_unusedpgs list
1237 * => if we can't get a lock on the kmem_map we try again later
1238 */
1239
1240 void
1241 pmap_free_pvpage(void)
1242 {
1243 int s;
1244 struct vm_map *map;
1245 struct vm_map_entry *dead_entries;
1246 struct pv_page *pvp;
1247
1248 s = splvm(); /* protect kmem_map */
1249 pvp = TAILQ_FIRST(&pv_unusedpgs);
1250
1251 /*
1252 * note: watch out for pv_initpage which is allocated out of
1253 * kernel_map rather than kmem_map.
1254 */
1255
1256 if (pvp == pv_initpage)
1257 map = kernel_map;
1258 else
1259 map = kmem_map;
1260 if (vm_map_lock_try(map)) {
1261
1262 /* remove pvp from pv_unusedpgs */
1263 TAILQ_REMOVE(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
1264
1265 /* unmap the page */
1266 dead_entries = NULL;
1267 uvm_unmap_remove(map, (vaddr_t)pvp, ((vaddr_t)pvp) + PAGE_SIZE,
1268 &dead_entries, NULL);
1269 vm_map_unlock(map);
1270
1271 if (dead_entries != NULL)
1272 uvm_unmap_detach(dead_entries, 0);
1273
1274 pv_nfpvents -= PVE_PER_PVPAGE; /* update free count */
1275 }
1276
1277 if (pvp == pv_initpage)
1278 /* no more initpage, we've freed it */
1279 pv_initpage = NULL;
1280
1281 splx(s);
1282 }
1283
1284 /*
1285 * main pv_entry manipulation functions:
1286 * pmap_enter_pv: enter a mapping onto a pv list
 * pmap_remove_pv: remove a mapping from a pv list
1288 */
1289
1290 /*
1291 * pmap_enter_pv: enter a mapping onto a pv list
1292 *
1293 * => caller should have pmap locked
1294 * => we will gain the lock on the pv and allocate the new pv_entry
1295 * => caller should adjust ptp's wire_count before calling
1296 *
1297 * pve: preallocated pve for us to use
1298 * ptp: PTP in pmap that maps this VA
1299 */
1300
1301 void
1302 pmap_enter_pv(struct vm_page *pg, struct pv_entry *pve, struct pmap *pmap,
1303 vaddr_t va, struct vm_page *ptp)
1304 {
1305 pve->pv_pmap = pmap;
1306 pve->pv_va = va;
1307 pve->pv_ptp = ptp; /* NULL for kernel pmap */
1308 pve->pv_next = pg->mdpage.pv_list; /* add to ... */
1309 pg->mdpage.pv_list = pve; /* ... locked list */
1310 }
1311
1312 /*
1313 * pmap_remove_pv: try to remove a mapping from a pv_list
1314 *
1315 * => pmap should be locked
1316 * => caller should hold lock on pv [so that attrs can be adjusted]
1317 * => caller should adjust ptp's wire_count and free PTP if needed
1318 * => we return the removed pve
1319 */
1320
1321 struct pv_entry *
1322 pmap_remove_pv(struct vm_page *pg, struct pmap *pmap, vaddr_t va)
1323 {
1324 struct pv_entry *pve, **prevptr;
1325
1326 prevptr = &pg->mdpage.pv_list; /* previous pv_entry pointer */
1327 while ((pve = *prevptr) != NULL) {
1328 if (pve->pv_pmap == pmap && pve->pv_va == va) { /* match? */
1329 *prevptr = pve->pv_next; /* remove it! */
1330 break;
1331 }
1332 prevptr = &pve->pv_next; /* previous pointer */
1333 }
1334 return(pve); /* return removed pve */
1335 }
1336
1337 /*
1338 * p t p f u n c t i o n s
1339 */
1340
1341 /*
1342 * pmap_alloc_ptp: allocate a PTP for a PMAP
1343 *
1344 * => pmap should already be locked by caller
1345 * => we use the ptp's wire_count to count the number of active mappings
1346 * in the PTP (we start it at one to prevent any chance this PTP
1347 * will ever leak onto the active/inactive queues)
1348 * => we may need to lock pv lists if we have to steal a PTP
1349 * => just_try: true if we want a PTP, but not enough to steal one
1350 * from another pmap (e.g. during optional functions like pmap_copy)
1351 */
1352
1353 struct vm_page *
1354 pmap_alloc_ptp(struct pmap *pmap, int pde_index, boolean_t just_try,
1355 pt_entry_t pde_flags)
1356 {
1357 struct vm_page *ptp;
1358
1359 ptp = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL,
1360 UVM_PGA_USERESERVE|UVM_PGA_ZERO);
1361 if (ptp == NULL)
1362 return (NULL);
1363
1364 /* got one! */
1365 atomic_clearbits_int(&ptp->pg_flags, PG_BUSY);
1366 ptp->wire_count = 1; /* no mappings yet */
1367 pmap->pm_pdir[pde_index] = (pd_entry_t)(VM_PAGE_TO_PHYS(ptp) |
1368 PG_RW | PG_V | PG_M | PG_U | pde_flags);
1369 pmap->pm_stats.resident_count++; /* count PTP as resident */
1370 pmap->pm_ptphint = ptp;
1371 return(ptp);
1372 }
1373
1374 /*
1375 * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one)
1376 *
1377 * => pmap should NOT be pmap_kernel()
1378 * => pmap should be locked
1379 */
1380
1381 struct vm_page *
1382 pmap_get_ptp(struct pmap *pmap, int pde_index, boolean_t just_try)
1383 {
1384 struct vm_page *ptp;
1385
1386 if (pmap_valid_entry(pmap->pm_pdir[pde_index])) {
1387
1388 /* valid... check hint (saves us a PA->PG lookup) */
1389 if (pmap->pm_ptphint &&
1390 (pmap->pm_pdir[pde_index] & PG_FRAME) ==
1391 VM_PAGE_TO_PHYS(pmap->pm_ptphint))
1392 return(pmap->pm_ptphint);
1393
1394 ptp = uvm_pagelookup(&pmap->pm_obj, ptp_i2o(pde_index));
1395 #ifdef DIAGNOSTIC
1396 if (ptp == NULL)
1397 panic("pmap_get_ptp: unmanaged user PTP");
1398 #endif
1399 pmap->pm_ptphint = ptp;
1400 return(ptp);
1401 }
1402
1403 /* allocate a new PTP (updates ptphint) */
1404 return (pmap_alloc_ptp(pmap, pde_index, just_try, PG_u));
1405 }
1406
1407 /*
1408 * p m a p l i f e c y c l e f u n c t i o n s
1409 */
1410
1411 /*
1412 * pmap_create: create a pmap
1413 *
 * => note: old pmap interface took a "size" arg which allowed for
1415 * the creation of "software only" pmaps (not in bsd).
1416 */
1417
1418 struct pmap *
1419 pmap_create(void)
1420 {
1421 struct pmap *pmap;
1422
1423 pmap = pool_get(&pmap_pmap_pool, PR_WAITOK);
1424 pmap_pinit(pmap);
1425 return(pmap);
1426 }
1427
1428 /*
1429 * pmap_pinit: given a zero'd pmap structure, init it.
1430 */
1431
1432 void
1433 pmap_pinit(struct pmap *pmap)
1434 {
1435 /* init uvm_object */
1436 simple_lock_init(&pmap->pm_obj.vmobjlock);
1437 pmap->pm_obj.pgops = NULL; /* currently not a mappable object */
1438 TAILQ_INIT(&pmap->pm_obj.memq);
1439 pmap->pm_obj.uo_npages = 0;
1440 pmap->pm_obj.uo_refs = 1;
1441 pmap->pm_stats.wired_count = 0;
1442 pmap->pm_stats.resident_count = 1; /* count the PDP allocd below */
1443 pmap->pm_ptphint = NULL;
1444 pmap->pm_hiexec = 0;
1445 pmap->pm_flags = 0;
1446 pmap->pm_cpus = 0;
1447
1448 setsegment(&pmap->pm_codeseg, 0, atop(I386_MAX_EXE_ADDR) - 1,
1449 SDT_MEMERA, SEL_UPL, 1, 1);
1450
1451 /* allocate PDP */
1452 pmap->pm_pdir = (pd_entry_t *) uvm_km_alloc(kernel_map, NBPG);
1453 if (pmap->pm_pdir == NULL)
1454 panic("pmap_pinit: kernel_map out of virtual space!");
1455 (void) pmap_extract(pmap_kernel(), (vaddr_t)pmap->pm_pdir,
1456 (paddr_t *)&pmap->pm_pdirpa);
1457
1458 /* init PDP */
1459 /* zero init area */
1460 bzero(pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t));
1461 /* put in recursive PDE to map the PTEs */
1462 pmap->pm_pdir[PDSLOT_PTE] = pmap->pm_pdirpa | PG_V | PG_KW | PG_U |
1463 PG_M;
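	/*
	 * (the recursive PDE is what makes the page tables show up in
	 * VA space at PTE_BASE and the PDP itself at PDP_BASE; see the
	 * comments in <machine/pmap.h>.)
	 */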
1464
1465 /* init the LDT */
1466 pmap->pm_ldt = NULL;
1467 pmap->pm_ldt_len = 0;
1468 pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
1469
1470 /*
1471 * we need to lock pmaps_lock to prevent nkpde from changing on
1472 * us. note that there is no need to splvm to protect us from
1473 * malloc since malloc allocates out of a submap and we should have
1474 * already allocated kernel PTPs to cover the range...
1475 */
1476 simple_lock(&pmaps_lock);
1477 /* put in kernel VM PDEs */
1478 bcopy(&PDP_BASE[PDSLOT_KERN], &pmap->pm_pdir[PDSLOT_KERN],
1479 nkpde * sizeof(pd_entry_t));
1480 /* zero the rest */
1481 bzero(&pmap->pm_pdir[PDSLOT_KERN + nkpde],
1482 NBPG - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t)));
1483 LIST_INSERT_HEAD(&pmaps, pmap, pm_list);
1484 simple_unlock(&pmaps_lock);
1485 }
1486
1487 /*
1488 * pmap_destroy: drop reference count on pmap. free pmap if
1489 * reference count goes to zero.
1490 */
1491
1492 void
1493 pmap_destroy(struct pmap *pmap)
1494 {
1495 int refs;
1496
1497 /*
1498 * drop reference count
1499 */
1500
1501 simple_lock(&pmap->pm_obj.vmobjlock);
1502 refs = --pmap->pm_obj.uo_refs;
1503 simple_unlock(&pmap->pm_obj.vmobjlock);
1504 if (refs > 0)
1505 return;
1506
1507 /*
1508 * reference count is zero, free pmap resources and then free pmap.
1509 */
1510
1511 pmap_release(pmap);
1512 pool_put(&pmap_pmap_pool, pmap);
1513 }
1514
1515 /*
1516 * pmap_release: release all resources held by a pmap
1517 *
1518 * => if pmap is still referenced it should be locked
1519 * => XXX: we currently don't expect any busy PTPs because we don't
1520 * allow anything to map them (except for the kernel's private
1521 * recursive mapping) or make them busy.
1522 */
1523
1524 void
1525 pmap_release(struct pmap *pmap)
1526 {
1527 struct vm_page *pg;
1528
1529 /*
1530 * remove it from global list of pmaps
1531 */
1532
1533 simple_lock(&pmaps_lock);
1534 LIST_REMOVE(pmap, pm_list);
1535 simple_unlock(&pmaps_lock);
1536
1537 /*
1538 * Before we free the pmap just make sure it's not cached anywhere.
1539 */
1540 tlbflushg();
1541
1542 /*
1543 * free any remaining PTPs
1544 */
1545
1546 while (!TAILQ_EMPTY(&pmap->pm_obj.memq)) {
1547 pg = TAILQ_FIRST(&pmap->pm_obj.memq);
1548 #ifdef DIAGNOSTIC
1549 if (pg->pg_flags & PG_BUSY)
1550 panic("pmap_release: busy page table page");
1551 #endif
1552 /* pmap_page_protect? currently no need for it. */
1553
1554 pg->wire_count = 0;
1555 uvm_pagefree(pg);
1556 }
1557
1558 /*
1559 * MULTIPROCESSOR -- no need to flush out of other processors'
1560 * APTE space because we do that in pmap_unmap_ptes().
1561 */
1562 uvm_km_free(kernel_map, (vaddr_t)pmap->pm_pdir, NBPG);
1563
1564 #ifdef USER_LDT
1565 if (pmap->pm_flags & PMF_USER_LDT) {
1566 /*
1567 * no need to switch the LDT; this address space is gone,
1568 * nothing is using it.
1569 *
1570 * No need to lock the pmap for ldt_free (or anything else),
1571 * we're the last one to use it.
1572 */
1573 ldt_free(pmap);
1574 uvm_km_free(kernel_map, (vaddr_t)pmap->pm_ldt,
1575 pmap->pm_ldt_len * sizeof(union descriptor));
1576 }
1577 #endif
1578 }
1579
1580 /*
1581 * Add a reference to the specified pmap.
1582 */
1583
1584 void
1585 pmap_reference(struct pmap *pmap)
1586 {
1587 simple_lock(&pmap->pm_obj.vmobjlock);
1588 pmap->pm_obj.uo_refs++;
1589 simple_unlock(&pmap->pm_obj.vmobjlock);
1590 }
1591
1592 #if defined(PMAP_FORK)
1593 /*
1594 * pmap_fork: perform any necessary data structure manipulation when
1595 * a VM space is forked.
1596 */
1597
1598 void
1599 pmap_fork(struct pmap *pmap1, struct pmap *pmap2)
1600 {
1601 simple_lock(&pmap1->pm_obj.vmobjlock);
1602 simple_lock(&pmap2->pm_obj.vmobjlock);
1603
1604 #ifdef USER_LDT
1605 /* Copy the LDT, if necessary. */
1606 if (pmap1->pm_flags & PMF_USER_LDT) {
1607 union descriptor *new_ldt;
1608 size_t len;
1609
1610 len = pmap1->pm_ldt_len * sizeof(union descriptor);
1611 new_ldt = (union descriptor *)uvm_km_alloc(kernel_map, len);
1612 bcopy(pmap1->pm_ldt, new_ldt, len);
1613 pmap2->pm_ldt = new_ldt;
1614 pmap2->pm_ldt_len = pmap1->pm_ldt_len;
1615 pmap2->pm_flags |= PMF_USER_LDT;
1616 ldt_alloc(pmap2, new_ldt, len);
1617 }
1618 #endif /* USER_LDT */
1619
1620 simple_unlock(&pmap2->pm_obj.vmobjlock);
1621 simple_unlock(&pmap1->pm_obj.vmobjlock);
1622 }
1623 #endif /* PMAP_FORK */
1624
1625 #ifdef USER_LDT
1626 /*
1627 * pmap_ldt_cleanup: if the pmap has a local LDT, deallocate it, and
1628 * restore the default.
1629 */
1630
1631 void
1632 pmap_ldt_cleanup(struct proc *p)
1633 {
1634 struct pcb *pcb = &p->p_addr->u_pcb;
1635 pmap_t pmap = p->p_vmspace->vm_map.pmap;
1636 union descriptor *old_ldt = NULL;
1637 size_t len = 0;
1638
1639 simple_lock(&pmap->pm_obj.vmobjlock);
1640
1641 if (pmap->pm_flags & PMF_USER_LDT) {
1642 ldt_free(pmap);
1643 pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
1644 pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
1645 /* Reset the cached address of the LDT that this process uses */
1646 #ifdef MULTIPROCESSOR
1647 pcb->pcb_ldt = curcpu()->ci_ldt;
1648 #else
1649 pcb->pcb_ldt = ldt;
1650 #endif
1651 if (pcb == curpcb)
1652 lldt(pcb->pcb_ldt_sel);
1653 old_ldt = pmap->pm_ldt;
1654 len = pmap->pm_ldt_len * sizeof(union descriptor);
1655 pmap->pm_ldt = NULL;
1656 pmap->pm_ldt_len = 0;
1657 pmap->pm_flags &= ~PMF_USER_LDT;
1658 }
1659
1660 simple_unlock(&pmap->pm_obj.vmobjlock);
1661
1662 if (old_ldt != NULL)
1663 uvm_km_free(kernel_map, (vaddr_t)old_ldt, len);
1664 }
1665 #endif /* USER_LDT */
1666
1667 /*
1668 * pmap_activate: activate a process' pmap (fill in %cr3 and LDT info)
1669 *
1670 * => called from cpu_switch()
1671 * => if proc is the curproc, then load it into the MMU
1672 */
1673
1674 void
1675 pmap_activate(struct proc *p)
1676 {
1677 struct pcb *pcb = &p->p_addr->u_pcb;
1678 struct pmap *pmap = p->p_vmspace->vm_map.pmap;
1679 struct cpu_info *self = curcpu();
1680
1681 pcb->pcb_pmap = pmap;
1682 /* Get the LDT that this process will actually use */
1683 #ifdef MULTIPROCESSOR
1684 pcb->pcb_ldt = pmap->pm_ldt == NULL ? self->ci_ldt : pmap->pm_ldt;
1685 #else
1686 pcb->pcb_ldt = pmap->pm_ldt == NULL ? ldt : pmap->pm_ldt;
1687 #endif
1688 pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
1689 pcb->pcb_cr3 = pmap->pm_pdirpa;
1690 if (p == curproc) {
1691 /*
1692 * Set the correct descriptor value (i.e. with the
1693 * correct code segment X limit) in the GDT and the LDT.
1694 */
1695 self->ci_gdt[GUCODE_SEL].sd = pcb->pcb_ldt[LUCODE_SEL].sd =
1696 pmap->pm_codeseg;
1697
1698 lcr3(pcb->pcb_cr3);
1699 lldt(pcb->pcb_ldt_sel);
1700
1701 /*
1702 * mark the pmap in use by this processor.
1703 */
1704 i386_atomic_setbits_l(&pmap->pm_cpus, (1U << cpu_number()));
1705 }
1706 }
1707
1708 /*
1709 * pmap_deactivate: deactivate a process' pmap
1710 */
1711
1712 void
1713 pmap_deactivate(struct proc *p)
1714 {
1715 struct pmap *pmap = p->p_vmspace->vm_map.pmap;
1716
1717 /*
1718 * mark the pmap no longer in use by this processor.
1719 */
1720 i386_atomic_clearbits_l(&pmap->pm_cpus, (1U << cpu_number()));
1721 }
1722
1723 /*
1724 * end of lifecycle functions
1725 */
1726
1727 /*
1728 * some misc. functions
1729 */
1730
1731 /*
1732 * pmap_extract: extract a PA for the given VA
1733 */
1734
1735 boolean_t
1736 pmap_extract(struct pmap *pmap, vaddr_t va, paddr_t *pap)
1737 {
1738 pt_entry_t *ptes, pte;
1739
1740 if (pmap_valid_entry(pmap->pm_pdir[pdei(va)])) {
1741 ptes = pmap_map_ptes(pmap);
1742 pte = ptes[atop(va)];
1743 pmap_unmap_ptes(pmap);
1744 if (!pmap_valid_entry(pte))
1745 return (FALSE);
1746 if (pap != NULL)
1747 *pap = (pte & PG_FRAME) | (va & ~PG_FRAME);
1748 return (TRUE);
1749 }
1750 return (FALSE);
1751 }
1752
1753 /*
1754 * pmap_virtual_space: used during bootup [pmap_steal_memory] to
1755 * determine the bounds of the kernel virtual address space.
1756 */
1757
1758 void
1759 pmap_virtual_space(vaddr_t *startp, vaddr_t *endp)
1760 {
1761 *startp = virtual_avail;
1762 *endp = virtual_end;
1763 }
1764
1765 /*
1766 * pmap_zero_page: zero a page
1767 */
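/*
 * pagezero: indirect so that a faster zeroing routine can presumably
 * be installed by machine dependent code; defaults to plain bzero.
 */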
1768 void (*pagezero)(void *, size_t) = bzero;
1769
1770 void
1771 pmap_zero_page(struct vm_page *pg)
1772 {
1773 pmap_zero_phys(VM_PAGE_TO_PHYS(pg));
1774 }
1775
1776 /*
1777 * pmap_zero_phys: same as pmap_zero_page, but for use before vm_pages are
1778 * initialized.
1779 */
1780 void
1781 pmap_zero_phys(paddr_t pa)
1782 {
1783 #ifdef MULTIPROCESSOR
1784 int id = cpu_number();
1785 #endif
1786 pt_entry_t *zpte = PTESLEW(zero_pte, id);
1787 caddr_t zerova = VASLEW(zerop, id);
1788
1789 #ifdef DIAGNOSTIC
1790 if (*zpte)
1791 panic("pmap_zero_phys: lock botch");
1792 #endif
1793
1794 *zpte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */
1795 pmap_update_pg((vaddr_t)zerova); /* flush TLB */
1796 pagezero(zerova, PAGE_SIZE); /* zero */
1797 *zpte = 0; /* zap! */
1798 }
1799
1800 /*
1801 * pmap_zero_page_uncached: the same, except uncached.
1802 */
1803
1804 boolean_t
1805 pmap_zero_page_uncached(paddr_t pa)
1806 {
1807 #ifdef MULTIPROCESSOR
1808 int id = cpu_number();
1809 #endif
1810 pt_entry_t *zpte = PTESLEW(zero_pte, id);
1811 caddr_t zerova = VASLEW(zerop, id);
1812
1813 #ifdef DIAGNOSTIC
1814 if (*zpte)
1815 panic("pmap_zero_page_uncached: lock botch");
1816 #endif
1817
1818 *zpte = (pa & PG_FRAME) | PG_V | PG_RW | PG_N; /* map in */
1819 pmap_update_pg((vaddr_t)zerova); /* flush TLB */
1820 pagezero(zerova, PAGE_SIZE); /* zero */
1821 *zpte = 0; /* zap! */
1822
1823 return (TRUE);
1824 }
1825
1826 /*
1827 * pmap_copy_page: copy a page
1828 */
1829
1830 void
1831 pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
1832 {
1833 paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg);
1834 paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg);
1835 #ifdef MULTIPROCESSOR
1836 int id = cpu_number();
1837 #endif
1838 pt_entry_t *spte = PTESLEW(csrc_pte, id);
1839 pt_entry_t *dpte = PTESLEW(cdst_pte, id);
1840 caddr_t csrcva = VASLEW(csrcp, id);
1841 caddr_t cdstva = VASLEW(cdstp, id);
1842
1843 #ifdef DIAGNOSTIC
1844 if (*spte || *dpte)
1845 panic("pmap_copy_page: lock botch");
1846 #endif
1847
1848 *spte = (srcpa & PG_FRAME) | PG_V | PG_RW;
1849 *dpte = (dstpa & PG_FRAME) | PG_V | PG_RW;
1850 pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva);
1851 bcopy(csrcva, cdstva, PAGE_SIZE);
1852 *spte = *dpte = 0; /* zap! */
1853 pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva);
1854 }
1855
1856 /*
1857 * p m a p r e m o v e f u n c t i o n s
1858 *
1859 * functions that remove mappings
1860 */
1861
1862 /*
1863 * pmap_remove_ptes: remove PTEs from a PTP
1864 *
1865 * => must have proper locking on pmap_master_lock
1866 * => caller must hold pmap's lock
1867 * => PTP must be mapped into KVA
1868 * => PTP should be null if pmap == pmap_kernel()
1869 */
1870
1871 void
1872 pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva,
1873 vaddr_t startva, vaddr_t endva, int flags)
1874 {
1875 struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */
1876 struct pv_entry *pve;
1877 pt_entry_t *pte = (pt_entry_t *) ptpva;
1878 struct vm_page *pg;
1879 pt_entry_t opte;
1880
1881 /*
1882 * note that ptpva points to the PTE that maps startva. this may
1883 * or may not be the first PTE in the PTP.
1884 *
1885 * we loop through the PTP while there are still PTEs to look at
1886 * and the wire_count is greater than 1 (because we use the wire_count
1887 * to keep track of the number of real PTEs in the PTP).
1888 */
1889
1890 for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1)
1891 ; pte++, startva += NBPG) {
1892 if (!pmap_valid_entry(*pte))
1893 continue; /* VA not mapped */
1894
1895 if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W))
1896 continue;
1897
1898 /* atomically save the old PTE and zap! it */
1899 opte = i386_atomic_testset_ul(pte, 0);
1900
1901 if (opte & PG_W)
1902 pmap->pm_stats.wired_count--;
1903 pmap->pm_stats.resident_count--;
1904
1905 if (ptp)
1906 ptp->wire_count--; /* dropping a PTE */
1907
1908 /*
		 * Unnecessary work if not PG_PVLIST.
1910 */
1911 pg = PHYS_TO_VM_PAGE(opte & PG_FRAME);
1912
1913 /*
1914 * if we are not on a pv list we are done.
1915 */
1916 if ((opte & PG_PVLIST) == 0) {
1917 #ifdef DIAGNOSTIC
1918 if (pg != NULL)
1919 panic("pmap_remove_ptes: managed page without "
1920 "PG_PVLIST for 0x%lx", startva);
1921 #endif
1922 continue;
1923 }
1924
1925 #ifdef DIAGNOSTIC
1926 if (pg == NULL)
1927 panic("pmap_remove_ptes: unmanaged page marked "
1928 "PG_PVLIST, va = 0x%lx, pa = 0x%lx",
1929 startva, (u_long)(opte & PG_FRAME));
1930 #endif
1931
1932 /* sync R/M bits */
1933 pmap_sync_flags_pte(pg, opte);
1934 pve = pmap_remove_pv(pg, pmap, startva);
1935 if (pve) {
1936 pve->pv_next = pv_tofree;
1937 pv_tofree = pve;
1938 }
1939
1940 /* end of "for" loop: time for next pte */
1941 }
1942 if (pv_tofree)
1943 pmap_free_pvs(pmap, pv_tofree);
1944 }
1945
1947 /*
1948 * pmap_remove_pte: remove a single PTE from a PTP
1949 *
1950 * => must have proper locking on pmap_master_lock
1951 * => caller must hold pmap's lock
1952 * => PTP must be mapped into KVA
1953 * => PTP should be null if pmap == pmap_kernel()
1954 * => returns true if we removed a mapping
1955 */
1956
1957 boolean_t
1958 pmap_remove_pte(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte,
1959 vaddr_t va, int flags)
1960 {
1961 struct pv_entry *pve;
1962 struct vm_page *pg;
1963 pt_entry_t opte;
1964
1965 if (!pmap_valid_entry(*pte))
1966 return (FALSE); /* VA not mapped */
1967
1968 if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W))
1969 return (FALSE);
1970
1971 opte = *pte; /* save the old PTE */
1972 *pte = 0; /* zap! */
1973
1974 pmap_exec_account(pmap, va, opte, 0);
1975
1976 if (opte & PG_W)
1977 pmap->pm_stats.wired_count--;
1978 pmap->pm_stats.resident_count--;
1979
1980 if (ptp)
1981 ptp->wire_count--; /* dropping a PTE */
1982
1983 pg = PHYS_TO_VM_PAGE(opte & PG_FRAME);
1984
1985 /*
1986 * if we are not on a pv list we are done.
1987 */
1988 if ((opte & PG_PVLIST) == 0) {
1989 #ifdef DIAGNOSTIC
1990 if (pg != NULL)
1991 panic("pmap_remove_pte: managed page without "
1992 "PG_PVLIST for 0x%lx", va);
1993 #endif
1994 return(TRUE);
1995 }
1996
1997 #ifdef DIAGNOSTIC
1998 if (pg == NULL)
1999 panic("pmap_remove_pte: unmanaged page marked "
2000 "PG_PVLIST, va = 0x%lx, pa = 0x%lx", va,
2001 (u_long)(opte & PG_FRAME));
2002 #endif
2003
2004 pmap_sync_flags_pte(pg, opte);
2005 pve = pmap_remove_pv(pg, pmap, va);
2006 if (pve)
2007 pmap_free_pv(pmap, pve);
2008 return(TRUE);
2009 }
2010
2011 /*
2012 * pmap_remove: top level mapping removal function
2013 *
2014 * => caller should not be holding any pmap locks
2015 */
2016
2017 void
2018 pmap_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
2019 {
2020 pmap_do_remove(pmap, sva, eva, PMAP_REMOVE_ALL);
2021 }
2022
2023 void
2024 pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
2025 {
2026 pt_entry_t *ptes, opte;
2027 boolean_t result;
2028 paddr_t ptppa;
2029 vaddr_t blkendva;
2030 struct vm_page *ptp;
2031 TAILQ_HEAD(, vm_page) empty_ptps;
2032 int shootall;
2033 vaddr_t va;
2034
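	/*
	 * PTPs that become empty are collected on empty_ptps and only
	 * freed once the TLB shootdowns below have completed.
	 */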
2035 TAILQ_INIT(&empty_ptps);
2036
2037 PMAP_MAP_TO_HEAD_LOCK();
2038 ptes = pmap_map_ptes(pmap); /* locks pmap */
2039
2040 /*
2041 * removing one page? take shortcut function.
2042 */
2043
2044 if (sva + PAGE_SIZE == eva) {
2045
2046 if (pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) {
2047
2048 /* PA of the PTP */
2049 ptppa = pmap->pm_pdir[pdei(sva)] & PG_FRAME;
2050
2051 /* get PTP if non-kernel mapping */
2052
2053 if (pmap == pmap_kernel()) {
2054 /* we never free kernel PTPs */
2055 ptp = NULL;
2056 } else {
2057 if (pmap->pm_ptphint &&
2058 VM_PAGE_TO_PHYS(pmap->pm_ptphint) ==
2059 ptppa) {
2060 ptp = pmap->pm_ptphint;
2061 } else {
2062 ptp = PHYS_TO_VM_PAGE(ptppa);
2063 #ifdef DIAGNOSTIC
2064 if (ptp == NULL)
2065 panic("pmap_remove: unmanaged "
2066 "PTP detected");
2067 #endif
2068 }
2069 }
2070
2071 /* do it! */
2072 result = pmap_remove_pte(pmap, ptp, &ptes[atop(sva)],
2073 sva, flags);
2074
2075 /*
2076 * if mapping removed and the PTP is no longer
2077 * being used, free it!
2078 */
2079
2080 if (result && ptp && ptp->wire_count <= 1) {
2081 opte = i386_atomic_testset_ul(
2082 &pmap->pm_pdir[pdei(sva)], 0);
2083 #ifdef MULTIPROCESSOR
2084 /*
2085 * XXXthorpej Redundant shootdown can happen
2086 * here if we're using APTE space.
2087 */
2088 #endif
2089 pmap_tlb_shootpage(curpcb->pcb_pmap,
2090 ((vaddr_t)ptes) + ptp->offset);
2091 #ifdef MULTIPROCESSOR
2092 /*
2093 * Always shoot down the pmap's self-mapping
2094 * of the PTP.
2095 * XXXthorpej Redundant shootdown can happen
2096 * here if pmap == curpcb->pcb_pmap (not APTE
2097 * space).
2098 */
2099 pmap_tlb_shootpage(pmap,
2100 ((vaddr_t)PTE_BASE) + ptp->offset);
2101 #endif
2102 pmap->pm_stats.resident_count--;
2103 if (pmap->pm_ptphint == ptp)
2104 pmap->pm_ptphint =
2105 TAILQ_FIRST(&pmap->pm_obj.memq);
2106 ptp->wire_count = 0;
2107 /* Postpone free to after shootdown. */
2108 uvm_pagerealloc(ptp, NULL, 0);
2109 TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
2110 }
2111 /*
2112 * Shoot the tlb after any updates to the PDE.
2113 */
2114 pmap_tlb_shootpage(pmap, sva);
2115 }
2116 pmap_tlb_shootwait();
2117 pmap_unmap_ptes(pmap); /* unlock pmap */
2118 PMAP_MAP_TO_HEAD_UNLOCK();
2119 while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
2120 TAILQ_REMOVE(&empty_ptps, ptp, listq);
2121 uvm_pagefree(ptp);
2122 }
2123 return;
2124 }
2125
2126 /*
2127 * Decide if we want to shoot the whole tlb or just the range.
2128 * Right now, we simply shoot everything when we remove more
2129 * than 32 pages, but never in the kernel pmap. XXX - tune.
2130 */
2131 if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel())
2132 shootall = 1;
2133 else
2134 shootall = 0;
2135
2136 for (va = sva ; va < eva ; va = blkendva) {
2137 /* determine range of block */
2138 blkendva = i386_round_pdr(va + 1);
2139 if (blkendva > eva)
2140 blkendva = eva;
2141
2142 /*
2143 * XXXCDC: our PTE mappings should never be removed
2144 * with pmap_remove! if we allow this (and why would
2145 * we?) then we end up freeing the pmap's page
2146 * directory page (PDP) before we are finished using
		 * it when we hit it in the recursive mapping. this
2148 * is BAD.
2149 *
		 * the long term solution is to move the PTEs out of user
		 * address space and into kernel address space (up
2152 * with APTE). then we can set VM_MAXUSER_ADDRESS to
2153 * be VM_MAX_ADDRESS.
2154 */
2155
2156 if (pdei(va) == PDSLOT_PTE)
2157 /* XXXCDC: ugly hack to avoid freeing PDP here */
2158 continue;
2159
2160 if (!pmap_valid_entry(pmap->pm_pdir[pdei(va)]))
2161 /* valid block? */
2162 continue;
2163
2164 /* PA of the PTP */
2165 ptppa = (pmap->pm_pdir[pdei(va)] & PG_FRAME);
2166
2167 /* get PTP if non-kernel mapping */
2168 if (pmap == pmap_kernel()) {
2169 /* we never free kernel PTPs */
2170 ptp = NULL;
2171 } else {
2172 if (pmap->pm_ptphint &&
2173 VM_PAGE_TO_PHYS(pmap->pm_ptphint) == ptppa) {
2174 ptp = pmap->pm_ptphint;
2175 } else {
2176 ptp = PHYS_TO_VM_PAGE(ptppa);
2177 #ifdef DIAGNOSTIC
2178 if (ptp == NULL)
2179 panic("pmap_remove: unmanaged PTP "
2180 "detected");
2181 #endif
2182 }
2183 }
2184 pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[atop(va)],
2185 va, blkendva, flags);
2186
2187 /* if PTP is no longer being used, free it! */
2188 if (ptp && ptp->wire_count <= 1) {
2189 opte = i386_atomic_testset_ul(
2190 &pmap->pm_pdir[pdei(va)], 0);
2191 #if defined(MULTIPROCESSOR)
2192 /*
2193 * XXXthorpej Redundant shootdown can happen here
2194 * if we're using APTE space.
2195 */
2196 #endif
2197 pmap_tlb_shootpage(curpcb->pcb_pmap,
2198 ((vaddr_t)ptes) + ptp->offset);
2199 #if defined(MULTIPROCESSOR)
2200 /*
2201 * Always shoot down the pmap's self-mapping
2202 * of the PTP.
2203 * XXXthorpej Redundant shootdown can happen here
2204 * if pmap == curpcb->pcb_pmap (not APTE space).
2205 */
2206 pmap_tlb_shootpage(pmap,
2207 ((vaddr_t)PTE_BASE) + ptp->offset);
2208 #endif
2209 pmap->pm_stats.resident_count--;
2210 if (pmap->pm_ptphint == ptp) /* update hint? */
2211 pmap->pm_ptphint =
2212 TAILQ_FIRST(&pmap->pm_obj.memq);
2213 ptp->wire_count = 0;
2214 /* Postpone free to after shootdown. */
2215 uvm_pagerealloc(ptp, NULL, 0);
2216 TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
2217 }
2218 }
2219 if (!shootall)
2220 pmap_tlb_shootrange(pmap, sva, eva);
2221 else
2222 pmap_tlb_shoottlb();
2223
2224 pmap_tlb_shootwait();
2225 pmap_unmap_ptes(pmap);
2226 PMAP_MAP_TO_HEAD_UNLOCK();
2227 while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
2228 TAILQ_REMOVE(&empty_ptps, ptp, listq);
2229 uvm_pagefree(ptp);
2230 }
2231 }
2232
2233 /*
2234 * pmap_page_remove: remove a managed vm_page from all pmaps that map it
2235 *
2236 * => R/M bits are sync'd back to attrs
2237 */
2238
2239 void
2240 pmap_page_remove(struct vm_page *pg)
2241 {
2242 struct pv_entry *pve;
2243 pt_entry_t *ptes, opte;
2244 TAILQ_HEAD(, vm_page) empty_ptps;
2245 struct vm_page *ptp;
2246
2247 if (pg->mdpage.pv_list == NULL)
2248 return;
2249
2250 TAILQ_INIT(&empty_ptps);
2251
2252 PMAP_HEAD_TO_MAP_LOCK();
2253
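	/*
	 * walk the page's pv list: for each pmap mapping the page, zap
	 * the PTE, sync the R/M bits and drop the PTP reference.
	 */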
2254 for (pve = pg->mdpage.pv_list ; pve != NULL ; pve = pve->pv_next) {
2255 ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */
2256
2257 #ifdef DIAGNOSTIC
2258 if (pve->pv_va >= uvm.pager_sva && pve->pv_va < uvm.pager_eva)
2259 printf("pmap_page_remove: found pager VA on pv_list\n");
2260 if (pve->pv_ptp && (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] &
2261 PG_FRAME)
2262 != VM_PAGE_TO_PHYS(pve->pv_ptp)) {
2263 printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n",
2264 pg, pve->pv_va, pve->pv_ptp);
2265 printf("pmap_page_remove: PTP's phys addr: "
2266 "actual=%x, recorded=%lx\n",
2267 (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] &
2268 PG_FRAME), VM_PAGE_TO_PHYS(pve->pv_ptp));
2269 panic("pmap_page_remove: mapped managed page has "
2270 "invalid pv_ptp field");
2271 }
2272 #endif
2273
2274 opte = i386_atomic_testset_ul(&ptes[atop(pve->pv_va)], 0);
2275
2276 if (opte & PG_W)
2277 pve->pv_pmap->pm_stats.wired_count--;
2278 pve->pv_pmap->pm_stats.resident_count--;
2279
2280 /* sync R/M bits */
2281 pmap_sync_flags_pte(pg, opte);
2282
2283 /* update the PTP reference count. free if last reference. */
2284 if (pve->pv_ptp) {
2285 pve->pv_ptp->wire_count--;
2286 if (pve->pv_ptp->wire_count <= 1) {
2287 opte = i386_atomic_testset_ul(
2288 &pve->pv_pmap->pm_pdir[pdei(pve->pv_va)],
2289 0);
2290 pmap_tlb_shootpage(curpcb->pcb_pmap,
2291 ((vaddr_t)ptes) + pve->pv_ptp->offset);
2292 #if defined(MULTIPROCESSOR)
2293 /*
2294 * Always shoot down the other pmap's
2295 * self-mapping of the PTP.
2296 */
2297 pmap_tlb_shootpage(pve->pv_pmap,
2298 ((vaddr_t)PTE_BASE) + pve->pv_ptp->offset);
2299 #endif
2300 pve->pv_pmap->pm_stats.resident_count--;
2301 /* update hint? */
2302 if (pve->pv_pmap->pm_ptphint == pve->pv_ptp)
2303 pve->pv_pmap->pm_ptphint =
2304 TAILQ_FIRST(&pve->pv_pmap->pm_obj.memq);
2305 pve->pv_ptp->wire_count = 0;
2306 /* Postpone free to after shootdown. */
2307 uvm_pagerealloc(pve->pv_ptp, NULL, 0);
2308 TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp,
2309 listq);
2310 }
2311 }
2312
2313 pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va);
2314
2315 pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */
2316 }
2317 pmap_free_pvs(NULL, pg->mdpage.pv_list);
2318 pg->mdpage.pv_list = NULL;
2319 PMAP_HEAD_TO_MAP_UNLOCK();
2320 pmap_tlb_shootwait();
2321
2322 while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
2323 TAILQ_REMOVE(&empty_ptps, ptp, listq);
2324 uvm_pagefree(ptp);
2325 }
2326 }
2327
2328 /*
2329 * p m a p a t t r i b u t e f u n c t i o n s
 * functions that test/change a managed page's attributes.
 * since a page can be mapped multiple times we must check each PTE that
2332 * maps it by going down the pv lists.
2333 */
2334
2335 /*
2336 * pmap_test_attrs: test a page's attributes
2337 */
2338
2339 boolean_t
2340 pmap_test_attrs(struct vm_page *pg, int testbits)
2341 {
2342 struct pv_entry *pve;
2343 pt_entry_t *ptes, pte;
2344 u_long mybits, testflags;
2345
2346 testflags = pmap_pte2flags(testbits);
2347
2348 if (pg->pg_flags & testflags)
2349 return (TRUE);
2350
2351 PMAP_HEAD_TO_MAP_LOCK();
2352 mybits = 0;
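	/*
	 * walk the pv list and OR together the requested bits from each
	 * mapping's PTE; we can stop as soon as any bit is found.
	 */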
2353 for (pve = pg->mdpage.pv_list; pve != NULL && mybits == 0;
2354 pve = pve->pv_next) {
2355 ptes = pmap_map_ptes(pve->pv_pmap);
2356 pte = ptes[atop(pve->pv_va)];
2357 pmap_unmap_ptes(pve->pv_pmap);
2358 mybits |= (pte & testbits);
2359 }
2360 PMAP_HEAD_TO_MAP_UNLOCK();
2361
2362 if (mybits == 0)
2363 return (FALSE);
2364
2365 atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(mybits));
2366
2367 return (TRUE);
2368 }
2369
2370 /*
2371 * pmap_clear_attrs: change a page's attributes
2372 *
2373 * => we return TRUE if we cleared one of the bits we were asked to
2374 */
2375
2376 boolean_t
2377 pmap_clear_attrs(struct vm_page *pg, int clearbits)
2378 {
2379 struct pv_entry *pve;
2380 pt_entry_t *ptes, npte, opte;
2381 u_long clearflags;
2382 int result;
2383
2384 clearflags = pmap_pte2flags(clearbits);
2385
2386 PMAP_HEAD_TO_MAP_LOCK();
2387
2388 result = pg->pg_flags & clearflags;
2389 if (result)
2390 atomic_clearbits_int(&pg->pg_flags, clearflags);
2391
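	/*
	 * clear the requested bits in every PTE that maps the page,
	 * shooting down the TLB entry for each mapping we modify.
	 */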
2392 for (pve = pg->mdpage.pv_list; pve != NULL; pve = pve->pv_next) {
2393 #ifdef DIAGNOSTIC
2394 if (!pmap_valid_entry(pve->pv_pmap->pm_pdir[pdei(pve->pv_va)]))
2395 panic("pmap_change_attrs: mapping without PTP "
2396 "detected");
2397 #endif
2398
2399 ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */
2400 npte = ptes[atop(pve->pv_va)];
2401 if (npte & clearbits) {
2402 result = TRUE;
2403 npte &= ~clearbits;
2404 opte = i386_atomic_testset_ul(
2405 &ptes[atop(pve->pv_va)], npte);
2406 pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va);
2407 }
2408 pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */
2409 }
2410
2411 PMAP_HEAD_TO_MAP_UNLOCK();
2412 pmap_tlb_shootwait();
2413
2414 return (result != 0);
2415 }
2416
2417 /*
2418 * p m a p p r o t e c t i o n f u n c t i o n s
2419 */
2420
2421 /*
2422 * pmap_page_protect: change the protection of all recorded mappings
2423 * of a managed page
2424 *
2425 * => NOTE: this is an inline function in pmap.h
2426 */
2427
2428 /* see pmap.h */
2429
2430 /*
 * pmap_protect: set the protection of the pages in a pmap
2432 *
2433 * => NOTE: this is an inline function in pmap.h
2434 */
2435
2436 /* see pmap.h */
2437
2438 /*
2439 * pmap_write_protect: write-protect pages in a pmap
2440 */
2441
2442 void
2443 pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva,
2444 vm_prot_t prot)
2445 {
2446 pt_entry_t *ptes, *spte, *epte, npte;
2447 vaddr_t blockend;
2448 u_int32_t md_prot;
2449 vaddr_t va;
2450 int shootall = 0;
2451
2452 ptes = pmap_map_ptes(pmap); /* locks pmap */
2453
2454 /* should be ok, but just in case ... */
2455 sva &= PG_FRAME;
2456 eva &= PG_FRAME;
2457
2458 if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel())
2459 shootall = 1;
2460
2461 for (va = sva; va < eva; va = blockend) {
2462 blockend = (va & PD_MASK) + NBPD;
2463 if (blockend > eva)
2464 blockend = eva;
2465
2466 /*
2467 * XXXCDC: our PTE mappings should never be write-protected!
2468 *
		 * the long term solution is to move the PTEs out of user
		 * address space and into kernel address space (up
2471 * with APTE). then we can set VM_MAXUSER_ADDRESS to
2472 * be VM_MAX_ADDRESS.
2473 */
2474
2475 /* XXXCDC: ugly hack to avoid freeing PDP here */
2476 if (pdei(va) == PDSLOT_PTE)
2477 continue;
2478
2479 /* empty block? */
2480 if (!pmap_valid_entry(pmap->pm_pdir[pdei(va)]))
2481 continue;
2482
2483 md_prot = protection_codes[prot];
2484 if (va < VM_MAXUSER_ADDRESS)
2485 md_prot |= PG_u;
2486 else if (va < VM_MAX_ADDRESS)
			/* XXX: write-protect our PTEs? never! */
2488 md_prot |= (PG_u | PG_RW);
2489
2490 spte = &ptes[atop(va)];
2491 epte = &ptes[atop(blockend)];
2492
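		/*
		 * update every valid PTE in this block to the new
		 * protection; the TLB is flushed once after the outer
		 * loop.
		 */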
2493 for (/*null */; spte < epte ; spte++, va += PAGE_SIZE) {
2494
2495 if (!pmap_valid_entry(*spte)) /* no mapping? */
2496 continue;
2497
2498 npte = (*spte & ~PG_PROT) | md_prot;
2499
2500 if (npte != *spte) {
2501 pmap_exec_account(pmap, va, *spte, npte);
2502 i386_atomic_testset_ul(spte, npte);
2503 }
2504 }
2505 }
2506 if (shootall)
2507 pmap_tlb_shoottlb();
2508 else
2509 pmap_tlb_shootrange(pmap, sva, eva);
2510
2511 pmap_tlb_shootwait();
2512 pmap_unmap_ptes(pmap); /* unlocks pmap */
2513 }
2514
2515 /*
2516 * end of protection functions
2517 */
2518
2519 /*
2520 * pmap_unwire: clear the wired bit in the PTE
2521 *
2522 * => mapping should already be in map
2523 */
2524
2525 void
2526 pmap_unwire(struct pmap *pmap, vaddr_t va)
2527 {
2528 pt_entry_t *ptes;
2529
2530 if (pmap_valid_entry(pmap->pm_pdir[pdei(va)])) {
2531 ptes = pmap_map_ptes(pmap); /* locks pmap */
2532
2533 #ifdef DIAGNOSTIC
2534 if (!pmap_valid_entry(ptes[atop(va)]))
2535 panic("pmap_unwire: invalid (unmapped) va 0x%lx", va);
2536 #endif
2537 if ((ptes[atop(va)] & PG_W) != 0) {
2538 ptes[atop(va)] &= ~PG_W;
2539 pmap->pm_stats.wired_count--;
2540 }
2541 #ifdef DIAGNOSTIC
2542 else {
2543 printf("pmap_unwire: wiring for pmap %p va 0x%lx "
2544 "didn't change!\n", pmap, va);
2545 }
2546 #endif
2547 pmap_unmap_ptes(pmap); /* unlocks map */
2548 }
2549 #ifdef DIAGNOSTIC
2550 else {
2551 panic("pmap_unwire: invalid PDE");
2552 }
2553 #endif
2554 }
2555
2556 /*
2557 * pmap_collect: free resources held by a pmap
2558 *
2559 * => optional function.
2560 * => called when a process is swapped out to free memory.
2561 */
2562
2563 void
2564 pmap_collect(struct pmap *pmap)
2565 {
2566 /*
2567 * free all of the pt pages by removing the physical mappings
2568 * for its entire address space.
2569 */
2570
2571 pmap_do_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS,
2572 PMAP_REMOVE_SKIPWIRED);
2573 }
2574
2575 /*
2576 * pmap_copy: copy mappings from one pmap to another
2577 *
2578 * => optional function
2579 * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
2580 */
2581
2582 /*
2583 * defined as macro in pmap.h
2584 */
2585
2586 /*
2587 * pmap_enter: enter a mapping into a pmap
2588 *
2589 * => must be done "now" ... no lazy-evaluation
2590 */
2591
2592 int
2593 pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa,
2594 vm_prot_t prot, int flags)
2595 {
2596 pt_entry_t *ptes, opte, npte;
2597 struct vm_page *ptp;
2598 struct pv_entry *pve = NULL;
2599 boolean_t wired = (flags & PMAP_WIRED) != 0;
2600 struct vm_page *pg = NULL;
2601 int error;
2602
2603 #ifdef DIAGNOSTIC
2604 /* sanity check: totally out of range? */
2605 if (va >= VM_MAX_KERNEL_ADDRESS)
2606 panic("pmap_enter: too big");
2607
2608 if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE)
2609 panic("pmap_enter: trying to map over PDP/APDP!");
2610
2611 /* sanity check: kernel PTPs should already have been pre-allocated */
2612 if (va >= VM_MIN_KERNEL_ADDRESS &&
2613 !pmap_valid_entry(pmap->pm_pdir[pdei(va)]))
2614 panic("pmap_enter: missing kernel PTP!");
2615 #endif
2616
2617 /* get lock */
2618 PMAP_MAP_TO_HEAD_LOCK();
2619
2620 /*
2621 * map in ptes and get a pointer to our PTP (unless we are the kernel)
2622 */
2623
2624 ptes = pmap_map_ptes(pmap); /* locks pmap */
2625 if (pmap == pmap_kernel()) {
2626 ptp = NULL;
2627 } else {
2628 ptp = pmap_get_ptp(pmap, pdei(va), FALSE);
2629 if (ptp == NULL) {
2630 if (flags & PMAP_CANFAIL) {
2631 error = ENOMEM;
2632 goto out;
2633 }
2634 panic("pmap_enter: get ptp failed");
2635 }
2636 }
2637 opte = ptes[atop(va)]; /* old PTE */
2638
2639 /*
2640 * is there currently a valid mapping at our VA?
2641 */
2642
2643 if (pmap_valid_entry(opte)) {
2644
2645 /*
2646 * first, update pm_stats. resident count will not
2647 * change since we are replacing/changing a valid
2648 * mapping. wired count might change...
2649 */
2650
2651 if (wired && (opte & PG_W) == 0)
2652 pmap->pm_stats.wired_count++;
2653 else if (!wired && (opte & PG_W) != 0)
2654 pmap->pm_stats.wired_count--;
2655
2656 /*
2657 * is the currently mapped PA the same as the one we
2658 * want to map?
2659 */
2660
2661 if ((opte & PG_FRAME) == pa) {
2662
2663 /* if this is on the PVLIST, sync R/M bit */
2664 if (opte & PG_PVLIST) {
2665 pg = PHYS_TO_VM_PAGE(pa);
2666 #ifdef DIAGNOSTIC
2667 if (pg == NULL)
2668 panic("pmap_enter: same pa PG_PVLIST "
2669 "mapping with unmanaged page "
2670 "pa = 0x%lx (0x%lx)", pa,
2671 atop(pa));
2672 #endif
2673 pmap_sync_flags_pte(pg, opte);
2674 }
2675 goto enter_now;
2676 }
2677
2678 /*
2679 * changing PAs: we must remove the old one first
2680 */
2681
2682 /*
2683 * if current mapping is on a pvlist,
2684 * remove it (sync R/M bits)
2685 */
2686
2687 if (opte & PG_PVLIST) {
2688 pg = PHYS_TO_VM_PAGE(opte & PG_FRAME);
2689 #ifdef DIAGNOSTIC
2690 if (pg == NULL)
2691 panic("pmap_enter: PG_PVLIST mapping with "
2692 "unmanaged page "
2693 "pa = 0x%lx (0x%lx)", pa, atop(pa));
2694 #endif
2695 pmap_sync_flags_pte(pg, opte);
2696 pve = pmap_remove_pv(pg, pmap, va);
			pg = NULL; /* this is not the page we are looking for */
2698 }
2699 } else { /* opte not valid */
2700 pmap->pm_stats.resident_count++;
2701 if (wired)
2702 pmap->pm_stats.wired_count++;
2703 if (ptp)
2704 ptp->wire_count++; /* count # of valid entries */
2705 }
2706
2707 /*
2708 * at this point pm_stats has been updated. pve is either NULL
2709 * or points to a now-free pv_entry structure (the latter case is
2710 * if we called pmap_remove_pv above).
2711 *
2712 * if this entry is to be on a pvlist, enter it now.
2713 */
2714
2715 if (pmap_initialized && pg == NULL)
2716 pg = PHYS_TO_VM_PAGE(pa);
2717
2718 if (pg != NULL) {
2719 if (pve == NULL) {
2720 pve = pmap_alloc_pv(pmap, ALLOCPV_NEED);
2721 if (pve == NULL) {
2722 if (flags & PMAP_CANFAIL) {
2723 /*
2724 * XXX - Back out stats changes!
2725 */
2726 error = ENOMEM;
2727 goto out;
2728 }
2729 panic("pmap_enter: no pv entries available");
2730 }
2731 }
2732 /* lock pvh when adding */
2733 pmap_enter_pv(pg, pve, pmap, va, ptp);
2734 } else {
2735
2736 /* new mapping is not PG_PVLIST. free pve if we've got one */
2737 if (pve)
2738 pmap_free_pv(pmap, pve);
2739 }
2740
2741 enter_now:
	/*
	 * at this point pg is non-NULL if we want the PG_PVLIST bit set
	 */
2745
2746 npte = pa | protection_codes[prot] | PG_V;
2747 pmap_exec_account(pmap, va, opte, npte);
2748 if (wired)
2749 npte |= PG_W;
2750 if (va < VM_MAXUSER_ADDRESS)
2751 npte |= PG_u;
2752 else if (va < VM_MAX_ADDRESS)
2753 npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */
2754 if (pmap == pmap_kernel())
2755 npte |= pmap_pg_g;
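	/*
	 * pre-set the PTE's referenced (PG_U) and modified (PG_M) bits
	 * to match the access that caused this mapping.
	 */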
2756 if (flags & VM_PROT_READ)
2757 npte |= PG_U;
2758 if (flags & VM_PROT_WRITE)
2759 npte |= PG_M;
2760 if (pg) {
2761 npte |= PG_PVLIST;
2762 pmap_sync_flags_pte(pg, npte);
2763 }
2764
2765 opte = i386_atomic_testset_ul(&ptes[atop(va)], npte);
2766
2767 if (opte & PG_V) {
2768 pmap_tlb_shootpage(pmap, va);
2769 pmap_tlb_shootwait();
2770 }
2771
2772 error = 0;
2773
2774 out:
2775 pmap_unmap_ptes(pmap);
2776 PMAP_MAP_TO_HEAD_UNLOCK();
2777
2778 return error;
2779 }
2780
2781 /*
2782 * pmap_growkernel: increase usage of KVM space
2783 *
2784 * => we allocate new PTPs for the kernel and install them in all
2785 * the pmaps on the system.
2786 */
2787
2788 vaddr_t
2789 pmap_growkernel(vaddr_t maxkvaddr)
2790 {
2791 struct pmap *kpm = pmap_kernel(), *pm;
2792 int needed_kpde; /* needed number of kernel PTPs */
2793 int s;
2794 paddr_t ptaddr;
2795
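	/*
	 * work out how many kernel PTPs are needed to cover maxkvaddr;
	 * nkpde is the number we currently have.
	 */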
2796 needed_kpde = (int)(maxkvaddr - VM_MIN_KERNEL_ADDRESS + (NBPD-1))
2797 / NBPD;
2798 if (needed_kpde <= nkpde)
2799 goto out; /* we are OK */
2800
2801 /*
2802 * whoops! we need to add kernel PTPs
2803 */
2804
2805 s = splhigh(); /* to be safe */
2806 simple_lock(&kpm->pm_obj.vmobjlock);
2807
2808 for (/*null*/ ; nkpde < needed_kpde ; nkpde++) {
2809
2810 if (uvm.page_init_done == FALSE) {
2811
2812 /*
2813 * we're growing the kernel pmap early (from
2814 * uvm_pageboot_alloc()). this case must be
2815 * handled a little differently.
2816 */
2817
2818 if (uvm_page_physget(&ptaddr) == FALSE)
2819 panic("pmap_growkernel: out of memory");
2820 pmap_zero_phys(ptaddr);
2821
2822 kpm->pm_pdir[PDSLOT_KERN + nkpde] =
2823 ptaddr | PG_RW | PG_V | PG_U | PG_M;
2824
2825 /* count PTP as resident */
2826 kpm->pm_stats.resident_count++;
2827 continue;
2828 }
2829
2830 /*
2831 * THIS *MUST* BE CODED SO AS TO WORK IN THE
2832 * pmap_initialized == FALSE CASE! WE MAY BE
2833 * INVOKED WHILE pmap_init() IS RUNNING!
2834 */
2835
2836 while (!pmap_alloc_ptp(kpm, PDSLOT_KERN + nkpde, FALSE, 0))
2837 uvm_wait("pmap_growkernel");
2838
2839 /* distribute new kernel PTP to all active pmaps */
2840 simple_lock(&pmaps_lock);
2841 LIST_FOREACH(pm, &pmaps, pm_list) {
2842 pm->pm_pdir[PDSLOT_KERN + nkpde] =
2843 kpm->pm_pdir[PDSLOT_KERN + nkpde];
2844 }
2845 simple_unlock(&pmaps_lock);
2846 }
2847
2848 simple_unlock(&kpm->pm_obj.vmobjlock);
2849 splx(s);
2850
2851 out:
2852 return (VM_MIN_KERNEL_ADDRESS + (nkpde * NBPD));
2853 }
2854
2855 #ifdef DEBUG
2856 void pmap_dump(struct pmap *, vaddr_t, vaddr_t);
2857
2858 /*
2859 * pmap_dump: dump all the mappings from a pmap
2860 *
2861 * => caller should not be holding any pmap locks
2862 */
2863
2864 void
2865 pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
2866 {
2867 pt_entry_t *ptes, *pte;
2868 vaddr_t blkendva;
2869
2870 /*
	 * if end is out of range, truncate it.
	 * if end <= start, dump up to the maximum user address.
2873 */
2874
2875 if (eva > VM_MAXUSER_ADDRESS || eva <= sva)
2876 eva = VM_MAXUSER_ADDRESS;
2877
2878 PMAP_MAP_TO_HEAD_LOCK();
2879 ptes = pmap_map_ptes(pmap); /* locks pmap */
2880
2881 /*
2882 * dumping a range of pages: we dump in PTP sized blocks (4MB)
2883 */
2884
2885 for (/* null */ ; sva < eva ; sva = blkendva) {
2886
2887 /* determine range of block */
2888 blkendva = i386_round_pdr(sva+1);
2889 if (blkendva > eva)
2890 blkendva = eva;
2891
2892 /* valid block? */
2893 if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
2894 continue;
2895
2896 pte = &ptes[atop(sva)];
2897 for (/* null */; sva < blkendva ; sva += NBPG, pte++) {
2898 if (!pmap_valid_entry(*pte))
2899 continue;
			printf("va %#lx -> pa %#x (pte=%#x)\n",
			    sva, *pte & PG_FRAME, *pte);
2902 }
2903 }
2904 pmap_unmap_ptes(pmap);
2905 PMAP_MAP_TO_HEAD_UNLOCK();
2906 }
2907 #endif
2908
2909 #ifdef MULTIPROCESSOR
2910 /*
2911 * Locking for tlb shootdown.
2912 *
2913 * We lock by setting tlb_shoot_wait to the number of cpus that will
2914 * receive our tlb shootdown. After sending the IPIs, we don't need to
2915 * worry about locking order or interrupts spinning for the lock because
2916 * the call that grabs the "lock" isn't the one that releases it. And
2917 * there is nothing that can block the IPI that releases the lock.
2918 *
2919 * The functions are organized so that we first count the number of
2920 * cpus we need to send the IPI to, then we grab the counter, then
2921 * we send the IPIs, then we finally do our own shootdown.
2922 *
 * Our own shootdown is done last so that it runs in parallel with the
 * other cpus, which shortens the spin time.
2925 *
2926 * Notice that we depend on failures to send IPIs only being able to
2927 * happen during boot. If they happen later, the above assumption
 * doesn't hold since we can end up in situations where no one will
 * release the lock if we get an interrupt at a bad moment.
2930 */
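
/*
 * In outline, each shootdown function below follows the same pattern
 * (this is a sketch of the code that follows, not a separate interface):
 *
 *	count the cpus that must receive the IPI	-> wait
 *	spin until we can swing tlb_shoot_wait from 0 to wait
 *	publish the address(es) to invalidate
 *	send the IPIs
 *	do our own invalidation locally
 *
 * the IPI handlers (defined elsewhere) decrement tlb_shoot_wait, and
 * pmap_tlb_shootwait() spins until it reaches zero.
 */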
2931
2932 volatile int tlb_shoot_wait;
2933
2934 volatile vaddr_t tlb_shoot_addr1;
2935 volatile vaddr_t tlb_shoot_addr2;
2936
2937 void
2938 pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
2939 {
2940 struct cpu_info *ci, *self = curcpu();
2941 CPU_INFO_ITERATOR cii;
2942 int wait = 0;
2943 int mask = 0;
2944
2945 CPU_INFO_FOREACH(cii, ci) {
2946 if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
2947 !(ci->ci_flags & CPUF_RUNNING))
2948 continue;
2949 mask |= 1 << ci->ci_cpuid;
2950 wait++;
2951 }
2952
2953 if (wait > 0) {
2954 int s = splvm();
2955
2956 while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
2957 while (tlb_shoot_wait != 0)
2958 SPINLOCK_SPIN_HOOK;
2959 }
2960 tlb_shoot_addr1 = va;
2961 CPU_INFO_FOREACH(cii, ci) {
2962 if ((mask & 1 << ci->ci_cpuid) == 0)
2963 continue;
2964 if (i386_fast_ipi(ci, LAPIC_IPI_INVLPG) != 0)
2965 panic("pmap_tlb_shootpage: ipi failed");
2966 }
2967 splx(s);
2968 }
2969
2970 if (pmap_is_curpmap(pm))
2971 pmap_update_pg(va);
2972 }
2973
2974 void
2975 pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
2976 {
2977 struct cpu_info *ci, *self = curcpu();
2978 CPU_INFO_ITERATOR cii;
2979 int wait = 0;
2980 int mask = 0;
2981 vaddr_t va;
2982
2983 CPU_INFO_FOREACH(cii, ci) {
2984 if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
2985 !(ci->ci_flags & CPUF_RUNNING))
2986 continue;
2987 mask |= 1 << ci->ci_cpuid;
2988 wait++;
2989 }
2990
2991 if (wait > 0) {
2992 int s = splvm();
2993
2994 while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
2995 while (tlb_shoot_wait != 0)
2996 SPINLOCK_SPIN_HOOK;
2997 }
2998 tlb_shoot_addr1 = sva;
2999 tlb_shoot_addr2 = eva;
3000 CPU_INFO_FOREACH(cii, ci) {
3001 if ((mask & 1 << ci->ci_cpuid) == 0)
3002 continue;
3003 if (i386_fast_ipi(ci, LAPIC_IPI_INVLRANGE) != 0)
3004 panic("pmap_tlb_shootrange: ipi failed");
3005 }
3006 splx(s);
3007 }
3008
3009 if (pmap_is_curpmap(pm))
3010 for (va = sva; va < eva; va += PAGE_SIZE)
3011 pmap_update_pg(va);
3012 }
3013
3014 void
3015 pmap_tlb_shoottlb(void)
3016 {
3017 struct cpu_info *ci, *self = curcpu();
3018 CPU_INFO_ITERATOR cii;
3019 int wait = 0;
3020 int mask = 0;
3021
3022 CPU_INFO_FOREACH(cii, ci) {
3023 if (ci == self || !(ci->ci_flags & CPUF_RUNNING))
3024 continue;
3025 mask |= 1 << ci->ci_cpuid;
3026 wait++;
3027 }
3028
3029 if (wait) {
3030 int s = splvm();
3031
3032 while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
3033 while (tlb_shoot_wait != 0)
3034 SPINLOCK_SPIN_HOOK;
3035 }
3036
3037 CPU_INFO_FOREACH(cii, ci) {
3038 if ((mask & 1 << ci->ci_cpuid) == 0)
3039 continue;
3040 if (i386_fast_ipi(ci, LAPIC_IPI_INVLTLB) != 0)
3041 panic("pmap_tlb_shoottlb: ipi failed");
3042 }
3043 splx(s);
3044 }
3045
3046 tlbflush();
3047 }
3048
3049 void
3050 pmap_tlb_shootwait(void)
3051 {
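	/* spin until every cpu targeted by the last shootdown has acked */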
3052 while (tlb_shoot_wait != 0)
3053 SPINLOCK_SPIN_HOOK;
3054 }
3055
3056 #else
3057
3058 void
3059 pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
3060 {
3061 if (pmap_is_curpmap(pm))
3062 pmap_update_pg(va);
}
3065
3066 void
3067 pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
3068 {
3069 vaddr_t va;
3070
3071 for (va = sva; va < eva; va += PAGE_SIZE)
3072 pmap_update_pg(va);
3073 }
3074
3075 void
3076 pmap_tlb_shoottlb(void)
3077 {
3078 tlbflush();
3079 }
3080 #endif /* MULTIPROCESSOR */