/*	$OpenBSD: uvm_pglist.c,v 1.20 2007/04/13 18:57:49 art Exp $	*/
/*	$NetBSD: uvm_pglist.c,v 1.13 2001/02/18 21:19:08 chs Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_pglist.c: pglist functions
 */
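
/*
 * Background note (not from the original file): a "pglist" is simply a
 * TAILQ of vm_page structures chained through their pageq field -- the
 * UVM headers declare it with TAILQ_HEAD(pglist, vm_page); see them for
 * the authoritative definition.  Callers hand the functions below an
 * already-initialized list head.
 */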

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>

#include <uvm/uvm.h>

#ifdef VM_PAGE_ALLOC_MEMORY_STATS
#define	STAT_INCR(v)	(v)++
#define	STAT_DECR(v)	do { \
		if ((v) == 0) \
			printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \
		else \
			(v)--; \
	} while (0)
u_long	uvm_pglistalloc_npages;
#else
#define	STAT_INCR(v)
#define	STAT_DECR(v)
#endif

int	uvm_pglistalloc_simple(psize_t, paddr_t, paddr_t, struct pglist *);

int
uvm_pglistalloc_simple(psize_t size, paddr_t low, paddr_t high,
    struct pglist *rlist)
{
	psize_t try;
	int psi;
	struct vm_page *pg;
	int s, todo, idx, pgflidx, error, free_list;
	UVMHIST_FUNC("uvm_pglistalloc_simple"); UVMHIST_CALLED(pghist);
#ifdef DEBUG
	vm_page_t tp;
#endif

	/* Default to "lose". */
	error = ENOMEM;

	todo = size / PAGE_SIZE;
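	/*
	 * Note: our only caller, uvm_pglistalloc(), has already passed
	 * size through round_page(), so the division above is exact.
	 */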

	/*
	 * Block all memory allocation and lock the free list.
	 */
	s = uvm_lock_fpageq();

	/* Are there even any free pages? */
	if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
		goto out;

	for (try = low; try < high; try += PAGE_SIZE) {

		/*
		 * Make sure this is a managed physical page.
		 */

		if ((psi = vm_physseg_find(atop(try), &idx)) == -1)
			continue; /* managed? */
		pg = &vm_physmem[psi].pgs[idx];
		if (VM_PAGE_IS_FREE(pg) == 0)
			continue;

		free_list = uvm_page_lookup_freelist(pg);
		pgflidx = (pg->pg_flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN;
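		/*
		 * A free page sits on exactly one queue of its free
		 * list: pre-zeroed pages on PGFL_ZEROS, everything else
		 * on PGFL_UNKNOWN.  PG_ZERO tells us which, so the DEBUG
		 * check below only has to walk that one queue.
		 */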
#ifdef DEBUG
		for (tp = TAILQ_FIRST(&uvm.page_free[free_list].pgfl_queues[pgflidx]);
		    tp != NULL;
		    tp = TAILQ_NEXT(tp, pageq)) {
			if (tp == pg)
				break;
		}
		if (tp == NULL)
			panic("uvm_pglistalloc_simple: page not on freelist");
#endif
		TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_queues[pgflidx], pg, pageq);
		uvmexp.free--;
		if (pg->pg_flags & PG_ZERO)
			uvmexp.zeropages--;
		pg->pg_flags = PG_CLEAN;
		pg->uobject = NULL;
		pg->uanon = NULL;
		pg->pg_version++;
		TAILQ_INSERT_TAIL(rlist, pg, pageq);
		STAT_INCR(uvm_pglistalloc_npages);
		if (--todo == 0) {
			error = 0;
			goto out;
		}
	}

out:
	/*
	 * Check to see if we need to generate some free pages by waking
	 * the pagedaemon.
	 */

	if (!error && (uvmexp.free + uvmexp.paging < uvmexp.freemin ||
	    (uvmexp.free + uvmexp.paging < uvmexp.freetarg &&
	    uvmexp.inactive < uvmexp.inactarg))) {
		wakeup(&uvm.pagedaemon);
	}

	uvm_unlock_fpageq(s);

	if (error)
		uvm_pglistfree(rlist);

	return (error);
}

/*
 * uvm_pglistalloc: allocate a list of pages
 *
 * => allocated pages are placed at the tail of rlist.  rlist is
 *    assumed to be properly initialized by the caller.
 * => returns 0 on success or an errno on failure
 * => XXX: implementation allocates only a single segment; it might be
 *    able to take better advantage of vm_physmem[].
 * => doesn't take into account clean non-busy pages on the inactive list
 *    that could be used(?)
 * => params:
 *	size	  the size of the allocation, rounded to page size.
 *	low	  the low address of the allowed allocation range.
 *	high	  the high address of the allowed allocation range.
 *	alignment memory must be aligned to this power-of-two boundary.
 *	boundary  no segment in the allocation may cross this
 *		  power-of-two boundary (relative to zero).
 */
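
/*
 * Example (an illustrative sketch, not code from this file): a driver
 * needing a physically contiguous, 64KB-aligned, 64KB DMA buffer in the
 * low 16MB might call this roughly as follows; "mlist" and the
 * surrounding driver context are hypothetical.
 *
 *	struct pglist mlist;
 *	int error;
 *
 *	TAILQ_INIT(&mlist);
 *	error = uvm_pglistalloc(64 * 1024, 0, 0x1000000, 64 * 1024,
 *	    0, &mlist, 1, 0);
 *	if (error)
 *		return (error);
 *	// walk mlist, mapping VM_PAGE_TO_PHYS() of each entry...
 *	uvm_pglistfree(&mlist);
 */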

int
uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
    paddr_t boundary, struct pglist *rlist, int nsegs, int waitok)
{
	paddr_t try, idxpa, lastidxpa;
	int psi;
	struct vm_page *pgs;
	int s, tryidx, idx, pgflidx, end, error, free_list;
	vm_page_t m;
	u_long pagemask;
#ifdef DEBUG
	vm_page_t tp;
#endif
	UVMHIST_FUNC("uvm_pglistalloc"); UVMHIST_CALLED(pghist);

	KASSERT((alignment & (alignment - 1)) == 0);
	KASSERT((boundary & (boundary - 1)) == 0);

	/*
	 * Our allocations are always page granularity, so our alignment
	 * must be, too.
	 */
	if (alignment < PAGE_SIZE)
		alignment = PAGE_SIZE;

	if (size == 0)
		return (EINVAL);

	size = round_page(size);
	try = roundup(low, alignment);

	if ((nsegs >= size / PAGE_SIZE) && (alignment == PAGE_SIZE) &&
	    (boundary == 0))
		return (uvm_pglistalloc_simple(size, try, high, rlist));

	if (boundary != 0 && boundary < size)
		return (EINVAL);

	pagemask = ~(boundary - 1);
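	/*
	 * Worked example: with boundary == 0x10000 (and a 32-bit u_long
	 * for concreteness), pagemask is 0xffff0000.  Two addresses lie
	 * in the same boundary-sized chunk exactly when their high bits
	 * agree, i.e. ((a ^ b) & pagemask) == 0; the scan below uses
	 * this to reject candidate regions that straddle a boundary.
	 */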

	/* Default to "lose". */
	error = ENOMEM;

	/*
	 * Block all memory allocation and lock the free list.
	 */
	s = uvm_lock_fpageq();

	/* Are there even any free pages? */
	if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
		goto out;

	for (;; try += alignment) {
		if (try + size > high) {

			/*
			 * We've run past the allowable range.
			 */

			goto out;
		}

		/*
		 * Make sure this is a managed physical page.
		 */

		if ((psi = vm_physseg_find(atop(try), &idx)) == -1)
			continue; /* managed? */
		if (vm_physseg_find(atop(try + size), NULL) != psi)
			continue; /* end must be in this segment */

		tryidx = idx;
		end = idx + (size / PAGE_SIZE);
		pgs = vm_physmem[psi].pgs;

		/*
		 * Found a suitable starting page.  See if the range is free.
		 */

		for (; idx < end; idx++) {
			if (VM_PAGE_IS_FREE(&pgs[idx]) == 0) {
				break;
			}
			idxpa = VM_PAGE_TO_PHYS(&pgs[idx]);
			if (idx > tryidx) {
				lastidxpa = VM_PAGE_TO_PHYS(&pgs[idx - 1]);
				if ((lastidxpa + PAGE_SIZE) != idxpa) {

					/*
					 * Region not contiguous.
					 */

					break;
				}
				if (boundary != 0 &&
				    ((lastidxpa ^ idxpa) & pagemask) != 0) {

					/*
					 * Region crosses boundary.
					 */

					break;
				}
			}
		}
		if (idx == end) {
			break;
		}
	}

#if PGFL_NQUEUES != 2
#error uvm_pglistalloc needs to be updated
#endif
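	/*
	 * The loop below (like uvm_pglistalloc_simple() above) hard-codes
	 * the two free-page queues, PGFL_ZEROS and PGFL_UNKNOWN; the
	 * preprocessor guard above forces a revisit of this code if
	 * another queue is ever added.
	 */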

	/*
	 * we have a chunk of memory that conforms to the requested constraints.
	 */
	idx = tryidx;
	while (idx < end) {
		m = &pgs[idx];
		free_list = uvm_page_lookup_freelist(m);
		pgflidx = (m->pg_flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN;
#ifdef DEBUG
		for (tp = TAILQ_FIRST(&uvm.page_free[
		    free_list].pgfl_queues[pgflidx]);
		    tp != NULL;
		    tp = TAILQ_NEXT(tp, pageq)) {
			if (tp == m)
				break;
		}
		if (tp == NULL)
			panic("uvm_pglistalloc: page not on freelist");
#endif
		TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_queues[pgflidx],
		    m, pageq);
		uvmexp.free--;
		if (m->pg_flags & PG_ZERO)
			uvmexp.zeropages--;
		m->pg_flags = PG_CLEAN;
		m->uobject = NULL;
		m->uanon = NULL;
		m->pg_version++;
		TAILQ_INSERT_TAIL(rlist, m, pageq);
		idx++;
		STAT_INCR(uvm_pglistalloc_npages);
	}
	error = 0;

out:
	/*
	 * Check to see if we need to generate some free pages by waking
	 * the pagedaemon.
	 */

	if (uvmexp.free + uvmexp.paging < uvmexp.freemin ||
	    (uvmexp.free + uvmexp.paging < uvmexp.freetarg &&
	    uvmexp.inactive < uvmexp.inactarg)) {
		wakeup(&uvm.pagedaemon);
	}

	uvm_unlock_fpageq(s);

	return (error);
}

/*
 * uvm_pglistfree: free a list of pages
 *
 * => pages should already be unmapped
 */
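
/*
 * Example (illustrative sketch): tearing down the hypothetical "mlist"
 * from the uvm_pglistalloc() example above is a single call, after
 * which the list head is empty and may be reused:
 *
 *	uvm_pglistfree(&mlist);
 *	KASSERT(TAILQ_EMPTY(&mlist));
 */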

void
uvm_pglistfree(struct pglist *list)
{
	struct vm_page *m;
	int s;
	UVMHIST_FUNC("uvm_pglistfree"); UVMHIST_CALLED(pghist);

	/*
	 * Block all memory allocation and lock the free list.
	 */
	s = uvm_lock_fpageq();

	while ((m = TAILQ_FIRST(list)) != NULL) {
		KASSERT((m->pg_flags & (PQ_ACTIVE|PQ_INACTIVE)) == 0);
		TAILQ_REMOVE(list, m, pageq);
#ifdef DEBUG
		if (m->uobject == (void *)0xdeadbeef &&
		    m->uanon == (void *)0xdeadbeef) {
			panic("uvm_pglistfree: freeing free page %p", m);
		}

		m->uobject = (void *)0xdeadbeef;
		m->offset = 0xdeadbeef;
		m->uanon = (void *)0xdeadbeef;
#endif
		atomic_clearbits_int(&m->pg_flags, PQ_MASK);
		atomic_setbits_int(&m->pg_flags, PQ_FREE);
		TAILQ_INSERT_TAIL(&uvm.page_free[
		    uvm_page_lookup_freelist(m)].pgfl_queues[PGFL_UNKNOWN],
		    m, pageq);
		uvmexp.free++;
		if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
			uvm.page_idle_zero = vm_page_zero_enable;
		STAT_DECR(uvm_pglistalloc_npages);
	}

	uvm_unlock_fpageq(s);
}