root/uvm/uvm_mmap.c

DEFINITIONS

This source file includes the following definitions:
  1. sys_sbrk
  2. sys_sstk
  3. sys_mquery
  4. sys_mincore
  5. sys_mmap
  6. sys_msync
  7. sys_munmap
  8. sys_mprotect
  9. sys_minherit
  10. sys_madvise
  11. sys_mlock
  12. sys_munlock
  13. sys_mlockall
  14. sys_munlockall
  15. uvm_mmap

    1 /*      $OpenBSD: uvm_mmap.c,v 1.69 2007/06/18 21:51:15 pedro Exp $     */
    2 /*      $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $        */
    3 
    4 /*
    5  * Copyright (c) 1997 Charles D. Cranor and Washington University.
    6  * Copyright (c) 1991, 1993 The Regents of the University of California.  
    7  * Copyright (c) 1988 University of Utah.
    8  * 
    9  * All rights reserved.
   10  *
   11  * This code is derived from software contributed to Berkeley by
   12  * the Systems Programming Group of the University of Utah Computer
   13  * Science Department.
   14  *
   15  * Redistribution and use in source and binary forms, with or without
   16  * modification, are permitted provided that the following conditions
   17  * are met:
   18  * 1. Redistributions of source code must retain the above copyright
   19  *    notice, this list of conditions and the following disclaimer.
   20  * 2. Redistributions in binary form must reproduce the above copyright
   21  *    notice, this list of conditions and the following disclaimer in the
   22  *    documentation and/or other materials provided with the distribution.
   23  * 3. All advertising materials mentioning features or use of this software
   24  *    must display the following acknowledgement:
   25  *      This product includes software developed by the Charles D. Cranor,
   26  *      Washington University, University of California, Berkeley and 
   27  *      its contributors.
   28  * 4. Neither the name of the University nor the names of its contributors
   29  *    may be used to endorse or promote products derived from this software
   30  *    without specific prior written permission.
   31  *
   32  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   33  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   34  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   35  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   36  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   37  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   38  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   39  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   40  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   41  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   42  * SUCH DAMAGE.
   43  *
   44  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
   45  *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
   46  * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
   47  */
   48 
   49 /*
   50  * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
   51  * function.
   52  */
   53 #include <sys/param.h>
   54 #include <sys/systm.h>
   55 #include <sys/file.h>
   56 #include <sys/filedesc.h>
   57 #include <sys/resourcevar.h>
   58 #include <sys/mman.h>
   59 #include <sys/mount.h>
   60 #include <sys/proc.h>
   61 #include <sys/malloc.h>
   62 #include <sys/vnode.h>
   63 #include <sys/conf.h>
   64 #include <sys/stat.h>
   65 
   66 #include <machine/exec.h>       /* for __LDPGSZ */
   67 
   68 #include <miscfs/specfs/specdev.h>
   69 
   70 #include <sys/syscallargs.h>
   71 
   72 #include <uvm/uvm.h>
   73 #include <uvm/uvm_device.h>
   74 #include <uvm/uvm_vnode.h>
   75 
   76 /*
   77  * Page align addr and size, returning EINVAL on wraparound.
   78  */
   79 #define ALIGN_ADDR(addr, size, pageoff) do {                            \
   80         pageoff = (addr & PAGE_MASK);                                   \
   81         if (pageoff != 0) {                                             \
   82                 if (size > SIZE_MAX - pageoff)                          \
   83                         return (EINVAL);        /* wraparound */        \
   84                 addr -= pageoff;                                        \
   85                 size += pageoff;                                        \
   86         }                                                               \
   87         if (size != 0) {                                                \
   88                 size = (vsize_t)round_page(size);                       \
   89                 if (size == 0)                                          \
   90                         return (EINVAL);        /* wraparound */        \
   91         }                                                               \
   92 } while (0)
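
      /*
       * Editor's note (not part of the original source): a worked example
       * of ALIGN_ADDR, assuming a 4 KB page size (PAGE_MASK == 0xfff).
       * With addr = 0x1234 and size = 0x100:
       *
       *      pageoff = 0x234, addr becomes 0x1000, size becomes 0x334;
       *      round_page(0x334) == 0x1000, so the caller ends up operating
       *      on the single page [0x1000, 0x2000).
       *
       * The SIZE_MAX test rejects a size so large that adding pageoff
       * (or rounding it up) would wrap around.
       */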
   93 
   94 /*
   95  * unimplemented VM system calls:
   96  */
   97 
   98 /*
   99  * sys_sbrk: sbrk system call.
  100  */
  101 
  102 /* ARGSUSED */
  103 int
  104 sys_sbrk(p, v, retval)
  105         struct proc *p;
  106         void *v;
  107         register_t *retval;
  108 {
  109 #if 0
  110         struct sys_sbrk_args /* {
  111                 syscallarg(int) incr;
  112         } */ *uap = v;
  113 #endif
  114 
  115         return (ENOSYS);
  116 }
  117 
  118 /*
  119  * sys_sstk: sstk system call.
  120  */
  121 
  122 /* ARGSUSED */
  123 int
  124 sys_sstk(p, v, retval)
  125         struct proc *p;
  126         void *v;
  127         register_t *retval;
  128 {
  129 #if 0
  130         struct sys_sstk_args /* {
  131                 syscallarg(int) incr;
  132         } */ *uap = v;
  133 #endif
  134 
  135         return (ENOSYS);
  136 }
  137 
  138 /*
  139  * sys_mquery: provide mapping hints to applications that do fixed mappings
  140  *
  141  * flags: 0 or MAP_FIXED (MAP_FIXED - means that we insist on this addr and
  142  *      don't care about PMAP_PREFER or such)
  143  * addr: hint where we'd like to place the mapping.
  144  * size: size of the mapping
  145  * fd: fd of the file we want to map
  146  * off: offset within the file
  147  */
  148 
  149 int
  150 sys_mquery(p, v, retval)
  151         struct proc *p;
  152         void *v;
  153         register_t *retval;
  154 {
  155         struct sys_mquery_args /* {
  156                 syscallarg(void *) addr;
  157                 syscallarg(size_t) len;
  158                 syscallarg(int) prot;
  159                 syscallarg(int) flags;
  160                 syscallarg(int) fd;
  161                 syscallarg(long) pad;
  162                 syscallarg(off_t) pos;
  163         } */ *uap = v;
  164         struct file *fp;
  165         struct uvm_object *uobj;
  166         voff_t uoff;
  167         int error;
  168         vaddr_t vaddr;
  169         int flags = 0;
  170         vsize_t size;
  171         vm_prot_t prot;
  172         int fd;
  173 
  174         vaddr = (vaddr_t) SCARG(uap, addr);
  175         prot = SCARG(uap, prot);
  176         size = (vsize_t) SCARG(uap, len);
  177         fd = SCARG(uap, fd);
  178 
  179         if ((prot & VM_PROT_ALL) != prot)
  180                 return (EINVAL);
  181 
  182         if (SCARG(uap, flags) & MAP_FIXED)
  183                 flags |= UVM_FLAG_FIXED;
  184 
  185         if (fd >= 0) {
  186                 if ((error = getvnode(p->p_fd, fd, &fp)) != 0)
  187                         return (error);
  188                 uobj = &((struct vnode *)fp->f_data)->v_uvm.u_obj;
  189                 uoff = SCARG(uap, pos);
  190         } else {
  191                 fp = NULL;
  192                 uobj = NULL;
  193                 uoff = 0;
  194         }
  195 
  196         if (vaddr == 0)
  197                 vaddr = uvm_map_hint(p, prot);
  198 
  199         /* prevent a user requested address from falling in heap space */
  200         if ((vaddr + size > (vaddr_t)p->p_vmspace->vm_daddr) &&
  201             (vaddr < (vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ)) {
  202                 if (flags & UVM_FLAG_FIXED) {
  203                         error = EINVAL;
  204                         goto done;
  205                 }
  206                 vaddr = round_page((vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ);
  207         }
  208 again:
  209 
  210         if (uvm_map_findspace(&p->p_vmspace->vm_map, vaddr, size,
  211             &vaddr, uobj, uoff, 0, flags) == NULL) {
  212                 if (flags & UVM_FLAG_FIXED)
  213                         error = EINVAL;
  214                 else
  215                         error = ENOMEM;
  216         } else {
  217                 /* prevent a returned address from falling in heap space */
  218                 if ((vaddr + size > (vaddr_t)p->p_vmspace->vm_daddr)
  219                     && (vaddr < (vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ)) {
  220                         vaddr = round_page((vaddr_t)p->p_vmspace->vm_daddr +
  221                             MAXDSIZ);
  222                         goto again;
  223                 }
  224                 error = 0;
  225                 *retval = (register_t)(vaddr);
  226         }
  227 done:
  228         if (fp != NULL)
  229                 FRELE(fp);
  230         return (error);
  231 }
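
      /*
       * Editor's sketch (not part of the original source): how a userland
       * program might use mquery(2) against this implementation to probe
       * for a spot where a fixed file mapping would fit; "fd" and the
       * 64 KB length are hypothetical.
       *
       *      void *hint = (void *)0x40000000;
       *      void *where = mquery(hint, 65536, PROT_READ, 0, fd, 0);
       *      if (where == MAP_FAILED)
       *              err(1, "mquery");
       *
       * On success, "where" can then be handed to mmap() with MAP_FIXED.
       */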
  232 
  233 /*
  234  * sys_mincore: determine if pages are in core or not.
  235  */
  236 
  237 /* ARGSUSED */
  238 int
  239 sys_mincore(p, v, retval)
  240         struct proc *p;
  241         void *v;
  242         register_t *retval;
  243 {
  244         struct sys_mincore_args /* {
  245                 syscallarg(void *) addr;
  246                 syscallarg(size_t) len;
  247                 syscallarg(char *) vec;
  248         } */ *uap = v;
  249         vm_page_t m;
  250         char *vec, pgi;
  251         struct uvm_object *uobj;
  252         struct vm_amap *amap;
  253         struct vm_anon *anon;
  254         vm_map_entry_t entry;
  255         vaddr_t start, end, lim;
  256         vm_map_t map;
  257         vsize_t len, npgs;
  258         int error = 0;
  259 
  260         map = &p->p_vmspace->vm_map;
  261 
  262         start = (vaddr_t)SCARG(uap, addr);
  263         len = SCARG(uap, len);
  264         vec = SCARG(uap, vec);
  265 
  266         if (start & PAGE_MASK)
  267                 return (EINVAL);
  268         len = round_page(len);
  269         end = start + len;
  270         if (end <= start)
  271                 return (EINVAL);
  272 
  273         npgs = len >> PAGE_SHIFT;
  274 
  275         /*
  276          * Lock down vec, so our returned status isn't outdated by
  277          * storing the status byte for a page.
  278          */
  279         if ((error = uvm_vslock(p, vec, npgs, VM_PROT_WRITE)) != 0)
  280                 return (error);
  281 
  282         vm_map_lock_read(map);
  283 
  284         if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
  285                 error = ENOMEM;
  286                 goto out;
  287         }
  288 
  289         for (/* nothing */;
  290              entry != &map->header && entry->start < end;
  291              entry = entry->next) {
  292                 KASSERT(!UVM_ET_ISSUBMAP(entry));
  293                 KASSERT(start >= entry->start);
  294 
  295                 /* Make sure there are no holes. */
  296                 if (entry->end < end &&
  297                      (entry->next == &map->header ||
  298                       entry->next->start > entry->end)) {
  299                         error = ENOMEM;
  300                         goto out;
  301                 }
  302 
  303                 lim = end < entry->end ? end : entry->end;
  304 
  305                 /*
  306                  * Special case for objects with no "real" pages.  Those
  307                  * are always considered resident (mapped devices).
  308                  */
  309                 if (UVM_ET_ISOBJ(entry)) {
  310                         KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
  311                         if (entry->object.uvm_obj->pgops->pgo_releasepg
  312                             == NULL) {
  313                                 pgi = 1;
  314                                 for (/* nothing */; start < lim;
  315                                      start += PAGE_SIZE, vec++)
  316                                         copyout(&pgi, vec, sizeof(char));
  317                                 continue;
  318                         }
  319                 }
  320 
  321                 amap = entry->aref.ar_amap;     /* top layer */
  322                 uobj = entry->object.uvm_obj;   /* bottom layer */
  323 
  324                 if (uobj != NULL)
  325                         simple_lock(&uobj->vmobjlock);
  326 
  327                 for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
  328                         pgi = 0;
  329                         if (amap != NULL) {
  330                                 /* Check the top layer first. */
  331                                 anon = amap_lookup(&entry->aref,
  332                                     start - entry->start);
  333                                 /* Don't need to lock anon here. */
  334                                 if (anon != NULL && anon->an_page != NULL) {
  335                                         /*
  336                                          * Anon has the page for this entry
  337                                          * offset.
  338                                          */
  339                                         pgi = 1;
  340                                 }
  341                         }
  342 
  343                         if (uobj != NULL && pgi == 0) {
  344                                 /* Check the bottom layer. */
  345                                 m = uvm_pagelookup(uobj,
  346                                     entry->offset + (start - entry->start));
  347                                 if (m != NULL) {
  348                                         /*
  349                                          * Object has the page for this entry
  350                                          * offset.
  351                                          */
  352                                         pgi = 1;
  353                                 }
  354                         }
  355 
  356                         copyout(&pgi, vec, sizeof(char));
  357                 }
  358 
  359                 if (uobj != NULL)
  360                         simple_unlock(&uobj->vmobjlock);
  361         }
  362 
  363  out:
  364         vm_map_unlock_read(map);
  365         uvm_vsunlock(p, SCARG(uap, vec), npgs);
  366         return (error);
  367 }
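
      /*
       * Editor's sketch (not part of the original source): typical
       * userland use of mincore(2) as implemented above.  One status byte
       * is copied out per page; a value of 1 means the page is resident.
       *
       *      char *vec = malloc(len / getpagesize());
       *      if (mincore(addr, len, vec) == -1)
       *              err(1, "mincore");
       *      for (i = 0; i < len / getpagesize(); i++)
       *              resident += vec[i] & 1;
       */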
  368 
  369 /*
  370  * sys_mmap: mmap system call.
  371  *
  372  * => file offset and address may not be page aligned
  373  *    - if MAP_FIXED, offset and address must have the same remainder mod PAGE_SIZE
  374  *    - if address isn't page aligned the mapping starts at trunc_page(addr)
  375  *      and the return value is adjusted up by the page offset.
  376  */
  377 
  378 int
  379 sys_mmap(p, v, retval)
  380         struct proc *p;
  381         void *v;
  382         register_t *retval;
  383 {
  384         struct sys_mmap_args /* {
  385                 syscallarg(void *) addr;
  386                 syscallarg(size_t) len;
  387                 syscallarg(int) prot;
  388                 syscallarg(int) flags;
  389                 syscallarg(int) fd;
  390                 syscallarg(long) pad;
  391                 syscallarg(off_t) pos;
  392         } */ *uap = v;
  393         vaddr_t addr;
  394         struct vattr va;
  395         off_t pos;
  396         vsize_t size, pageoff;
  397         vm_prot_t prot, maxprot;
  398         int flags, fd;
  399         vaddr_t vm_min_address = VM_MIN_ADDRESS;
  400         struct filedesc *fdp = p->p_fd;
  401         struct file *fp = NULL;
  402         struct vnode *vp;
  403         caddr_t handle;
  404         int error;
  405 
  406         /*
  407          * first, extract syscall args from the uap.
  408          */
  409 
  410         addr = (vaddr_t) SCARG(uap, addr);
  411         size = (vsize_t) SCARG(uap, len);
  412         prot = SCARG(uap, prot);
  413         flags = SCARG(uap, flags);
  414         fd = SCARG(uap, fd);
  415         pos = SCARG(uap, pos);
  416 
  417         /*
  418          * Fixup the old deprecated MAP_COPY into MAP_PRIVATE, and
  419          * validate the flags.
  420          */
  421         if ((prot & VM_PROT_ALL) != prot)
  422                 return (EINVAL);
  423         if ((flags & MAP_FLAGMASK) != flags)
  424                 return (EINVAL);
  425         if (flags & MAP_COPY)
  426                 flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
  427         if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
  428                 return (EINVAL);
  429 
  430         /*
  431          * align file position and save offset.  adjust size.
  432          */
  433         ALIGN_ADDR(pos, size, pageoff);
  434 
  435         /*
  436          * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr" 
  437          */
  438 
  439         if (flags & MAP_FIXED) {
  440 
  441                 /* adjust address by the same amount as we did the offset */
  442                 addr -= pageoff;
  443                 if (addr & PAGE_MASK)
  444                         return (EINVAL);                /* not page aligned */
  445 
  446                 if (addr > SIZE_MAX - size)
  447                         return (EINVAL);                /* no wrapping! */
  448                 if (VM_MAXUSER_ADDRESS > 0 &&
  449                     (addr + size) > VM_MAXUSER_ADDRESS)
  450                         return (EINVAL);
  451                 if (vm_min_address > 0 && addr < vm_min_address)
  452                         return (EINVAL);
  453 
  454         } else {
  455 
  456                 /*
  457                  * not fixed: make sure we skip over the largest possible heap.
  458                  * we will refine our guess later (e.g. to account for VAC, etc)
  459                  */
  460                 if (addr == 0)
  461                         addr = uvm_map_hint(p, prot);
  462                 else if (!(flags & MAP_TRYFIXED) &&
  463                     addr < (vaddr_t)p->p_vmspace->vm_daddr)
  464                         addr = uvm_map_hint(p, prot);
  465         }
  466 
  467         /*
  468          * check for file mappings (i.e. not anonymous) and verify file.
  469          */
  470         if ((flags & MAP_ANON) == 0) {
  471 
  472                 if ((fp = fd_getfile(fdp, fd)) == NULL)
  473                         return (EBADF);
  474 
  475                 FREF(fp);
  476 
  477                 if (fp->f_type != DTYPE_VNODE) {
  478                         error = ENODEV;         /* only mmap vnodes! */
  479                         goto out;
  480                 }
  481                 vp = (struct vnode *)fp->f_data;        /* convert to vnode */
  482 
  483                 if (vp->v_type != VREG && vp->v_type != VCHR &&
  484                     vp->v_type != VBLK) {
  485                         error = ENODEV; /* only REG/CHR/BLK support mmap */
  486                         goto out;
  487                 }
  488 
  489                 if (vp->v_type == VREG && (pos + size) < pos) {
  490                         error = EINVAL;         /* no offset wrapping */
  491                         goto out;
  492                 }
  493 
  494                 /* special case: catch SunOS style /dev/zero */
  495                 if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
  496                         flags |= MAP_ANON;
  497                         FRELE(fp);
  498                         fp = NULL;
  499                         goto is_anon;
  500                 }
  501 
  502                 /*
  503                  * Old programs may not select a specific sharing type, so
  504                  * default to an appropriate one.
  505                  *
  506                  * XXX: how does MAP_ANON fit in the picture?
  507                  */
  508                 if ((flags & (MAP_SHARED|MAP_PRIVATE)) == 0) {
  509 #if defined(DEBUG)
  510                         printf("WARNING: defaulted mmap() share type to "
  511                            "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
  512                            "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
  513                             p->p_comm);
  514 #endif
  515                         if (vp->v_type == VCHR)
  516                                 flags |= MAP_SHARED;    /* for a device */
  517                         else
  518                                 flags |= MAP_PRIVATE;   /* for a file */
  519                 }
  520 
  521                 /* 
  522                  * MAP_PRIVATE device mappings don't make sense (and aren't
  523                  * supported anyway).  However, some programs rely on this,
  524                  * so just change it to MAP_SHARED.
  525                  */
  526                 if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
  527                         flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
  528                 }
  529 
  530                 /*
  531                  * now check protection
  532                  */
  533 
  534                 maxprot = VM_PROT_EXECUTE;
  535 
  536                 /* check read access */
  537                 if (fp->f_flag & FREAD)
  538                         maxprot |= VM_PROT_READ;
  539                 else if (prot & PROT_READ) {
  540                         error = EACCES;
  541                         goto out;
  542                 }
  543 
  544                 /* check write access, shared case first */
  545                 if (flags & MAP_SHARED) {
  546                         /*
  547                          * if the file is writable, only add PROT_WRITE to
  548                          * maxprot if the file is not immutable or append-only.
  549                          * otherwise, if we have asked for PROT_WRITE, return
  550                          * EPERM.
  551                          */
  552                         if (fp->f_flag & FWRITE) {
  553                                 if ((error =
  554                                     VOP_GETATTR(vp, &va, p->p_ucred, p)))
  555                                         goto out;
  556                                 if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
  557                                         maxprot |= VM_PROT_WRITE;
  558                                 else if (prot & PROT_WRITE) {
  559                                         error = EPERM;
  560                                         goto out;
  561                                 }
  562                         } else if (prot & PROT_WRITE) {
  563                                 error = EACCES;
  564                                 goto out;
  565                         }
  566                 } else {
  567                         /* MAP_PRIVATE mappings can always be written to */
  568                         maxprot |= VM_PROT_WRITE;
  569                 }
  570 
  571                 /*
  572                  * set handle to vnode
  573                  */
  574 
  575                 handle = (caddr_t)vp;
  576 
  577         } else {                /* MAP_ANON case */
  578                 /*
  579                  * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
  580                  */
  581                 if (fd != -1) {
  582                         error = EINVAL;
  583                         goto out;
  584                 }
  585 
  586  is_anon:               /* label for SunOS style /dev/zero */
  587                 handle = NULL;
  588                 maxprot = VM_PROT_ALL;
  589                 pos = 0;
  590         }
  591 
  592         if ((flags & MAP_ANON) != 0 ||
  593             ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) {
  594                 if (size >
  595                     (p->p_rlimit[RLIMIT_DATA].rlim_cur - ctob(p->p_vmspace->vm_dused))) {
  596                         error = ENOMEM;
  597                         goto out;
  598                 }
  599         }
  600 
  601         /*
  602          * now let kernel internal function uvm_mmap do the work.
  603          */
  604 
  605         error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
  606             flags, handle, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, p);
  607 
  608         if (error == 0)
  609                 /* remember to add offset */
  610                 *retval = (register_t)(addr + pageoff);
  611 
  612 out:
  613         if (fp)
  614                 FRELE(fp);      
  615         return (error);
  616 }
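
      /*
       * Editor's note (not part of the original source): a worked example
       * of the offset handling above, assuming 4 KB pages.  A call such as
       *
       *      mmap(NULL, 0x100, PROT_READ, MAP_PRIVATE, fd, 0x1234)
       *
       * has pos truncated to 0x1000 and size rounded up to 0x1000 by
       * ALIGN_ADDR (pageoff = 0x234), so one page of the file is mapped
       * starting at offset 0x1000.  The address returned to the caller is
       * the chosen base plus 0x234, i.e. it points exactly at file offset
       * 0x1234.  With MAP_FIXED, the supplied addr must share that same
       * sub-page offset or the call fails with EINVAL.
       */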
  617 
  618 /*
  619  * sys_msync: the msync system call (a front-end for flush)
  620  */
  621 
  622 int
  623 sys_msync(p, v, retval)
  624         struct proc *p;
  625         void *v;
  626         register_t *retval;
  627 {
  628         struct sys_msync_args /* {
  629                 syscallarg(void *) addr;
  630                 syscallarg(size_t) len;
  631                 syscallarg(int) flags;
  632         } */ *uap = v;
  633         vaddr_t addr;
  634         vsize_t size, pageoff;
  635         vm_map_t map;
  636         int rv, flags, uvmflags;
  637 
  638         /*
  639          * extract syscall args from the uap
  640          */
  641 
  642         addr = (vaddr_t)SCARG(uap, addr);
  643         size = (vsize_t)SCARG(uap, len);
  644         flags = SCARG(uap, flags);
  645 
  646         /* sanity check flags */
  647         if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
  648                         (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
  649                         (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
  650                 return (EINVAL);
  651         if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
  652                 flags |= MS_SYNC;
  653 
  654         /*
  655          * align the address to a page boundary, and adjust the size accordingly
  656          */
  657         ALIGN_ADDR(addr, size, pageoff);
  658         if (addr > SIZE_MAX - size)
  659                 return (EINVAL);                /* disallow wrap-around. */
  660 
  661         /*
  662          * get map
  663          */
  664 
  665         map = &p->p_vmspace->vm_map;
  666 
  667         /*
  668          * XXXCDC: do we really need this semantic?
  669          *
  670          * XXX Gak!  If size is zero we are supposed to sync "all modified
  671          * pages with the region containing addr".  Unfortunately, we
  672          * don't really keep track of individual mmaps so we approximate
  673          * by flushing the range of the map entry containing addr.
  674          * This can be incorrect if the region splits or is coalesced
  675          * with a neighbor.
  676          */
  677         if (size == 0) {
  678                 vm_map_entry_t entry;
  679                 
  680                 vm_map_lock_read(map);
  681                 rv = uvm_map_lookup_entry(map, addr, &entry);
  682                 if (rv == TRUE) {
  683                         addr = entry->start;
  684                         size = entry->end - entry->start;
  685                 }
  686                 vm_map_unlock_read(map);
  687                 if (rv == FALSE)
  688                         return (EINVAL);
  689         }
  690 
  691         /*
  692          * translate MS_ flags into PGO_ flags
  693          */
  694         uvmflags = PGO_CLEANIT;
  695         if (flags & MS_INVALIDATE)
  696                 uvmflags |= PGO_FREE;
  697         if (flags & MS_SYNC)
  698                 uvmflags |= PGO_SYNCIO;
  699         else
  700                 uvmflags |= PGO_SYNCIO;  /* XXXCDC: force sync for now! */
  701 
  702         return (uvm_map_clean(map, addr, addr+size, uvmflags));
  703 }
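
      /*
       * Editor's note (not part of the original source): given the checks
       * above, msync(addr, len, MS_SYNC) and
       * msync(addr, len, MS_ASYNC | MS_INVALIDATE) are accepted, while
       * flags of 0 or MS_ASYNC | MS_SYNC return EINVAL.  As the XXXCDC
       * comment notes, MS_ASYNC requests are currently forced synchronous.
       */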
  704 
  705 /*
  706  * sys_munmap: unmap a user's memory
  707  */
  708 
  709 int
  710 sys_munmap(p, v, retval)
  711         struct proc *p;
  712         void *v;
  713         register_t *retval;
  714 {
  715         struct sys_munmap_args /* {
  716                 syscallarg(void *) addr;
  717                 syscallarg(size_t) len;
  718         } */ *uap = v;
  719         vaddr_t addr;
  720         vsize_t size, pageoff;
  721         vm_map_t map;
  722         vaddr_t vm_min_address = VM_MIN_ADDRESS;
  723         struct vm_map_entry *dead_entries;
  724 
  725         /*
  726          * get syscall args...
  727          */
  728 
  729         addr = (vaddr_t) SCARG(uap, addr);
  730         size = (vsize_t) SCARG(uap, len);
  731         
  732         /*
  733          * align the address to a page boundary, and adjust the size accordingly
  734          */
  735         ALIGN_ADDR(addr, size, pageoff);
  736 
  737         /*
  738          * Check for illegal addresses.  Watch out for address wrap...
  739          * Note that VM_*_ADDRESS are not constants due to casts (argh).
  740          */
  741         if (addr > SIZE_MAX - size)
  742                 return (EINVAL);
  743         if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
  744                 return (EINVAL);
  745         if (vm_min_address > 0 && addr < vm_min_address)
  746                 return (EINVAL);
  747         map = &p->p_vmspace->vm_map;
  748 
  749 
  750         vm_map_lock(map);       /* lock map so we can checkprot */
  751 
  752         /*
  753          * interesting system call semantic: make sure entire range is 
  754          * allocated before allowing an unmap.
  755          */
  756 
  757         if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
  758                 vm_map_unlock(map);
  759                 return (EINVAL);
  760         }
  761 
  762         /*
  763          * doit!
  764          */
  765         uvm_unmap_remove(map, addr, addr + size, &dead_entries, p);
  766 
  767         vm_map_unlock(map);     /* and unlock */
  768 
  769         if (dead_entries != NULL)
  770                 uvm_unmap_detach(dead_entries, 0);
  771 
  772         return (0);
  773 }
  774 
  775 /*
  776  * sys_mprotect: the mprotect system call
  777  */
  778 
  779 int
  780 sys_mprotect(p, v, retval)
  781         struct proc *p;
  782         void *v;
  783         register_t *retval;
  784 {
  785         struct sys_mprotect_args /* {
  786                 syscallarg(void *) addr;
  787                 syscallarg(size_t) len;
  788                 syscallarg(int) prot;
  789         } */ *uap = v;
  790         vaddr_t addr;
  791         vsize_t size, pageoff;
  792         vm_prot_t prot;
  793 
  794         /*
  795          * extract syscall args from uap
  796          */
  797 
  798         addr = (vaddr_t)SCARG(uap, addr);
  799         size = (vsize_t)SCARG(uap, len);
  800         prot = SCARG(uap, prot);
  801         
  802         if ((prot & VM_PROT_ALL) != prot)
  803                 return (EINVAL);
  804 
  805         /*
  806          * align the address to a page boundary, and adjust the size accordingly
  807          */
  808         ALIGN_ADDR(addr, size, pageoff);
  809         if (addr > SIZE_MAX - size)
  810                 return (EINVAL);                /* disallow wrap-around. */
  811 
  812         return (uvm_map_protect(&p->p_vmspace->vm_map, addr, addr+size,
  813             prot, FALSE));
  814 }
  815 
  816 /*
  817  * sys_minherit: the minherit system call
  818  */
  819 
  820 int
  821 sys_minherit(p, v, retval)
  822         struct proc *p;
  823         void *v;
  824         register_t *retval;
  825 {
  826         struct sys_minherit_args /* {
  827                 syscallarg(void *) addr;
  828                 syscallarg(size_t) len;
  829                 syscallarg(int) inherit;
  830         } */ *uap = v;
  831         vaddr_t addr;
  832         vsize_t size, pageoff;
  833         vm_inherit_t inherit;
  834         
  835         addr = (vaddr_t)SCARG(uap, addr);
  836         size = (vsize_t)SCARG(uap, len);
  837         inherit = SCARG(uap, inherit);
  838 
  839         /*
  840          * align the address to a page boundary, and adjust the size accordingly
  841          */
  842         ALIGN_ADDR(addr, size, pageoff);
  843         if (addr > SIZE_MAX - size)
  844                 return (EINVAL);                /* disallow wrap-around. */
  845         
  846         return (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
  847             inherit));
  848 }
  849 
  850 /*
  851  * sys_madvise: give advice about memory usage.
  852  */
  853 
  854 /* ARGSUSED */
  855 int
  856 sys_madvise(p, v, retval)
  857         struct proc *p;
  858         void *v;
  859         register_t *retval;
  860 {
  861         struct sys_madvise_args /* {
  862                 syscallarg(void *) addr;
  863                 syscallarg(size_t) len;
  864                 syscallarg(int) behav;
  865         } */ *uap = v;
  866         vaddr_t addr;
  867         vsize_t size, pageoff;
  868         int advice, error;
  869         
  870         addr = (vaddr_t)SCARG(uap, addr);
  871         size = (vsize_t)SCARG(uap, len);
  872         advice = SCARG(uap, behav);
  873 
  874         /*
  875          * align the address to a page boundary, and adjust the size accordingly
  876          */
  877         ALIGN_ADDR(addr, size, pageoff);
  878         if (addr > SIZE_MAX - size)
  879                 return (EINVAL);                /* disallow wrap-around. */
  880 
  881         switch (advice) {
  882         case MADV_NORMAL:
  883         case MADV_RANDOM:
  884         case MADV_SEQUENTIAL:
  885                 error = uvm_map_advice(&p->p_vmspace->vm_map, addr,
  886                     addr + size, advice);
  887                 break;
  888 
  889         case MADV_WILLNEED:
  890                 /*
  891                  * Activate all these pages, pre-faulting them in if
  892                  * necessary.
  893                  */
  894                 /*
  895                  * XXX IMPLEMENT ME.
  896                  * Should invent a "weak" mode for uvm_fault()
  897                  * which would only do the PGO_LOCKED pgo_get().
  898                  */
  899                 return (0);
  900 
  901         case MADV_DONTNEED:
  902                 /*
  903                  * Deactivate all these pages.  We don't need them
  904                  * any more.  We don't, however, toss the data in
  905                  * the pages.
  906                  */
  907                 error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
  908                     PGO_DEACTIVATE);
  909                 break;
  910 
  911         case MADV_FREE:
  912                 /*
  913                  * These pages contain no valid data, and may be
  914                  * garbage-collected.  Toss all resources, including
  915                  * any swap space in use.
  916                  */
  917                 error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
  918                     PGO_FREE);
  919                 break;
  920 
  921         case MADV_SPACEAVAIL:
  922                 /*
  923                  * XXXMRG What is this?  I think it's:
  924                  *
  925                  *      Ensure that we have allocated backing-store
  926                  *      for these pages.
  927                  *
  928                  * This is going to require changes to the page daemon,
  929                  * as it will free swap space allocated to pages in core.
  930                  * There's also what to do for device/file/anonymous memory.
  931                  */
  932                 return (EINVAL);
  933 
  934         default:
  935                 return (EINVAL);
  936         }
  937 
  938         return (error);
  939 }
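
      /*
       * Editor's sketch (not part of the original source): a process that
       * is finished with the contents of a large scratch buffer, but wants
       * to keep the mapping itself, can hand the pages back with
       *
       *      if (madvise(buf, buflen, MADV_FREE) == -1)
       *              err(1, "madvise");
       *
       * which reaches the PGO_FREE case above and discards both the page
       * contents and any swap space backing them.
       */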
  940 
  941 /*
  942  * sys_mlock: memory lock
  943  */
  944 
  945 int
  946 sys_mlock(p, v, retval)
  947         struct proc *p;
  948         void *v;
  949         register_t *retval;
  950 {
  951         struct sys_mlock_args /* {
  952                 syscallarg(const void *) addr;
  953                 syscallarg(size_t) len;
  954         } */ *uap = v;
  955         vaddr_t addr;
  956         vsize_t size, pageoff;
  957         int error;
  958 
  959         /*
  960          * extract syscall args from uap
  961          */
  962         addr = (vaddr_t)SCARG(uap, addr);
  963         size = (vsize_t)SCARG(uap, len);
  964 
  965         /*
  966          * align the address to a page boundary and adjust the size accordingly
  967          */
  968         ALIGN_ADDR(addr, size, pageoff);
  969         if (addr > SIZE_MAX - size)
  970                 return (EINVAL);                /* disallow wrap-around. */
  971 
  972         if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
  973                 return (EAGAIN);
  974 
  975 #ifdef pmap_wired_count
  976         if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
  977                         p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
  978                 return (EAGAIN);
  979 #else
  980         if ((error = suser(p, 0)) != 0)
  981                 return (error);
  982 #endif
  983 
  984         error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE,
  985             0);
  986         return (error == 0 ? 0 : ENOMEM);
  987 }
  988 
  989 /*
  990  * sys_munlock: unlock wired pages
  991  */
  992 
  993 int
  994 sys_munlock(p, v, retval)
  995         struct proc *p;
  996         void *v;
  997         register_t *retval;
  998 {
  999         struct sys_munlock_args /* {
 1000                 syscallarg(const void *) addr;
 1001                 syscallarg(size_t) len;
 1002         } */ *uap = v;
 1003         vaddr_t addr;
 1004         vsize_t size, pageoff;
 1005         int error;
 1006 
 1007         /*
 1008          * extract syscall args from uap
 1009          */
 1010 
 1011         addr = (vaddr_t)SCARG(uap, addr);
 1012         size = (vsize_t)SCARG(uap, len);
 1013 
 1014         /*
 1015          * align the address to a page boundary, and adjust the size accordingly
 1016          */
 1017         ALIGN_ADDR(addr, size, pageoff);
 1018         if (addr > SIZE_MAX - size)
 1019                 return (EINVAL);                /* disallow wrap-around. */
 1020 
 1021 #ifndef pmap_wired_count
 1022         if ((error = suser(p, 0)) != 0)
 1023                 return (error);
 1024 #endif
 1025 
 1026         error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE,
 1027             0);
 1028         return (error == 0 ? 0 : ENOMEM);
 1029 }
 1030 
 1031 /*
 1032  * sys_mlockall: lock all pages mapped into an address space.
 1033  */
 1034 
 1035 int
 1036 sys_mlockall(p, v, retval)
 1037         struct proc *p;
 1038         void *v;
 1039         register_t *retval;
 1040 {
 1041         struct sys_mlockall_args /* {
 1042                 syscallarg(int) flags;
 1043         } */ *uap = v;
 1044         int error, flags;
 1045 
 1046         flags = SCARG(uap, flags);
 1047 
 1048         if (flags == 0 ||
 1049             (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
 1050                 return (EINVAL);
 1051 
 1052 #ifndef pmap_wired_count
 1053         if ((error = suser(p, 0)) != 0)
 1054                 return (error);
 1055 #endif
 1056 
 1057         error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
 1058             p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
 1059         if (error != 0 && error != ENOMEM)
 1060                 return (EAGAIN);
 1061         return (error);
 1062 }
 1063 
 1064 /*
 1065  * sys_munlockall: unlock all pages mapped into an address space.
 1066  */
 1067 
 1068 int
 1069 sys_munlockall(p, v, retval)
 1070         struct proc *p;
 1071         void *v;
 1072         register_t *retval;
 1073 {
 1074 
 1075         (void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
 1076         return (0);
 1077 }
 1078 
 1079 /*
 1080  * uvm_mmap: internal version of mmap
 1081  *
 1082  * - used by sys_mmap, exec, and sysv shm
 1083  * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
 1084  *      sysv shm uses "named anonymous memory")
 1085  * - caller must page-align the file offset
 1086  */
 1087 
 1088 int
 1089 uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit, p)
 1090         vm_map_t map;
 1091         vaddr_t *addr;
 1092         vsize_t size;
 1093         vm_prot_t prot, maxprot;
 1094         int flags;
 1095         caddr_t handle;         /* XXX: VNODE? */
 1096         voff_t foff;
 1097         vsize_t locklimit;
 1098         struct proc *p;
 1099 {
 1100         struct uvm_object *uobj;
 1101         struct vnode *vp;
 1102         int error;
 1103         int advice = UVM_ADV_NORMAL;
 1104         uvm_flag_t uvmflag = 0;
 1105         vsize_t align = 0;      /* userland page size */
 1106 
 1107         /*
 1108          * check params
 1109          */
 1110 
 1111         if (size == 0)
 1112                 return(0);
 1113         if (foff & PAGE_MASK)
 1114                 return(EINVAL);
 1115         if ((prot & maxprot) != prot)
 1116                 return(EINVAL);
 1117 
 1118         /*
 1119          * for non-fixed mappings, round off the suggested address.
 1120          * for fixed mappings, check alignment and zap old mappings.
 1121          */
 1122 
 1123         if ((flags & MAP_FIXED) == 0) {
 1124                 *addr = round_page(*addr);      /* round */
 1125         } else {
 1126                 if (*addr & PAGE_MASK)
 1127                         return(EINVAL);
 1128                 uvmflag |= UVM_FLAG_FIXED;
 1129                 uvm_unmap_p(map, *addr, *addr + size, p);       /* zap! */
 1130         }
 1131 
 1132         /*
 1133          * handle anon vs. non-anon mappings.   for non-anon mappings attach
 1134          * to underlying vm object.
 1135          */
 1136 
 1137         if (flags & MAP_ANON) {
 1138                 if ((flags & MAP_FIXED) == 0 && size >= __LDPGSZ)
 1139                         align = __LDPGSZ;
 1140                 foff = UVM_UNKNOWN_OFFSET;
 1141                 uobj = NULL;
 1142                 if ((flags & MAP_SHARED) == 0)
 1143                         /* XXX: defer amap create */
 1144                         uvmflag |= UVM_FLAG_COPYONW;
 1145                 else
 1146                         /* shared: create amap now */
 1147                         uvmflag |= UVM_FLAG_OVERLAY;
 1148 
 1149         } else {
 1150 
 1151                 vp = (struct vnode *) handle;   /* get vnode */
 1152                 if (vp->v_type != VCHR) {
 1153                         uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
 1154                            maxprot : (maxprot & ~VM_PROT_WRITE));
 1155 
 1156 #ifndef UBC
 1157                         /*
 1158                          * XXXCDC: hack from old code
 1159                          * don't allow vnodes which have been mapped
 1160                          * shared-writeable to persist [forces them to be
 1161                          * flushed out when last reference goes].
 1162                          * XXXCDC: interesting side effect: avoids a bug.
 1163                          * note that in WRITE [ufs_readwrite.c] that we
 1164                          * allocate buffer, uncache, and then do the write.
 1165                          * the problem with this is that if the uncache causes
 1166                          * VM data to be flushed to the same area of the file
 1167                          * we are writing to... in that case we've got the
 1168                          * buffer locked and our process goes to sleep forever.
 1169                          *
 1170                          * XXXCDC: checking maxprot protects us from the
 1171                          * "persistbug" program but this is not a long term
 1172                          * solution.
 1173                          * 
 1174                          * XXXCDC: we don't bother calling uncache with the vp
 1175                          * VOP_LOCKed since we know that we are already
 1176                          * holding a valid reference to the uvn (from the
 1177                          * uvn_attach above), and thus it is impossible for
 1178                          * the uncache to kill the uvn and trigger I/O.
 1179                          */
 1180                         if (flags & MAP_SHARED) {
 1181                                 if ((prot & VM_PROT_WRITE) ||
 1182                                     (maxprot & VM_PROT_WRITE)) {
 1183                                         uvm_vnp_uncache(vp);
 1184                                 }
 1185                         }
 1186 #else
 1187                         /* XXX for now, attach doesn't gain a ref */
 1188                         VREF(vp);
 1189 #endif
 1190                 } else {
 1191                         uobj = udv_attach((void *) &vp->v_rdev,
 1192                             (flags & MAP_SHARED) ? maxprot :
 1193                             (maxprot & ~VM_PROT_WRITE), foff, size);
 1194                         /*
 1195                          * XXX Some devices don't like to be mapped with
 1196                          * XXX PROT_EXEC, but we don't really have a
 1197                          * XXX better way of handling this, right now
 1198                          */
 1199                         if (uobj == NULL && (prot & PROT_EXEC) == 0) {
 1200                                 maxprot &= ~VM_PROT_EXECUTE;
 1201                                 uobj = udv_attach((void *) &vp->v_rdev,
 1202                                     (flags & MAP_SHARED) ? maxprot :
 1203                                     (maxprot & ~VM_PROT_WRITE), foff, size);
 1204                         }
 1205                         advice = UVM_ADV_RANDOM;
 1206                 }
 1207                 
 1208                 if (uobj == NULL)
 1209                         return((vp->v_type == VREG) ? ENOMEM : EINVAL);
 1210 
 1211                 if ((flags & MAP_SHARED) == 0)
 1212                         uvmflag |= UVM_FLAG_COPYONW;
 1213         }
 1214 
 1215         /*
 1216          * set up mapping flags
 1217          */
 1218 
 1219         uvmflag = UVM_MAPFLAG(prot, maxprot, 
 1220                         (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
 1221                         advice, uvmflag);
 1222 
 1223         error = uvm_map_p(map, addr, size, uobj, foff, align, uvmflag, p);
 1224 
 1225         if (error == 0) {
 1226                 /*
 1227                  * POSIX 1003.1b -- if our address space was configured
 1228                  * to lock all future mappings, wire the one we just made.
 1229                  */
 1230                 if (prot == VM_PROT_NONE) {
 1231                         /*
 1232                          * No more work to do in this case.
 1233                          */
 1234                         return (0);
 1235                 }
 1236                 
 1237                 vm_map_lock(map);
 1238 
 1239                 if (map->flags & VM_MAP_WIREFUTURE) {
 1240                         if ((atop(size) + uvmexp.wired) > uvmexp.wiredmax
 1241 #ifdef pmap_wired_count
 1242                             || (locklimit != 0 && (size +
 1243                                  ptoa(pmap_wired_count(vm_map_pmap(map)))) >
 1244                                 locklimit)
 1245 #endif
 1246                         ) {
 1247                                 error = ENOMEM;
 1248                                 vm_map_unlock(map);
 1249                                 /* unmap the region! */
 1250                                 uvm_unmap(map, *addr, *addr + size);
 1251                                 goto bad;
 1252                         }
 1253                         /*
 1254                          * uvm_map_pageable() always returns the map
 1255                          * unlocked.
 1256                          */
 1257                         error = uvm_map_pageable(map, *addr, *addr + size,
 1258                             FALSE, UVM_LK_ENTER);
 1259                         if (error != 0) {
 1260                                 /* unmap the region! */
 1261                                 uvm_unmap(map, *addr, *addr + size);
 1262                                 goto bad;
 1263                         }
 1264                         return (0);
 1265                 }
 1266 
 1267                 vm_map_unlock(map);
 1268 
 1269                 return (0);
 1270         }
 1271 
 1272         /*
 1273          * errors: first detach from the uobj, if any.
 1274          */
 1275         
 1276         if (uobj)
 1277                 uobj->pgops->pgo_detach(uobj);
 1278 
 1279 bad:
 1280         return (error);
 1281 }
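
      /*
       * Editor's note (not part of the original source): the
       * VM_MAP_WIREFUTURE branch above is what gives a prior
       * mlockall(MCL_FUTURE) its effect.  For example, after
       *
       *      mlockall(MCL_CURRENT | MCL_FUTURE);
       *      p = mmap(NULL, len, PROT_READ | PROT_WRITE,
       *          MAP_ANON | MAP_PRIVATE, -1, 0);
       *
       * the new mapping is wired here, subject to the RLIMIT_MEMLOCK
       * value passed in as "locklimit", before uvm_mmap() returns.
       */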
