root/kern/kern_fork.c


DEFINITIONS

This source file includes the following definitions:
  1. fork_return
  2. sys_fork
  3. sys_vfork
  4. sys_rfork
  5. process_new
  6. fork1
  7. pidtaken
  8. proc_trampoline_mp

/*      $OpenBSD: kern_fork.c,v 1.92 2007/07/25 23:11:52 art Exp $      */
/*      $NetBSD: kern_fork.c,v 1.29 1996/02/09 18:59:34 christos Exp $  */

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)kern_fork.c 8.6 (Berkeley) 4/8/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/exec.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/acct.h>
#include <sys/ktrace.h>
#include <sys/sched.h>
#include <dev/rndvar.h>
#include <sys/pool.h>
#include <sys/mman.h>
#include <sys/ptrace.h>

#include <sys/syscallargs.h>

#include "systrace.h"
#include <dev/systrace.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_map.h>

int     nprocs = 1;             /* process 0 */
int     randompid;              /* when set to 1, PIDs are assigned randomly */
pid_t   lastpid;
struct  forkstat forkstat;

void fork_return(void *);
int pidtaken(pid_t);

void process_new(struct proc *, struct proc *);

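/*
 * First routine executed in the context of a newly-forked process
 * (passed to fork1() by sys_fork() below).  If the child is being
 * traced, deliver SIGTRAP so the debugger can take control before the
 * child returns to userland via child_return().
 */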
void
fork_return(void *arg)
{
        struct proc *p = (struct proc *)arg;

        if (p->p_flag & P_TRACED)
                psignal(p, SIGTRAP);

        child_return(p);
}

/*ARGSUSED*/
int
sys_fork(struct proc *p, void *v, register_t *retval)
{
        int flags;

        flags = FORK_FORK;
        if (p->p_ptmask & PTRACE_FORK)
                flags |= FORK_PTRACE;
        return (fork1(p, SIGCHLD, flags, NULL, 0,
            fork_return, NULL, retval, NULL));
}

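/*
 * vfork(2) differs from fork(2) in that FORK_PPWAIT makes the parent
 * sleep until the child has exec'd or exited; see the P_PPWAIT
 * handling near the end of fork1().
 */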
/*ARGSUSED*/
int
sys_vfork(struct proc *p, void *v, register_t *retval)
{
        return (fork1(p, SIGCHLD, FORK_VFORK|FORK_PPWAIT, NULL, 0, NULL,
            NULL, retval, NULL));
}

int
sys_rfork(struct proc *p, void *v, register_t *retval)
{
        struct sys_rfork_args /* {
                syscallarg(int) flags;
        } */ *uap = v;

        int rforkflags;
        int flags;

        flags = FORK_RFORK;
        rforkflags = SCARG(uap, flags);

        if ((rforkflags & RFPROC) == 0)
                return (EINVAL);

        switch (rforkflags & (RFFDG|RFCFDG)) {
        case (RFFDG|RFCFDG):
                return (EINVAL);
        case RFCFDG:
                flags |= FORK_CLEANFILES;
                break;
        case RFFDG:
                break;
        default:
                flags |= FORK_SHAREFILES;
                break;
        }

        if (rforkflags & RFNOWAIT)
                flags |= FORK_NOZOMBIE;

        if (rforkflags & RFMEM)
                flags |= FORK_SHAREVM;
#ifdef RTHREADS
        if (rforkflags & RFTHREAD)
                flags |= FORK_THREAD;
#endif

        return (fork1(p, SIGCHLD, flags, NULL, 0, NULL, NULL, retval, NULL));
}
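
/*
 * Illustrative only: userland selects the behaviours above through
 * rfork(2) flag combinations, e.g.
 *
 *      rfork(RFPROC | RFMEM | RFNOWAIT);
 *
 * asks for a child sharing the parent's address space (FORK_SHAREVM)
 * that will not be kept around as a zombie (FORK_NOZOMBIE).
 */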

/*
 * Allocate and initialize a new process.
 */
void
process_new(struct proc *newproc, struct proc *parent)
{
        struct process *pr;

        pr = pool_get(&process_pool, PR_WAITOK);
        pr->ps_mainproc = newproc;
        TAILQ_INIT(&pr->ps_threads);
        TAILQ_INSERT_TAIL(&pr->ps_threads, newproc, p_thr_link);
        newproc->p_p = pr;
}

/* print the 'table full' message at most once per 10 seconds */
struct timeval fork_tfmrate = { 10, 0 };

int
fork1(struct proc *p1, int exitsig, int flags, void *stack, size_t stacksize,
    void (*func)(void *), void *arg, register_t *retval,
    struct proc **rnewprocp)
{
        struct proc *p2;
        uid_t uid;
        struct vmspace *vm;
        int count;
        vaddr_t uaddr;
        int s;
        extern void endtsleep(void *);
        extern void realitexpire(void *);

        /*
         * Although process entries are dynamically created, we still keep
         * a global limit on the maximum number we will create. We reserve
         * the last 5 processes to root. The variable nprocs is the current
         * number of processes, maxproc is the limit.
         */
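        /*
         * For example, with a hypothetical maxproc of 1024, non-root
         * users may fork only while nprocs < 1019; root can consume the
         * remaining 5 slots.
         */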
        uid = p1->p_cred->p_ruid;
        if ((nprocs >= maxproc - 5 && uid != 0) || nprocs >= maxproc) {
                static struct timeval lasttfm;

                if (ratecheck(&lasttfm, &fork_tfmrate))
                        tablefull("proc");
                return (EAGAIN);
        }
        nprocs++;

        /*
         * Increment the count of procs running with this uid. Don't allow
         * a nonprivileged user to exceed their current limit.
         */
        count = chgproccnt(uid, 1);
        if (uid != 0 && count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) {
                (void)chgproccnt(uid, -1);
                nprocs--;
                return (EAGAIN);
        }

        uaddr = uvm_km_alloc1(kernel_map, USPACE, USPACE_ALIGN, 1);
        if (uaddr == 0) {
                chgproccnt(uid, -1);
                nprocs--;
                return (ENOMEM);
        }

        /*
         * From now on, we're committed to the fork and cannot fail.
         */

        /* Allocate new proc. */
        p2 = pool_get(&proc_pool, PR_WAITOK);

        p2->p_stat = SIDL;                      /* protect against others */
        p2->p_exitsig = exitsig;
        p2->p_forw = p2->p_back = NULL;

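        /*
         * With rthreads, a FORK_THREAD child becomes another thread in
         * the parent's existing struct process; otherwise process_new()
         * gives the child a process container of its own.
         */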
#ifdef RTHREADS
        if (flags & FORK_THREAD) {
                atomic_setbits_int(&p2->p_flag, P_THREAD);
                p2->p_p = p1->p_p;
                TAILQ_INSERT_TAIL(&p2->p_p->ps_threads, p2, p_thr_link);
        } else {
                process_new(p2, p1);
        }
#else
        process_new(p2, p1);
#endif

        /*
         * Make a proc table entry for the new process.
         * Start by zeroing the section of proc that is zero-initialized,
         * then copy the section that is copied directly from the parent.
         */
        bzero(&p2->p_startzero,
            (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
        bcopy(&p1->p_startcopy, &p2->p_startcopy,
            (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));

        /*
         * Initialize the timeouts.
         */
        timeout_set(&p2->p_sleep_to, endtsleep, p2);
        timeout_set(&p2->p_realit_to, realitexpire, p2);

        p2->p_cpu = p1->p_cpu;

        /*
         * Duplicate sub-structures as needed.
         * Increase reference counts on shared objects.
         * The p_stats substruct is set up in uvm_fork; p_sigacts is
         * handled below.
         */
        p2->p_flag = 0;
        p2->p_emul = p1->p_emul;
        if (p1->p_flag & P_PROFIL)
                startprofclock(p2);
        atomic_setbits_int(&p2->p_flag, p1->p_flag & (P_SUGID | P_SUGIDEXEC));
        if (flags & FORK_PTRACE)
                atomic_setbits_int(&p2->p_flag, p1->p_flag & P_TRACED);
#ifdef RTHREADS
        if (flags & FORK_THREAD) {
                /* nothing */
        } else
#endif
        {
                p2->p_p->ps_cred = pool_get(&pcred_pool, PR_WAITOK);
                bcopy(p1->p_p->ps_cred, p2->p_p->ps_cred,
                    sizeof(*p2->p_p->ps_cred));
                p2->p_p->ps_cred->p_refcnt = 1;
                crhold(p1->p_ucred);
        }

        /* bump references to the text vnode (for procfs) */
        p2->p_textvp = p1->p_textvp;
        if (p2->p_textvp)
                VREF(p2->p_textvp);

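        /*
         * Choose the child's file descriptor table: a fresh table
         * (FORK_CLEANFILES), the parent's table shared by reference
         * (FORK_SHAREFILES), or a private copy of it (the default).
         */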
        if (flags & FORK_CLEANFILES)
                p2->p_fd = fdinit(p1);
        else if (flags & FORK_SHAREFILES)
                p2->p_fd = fdshare(p1);
        else
                p2->p_fd = fdcopy(p1);

        /*
         * If ps_limit is still copy-on-write, bump refcnt,
         * otherwise get a copy that won't be modified.
         * (If PL_SHAREMOD is clear, the structure is shared
         * copy-on-write.)
         */
#ifdef RTHREADS
        if (flags & FORK_THREAD) {
                /* nothing */
        } else
#endif
        {
                if (p1->p_p->ps_limit->p_lflags & PL_SHAREMOD)
                        p2->p_p->ps_limit = limcopy(p1->p_p->ps_limit);
                else {
                        p2->p_p->ps_limit = p1->p_p->ps_limit;
                        p2->p_p->ps_limit->p_refcnt++;
                }
        }

        if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
                atomic_setbits_int(&p2->p_flag, P_CONTROLT);
        if (flags & FORK_PPWAIT)
                atomic_setbits_int(&p2->p_flag, P_PPWAIT);
        p2->p_pptr = p1;
        if (flags & FORK_NOZOMBIE)
                atomic_setbits_int(&p2->p_flag, P_NOZOMBIE);
        LIST_INIT(&p2->p_children);

#ifdef KTRACE
        /*
         * Copy traceflag and tracefile if enabled.
         * If not inherited, these were zeroed above.
         */
        if (p1->p_traceflag & KTRFAC_INHERIT) {
                p2->p_traceflag = p1->p_traceflag;
                if ((p2->p_tracep = p1->p_tracep) != NULL)
                        VREF(p2->p_tracep);
        }
#endif

        /*
         * Set the priority of the child to be that of the parent.
         * XXX should move p_estcpu into the region of struct proc which gets
         * copied.
         */
        scheduler_fork_hook(p1, p2);

        /*
         * Create signal actions for the child process.
         */
        if (flags & FORK_SIGHAND)
                sigactsshare(p1, p2);
        else
                p2->p_sigacts = sigactsinit(p1);

        /*
         * If the emulation has a process fork hook, call it now.
         */
        if (p2->p_emul->e_proc_fork)
                (*p2->p_emul->e_proc_fork)(p2, p1);

        p2->p_addr = (struct user *)uaddr;

        /*
         * Finish creating the child process.  It will return through a
         * different path later.
         */
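        /*
         * uvm_fork() builds the child's VM space (shared with the
         * parent when FORK_SHAREVM is set) and arranges for the child
         * to begin execution in func(arg), defaulting to
         * child_return(p2) when no entry point was supplied.
         */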
        uvm_fork(p1, p2, ((flags & FORK_SHAREVM) ? TRUE : FALSE), stack,
            stacksize, func ? func : child_return, arg ? arg : p2);

        timeout_set(&p2->p_stats->p_virt_to, virttimer_trampoline, p2);
        timeout_set(&p2->p_stats->p_prof_to, proftimer_trampoline, p2);

        vm = p2->p_vmspace;

        if (flags & FORK_FORK) {
                forkstat.cntfork++;
                forkstat.sizfork += vm->vm_dsize + vm->vm_ssize;
        } else if (flags & FORK_VFORK) {
                forkstat.cntvfork++;
                forkstat.sizvfork += vm->vm_dsize + vm->vm_ssize;
        } else if (flags & FORK_RFORK) {
                forkstat.cntrfork++;
                forkstat.sizrfork += vm->vm_dsize + vm->vm_ssize;
        } else {
                forkstat.cntkthread++;
                forkstat.sizkthread += vm->vm_dsize + vm->vm_ssize;
        }

        /* Find an unused pid satisfying 1 <= lastpid <= PID_MAX */
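        /*
         * Note the wrap arithmetic: 1 + (lastpid % PID_MAX) steps the
         * pid by one and wraps PID_MAX around to 1, so the loop ends
         * whenever a free pid exists.  With randompid set, each attempt
         * draws a fresh random candidate instead of stepping.
         */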
        do {
                lastpid = 1 + (randompid ? arc4random() : lastpid) % PID_MAX;
        } while (pidtaken(lastpid));
        p2->p_pid = lastpid;

        LIST_INSERT_HEAD(&allproc, p2, p_list);
        LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
        LIST_INSERT_HEAD(&p1->p_children, p2, p_sibling);
        LIST_INSERT_AFTER(p1, p2, p_pglist);
        if (p2->p_flag & P_TRACED) {
                p2->p_oppid = p1->p_pid;
                if (p2->p_pptr != p1->p_pptr)
                        proc_reparent(p2, p1->p_pptr);

                /*
                 * Set ptrace status.
                 */
                if (flags & FORK_FORK) {
                        p2->p_ptstat = malloc(sizeof(*p2->p_ptstat),
                            M_SUBPROC, M_WAITOK);
                        p1->p_ptstat->pe_report_event = PTRACE_FORK;
                        p2->p_ptstat->pe_report_event = PTRACE_FORK;
                        p1->p_ptstat->pe_other_pid = p2->p_pid;
                        p2->p_ptstat->pe_other_pid = p1->p_pid;
                }
        }

#if NSYSTRACE > 0
        if (ISSET(p1->p_flag, P_SYSTRACE))
                systrace_fork(p1, p2);
#endif

        /*
         * Make child runnable, set start time, and add to run queue.
         */
        SCHED_LOCK(s);
        getmicrotime(&p2->p_stats->p_start);
        p2->p_acflag = AFORK;
        p2->p_stat = SRUN;
        setrunqueue(p2);
        SCHED_UNLOCK(s);

        /*
         * Notify any interested parties about the new process.
         */
        KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);

        /*
         * Update stats now that we know the fork was successful.
         */
        uvmexp.forks++;
        if (flags & FORK_PPWAIT)
                uvmexp.forks_ppwait++;
        if (flags & FORK_SHAREVM)
                uvmexp.forks_sharevm++;

        /*
         * Pass a pointer to the new process to the caller.
         */
        if (rnewprocp != NULL)
                *rnewprocp = p2;

        /*
         * Preserve synchronization semantics of vfork.  If waiting for
         * child to exec or exit, set P_PPWAIT on child, and sleep on our
         * proc (in case of exit).
         */
        if (flags & FORK_PPWAIT)
                while (p2->p_flag & P_PPWAIT)
                        tsleep(p1, PWAIT, "ppwait", 0);

        /*
         * If we're tracing the child, alert the parent too.
         */
        if ((flags & FORK_PTRACE) && (p1->p_flag & P_TRACED))
                psignal(p1, SIGTRAP);

        /*
         * Return child pid to parent process,
         * marking us as parent via retval[1].
         */
        if (retval != NULL) {
                retval[0] = p2->p_pid;
                retval[1] = 0;
        }
        return (0);
}
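
/*
 * Illustrative only: a kernel subsystem could call fork1() directly to
 * start a kernel thread sharing the kernel VM space, e.g.
 *
 *      struct proc *newp;
 *      fork1(p, SIGCHLD, FORK_SHAREVM|FORK_NOZOMBIE, NULL, 0,
 *          mythread_main, myarg, NULL, &newp);
 *
 * mythread_main and myarg are hypothetical; the supported entry points
 * are the fork(2)/vfork(2)/rfork(2) wrappers above.
 */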

/*
 * Checks for current use of a pid, either as a pid or pgid.
 */
int
pidtaken(pid_t pid)
{
        struct proc *p;

        if (pfind(pid) != NULL)
                return (1);
        if (pgfind(pid) != NULL)
                return (1);
        LIST_FOREACH(p, &zombproc, p_list)
                if (p->p_pid == pid || p->p_pgid == pid)
                        return (1);
        return (0);
}

#if defined(MULTIPROCESSOR)
/*
 * XXX This is a slight hack to get newly-formed processes to
 * XXX acquire the kernel lock as soon as they run.
 */
void
proc_trampoline_mp(void)
{
        struct proc *p;

        p = curproc;

        SCHED_ASSERT_UNLOCKED();
        KERNEL_PROC_LOCK(p);
}
#endif
