root/arch/i386/i386/trap.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. userret
  2. trap
  3. syscall
  4. child_return

    1 /*      $OpenBSD: trap.c,v 1.85 2007/06/26 13:39:02 tom Exp $   */
    2 /*      $NetBSD: trap.c,v 1.95 1996/05/05 06:50:02 mycroft Exp $        */
    3 
    4 /*-
    5  * Copyright (c) 1995 Charles M. Hannum.  All rights reserved.
    6  * Copyright (c) 1990 The Regents of the University of California.
    7  * All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * the University of Utah, and William Jolitz.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)trap.c      7.4 (Berkeley) 5/13/91
   37  */
   38 
   39 /*
   40  * 386 Trap and System call handling
   41  */
   42 
   43 #include <sys/param.h>
   44 #include <sys/systm.h>
   45 #include <sys/proc.h>
   46 #include <sys/signalvar.h>
   47 #include <sys/user.h>
   48 #include <sys/acct.h>
   49 #include <sys/kernel.h>
   50 #include <sys/signal.h>
   51 #ifdef KTRACE
   52 #include <sys/ktrace.h>
   53 #endif
   54 #include <sys/syscall.h>
   55 
   56 #include "systrace.h"
   57 #include <dev/systrace.h>
   58 
   59 #include <uvm/uvm_extern.h>
   60 
   61 #include <machine/cpu.h>
   62 #include <machine/cpufunc.h>
   63 #include <machine/psl.h>
   64 #include <machine/reg.h>
   65 #include <machine/trap.h>
   66 #ifdef DDB
   67 #include <machine/db_machdep.h>
   68 #endif
   69 
   70 #ifdef KGDB
   71 #include <sys/kgdb.h>
   72 #endif
   73 
   74 #ifdef COMPAT_IBCS2
   75 #include <compat/ibcs2/ibcs2_errno.h>
   76 #include <compat/ibcs2/ibcs2_exec.h>
   77 extern struct emul emul_ibcs2;
   78 #endif
   79 #include <sys/exec.h>
   80 #ifdef COMPAT_LINUX
   81 #include <compat/linux/linux_syscall.h>
   82 extern struct emul emul_linux_aout, emul_linux_elf;
   83 #endif
   84 #ifdef COMPAT_FREEBSD
   85 extern struct emul emul_freebsd_aout, emul_freebsd_elf;
   86 #endif
   87 #ifdef COMPAT_BSDOS
   88 extern struct emul emul_bsdos;
   89 #endif
   90 #ifdef COMPAT_AOUT
   91 extern struct emul emul_aout;
   92 #endif
   93 #ifdef KVM86
   94 #include <machine/kvm86.h>
   95 #define KVM86MODE (kvm86_incall)
   96 #endif
   97 
   98 #include "npx.h"
   99 
  100 static __inline void userret(struct proc *);
  101 void trap(struct trapframe);
  102 void syscall(struct trapframe);
  103 
  104 /*
  105  * Define the code needed before returning to user mode, for
  106  * trap and syscall.
  107  */
  108 static __inline void
  109 userret(struct proc *p)
  110 {
  111         int sig;
  112 
  113         /* take pending signals */
  114         while ((sig = CURSIG(p)) != 0)
  115                 postsig(sig);
  116 
  117         p->p_cpu->ci_schedstate.spc_curpriority = p->p_priority = p->p_usrpri;
  118 }
  119 
  /*
   * Printable names for the trap numbers.  The table is indexed by the
   * trap number; the number and the T_* name each slot corresponds to
   * are noted in front of the string.  trap_types is the number of
   * entries, so users of the table can range-check before indexing.
   */
  char    *trap_type[] = {
          /*  0 T_PRIVINFLT */            "privileged instruction fault",
          /*  1 T_BPTFLT */               "breakpoint trap",
          /*  2 T_ARITHTRAP */            "arithmetic trap",
          /*  3 T_ASTFLT */               "asynchronous system trap",
          /*  4 T_PROTFLT */              "protection fault",
          /*  5 T_TRCTRAP */              "trace trap",
          /*  6 T_PAGEFLT */              "page fault",
          /*  7 T_ALIGNFLT */             "alignment fault",
          /*  8 T_DIVIDE */               "integer divide fault",
          /*  9 T_NMI */                  "non-maskable interrupt",
          /* 10 T_OFLOW */                "overflow trap",
          /* 11 T_BOUND */                "bounds check fault",
          /* 12 T_DNA */                  "FPU not available fault",
          /* 13 T_DOUBLEFLT */            "double fault",
          /* 14 T_FPOPFLT (![P]Pro) */    "FPU operand fetch fault",
          /* 15 T_TSSFLT */               "invalid TSS fault",
          /* 16 T_SEGNPFLT */             "segment not present fault",
          /* 17 T_STKFLT */               "stack fault",
          /* 18 T_MACHK ([P]Pro) */       "machine check",
          /* 19 T_XFTRAP */               "SIMD FP fault",
          /* 20 T_RESERVED */             "reserved trap",
  };
  int     trap_types = sizeof(trap_type) / sizeof(trap_type[0]);

  #ifdef DEBUG
  /* When non-zero, trap() prints the frame of every trap it handles. */
  int     trapdebug = 0;
  #endif
  148 
  149 /*
  150  * trap(frame):
  151  *      Exception, fault, and trap interface to BSD kernel. This
  152  * common code is called from assembly language IDT gate entry
  153  * routines that prepare a suitable stack frame, and restore this
  154  * frame after the exception has been processed. Note that the
  155  * effect is as if the arguments were passed call by reference.
       *
       * Traps taken outside kernel mode get T_USER or'ed into the trap
       * type; most of those cases deliver a signal with trapsignal() and
       * leave through userret().  Traps taken in kernel mode either return
       * directly or jump to the we_re_toast label, which offers the trap
       * to KGDB/DDB (when configured) and otherwise panics.
  156  */
  157 /*ARGSUSED*/
  158 void
  159 trap(struct trapframe frame)
  160 {
  161         struct proc *p = curproc;
  162         int type = frame.tf_trapno;
  163         struct pcb *pcb = NULL;
  164         extern char resume_iret[], resume_pop_ds[], resume_pop_es[],
  165             resume_pop_fs[], resume_pop_gs[];
  166         struct trapframe *vframe;
  167         int resume;
  168         vm_prot_t vftype, ftype;
  169         union sigval sv;
  170         caddr_t onfault;
  171         uint32_t cr2;
  172 
  173         uvmexp.traps++;
  174 
  175         /* SIGSEGV and SIGBUS need this */
              /*
               * vftype is the value handed to trapsignal() below; ftype is
               * the access type handed to uvm_fault().
               */
  176         if (frame.tf_err & PGEX_W) {
  177                 vftype = VM_PROT_WRITE;
  178                 ftype = VM_PROT_READ | VM_PROT_WRITE;
  179         } else
  180                 ftype = vftype = VM_PROT_READ;
  181 
  182 #ifdef DEBUG
  183         if (trapdebug) {
  184                 printf("trap %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n",
  185                     frame.tf_trapno, frame.tf_err, frame.tf_eip, frame.tf_cs,
  186                     frame.tf_eflags, rcr2(), lapic_tpr);
  187                 printf("curproc %p\n", curproc);
  188         }
  189 #endif
  190 
              /*
               * Not a kernel-mode trap: tag the type with T_USER so the
               * switch below picks the user variant of each case, and
               * record this frame in the proc's machine-dependent state.
               */
  191         if (!KERNELMODE(frame.tf_cs, frame.tf_eflags)) {
  192                 type |= T_USER;
  193                 p->p_md.md_regs = &frame;
  194         }
  195 
  196         switch (type) {
  197 
  198         /* trace trap */
  199         case T_TRCTRAP: {
  200 #if defined(DDB) || defined(KGDB)
  201                 /* Make sure nobody is single stepping into kernel land.
  202                  * The syscall has to turn off the trace bit itself.  The
  203                  * easiest way, is to simply not call the debugger, until
  204                  * we are through the problematic "osyscall" stub.  This
  205                  * is a hack, but it does seem to work.
  206                  */
  207                 extern int Xosyscall, Xosyscall_end;
  208 
  209                 if (frame.tf_eip >= (int)&Xosyscall &&
  210                     frame.tf_eip <= (int)&Xosyscall_end)
  211                         return;
  212 #else
  213                 return; /* Just return if no DDB */
  214 #endif
  215         }
  216         /* FALLTHROUGH */
  217 
  218         default:
  219         we_re_toast:
                      /*
                       * Fatal path, also reached by goto from the cases
                       * below.  The debuggers get the first chance to claim
                       * the trap; if none does, the trap is reported and the
                       * kernel panics.
                       */
  220 #ifdef KGDB
  221                 if (kgdb_trap(type, &frame))
  222                         return;
  223                 else {
  224                         /*
  225                          * If this is a breakpoint, don't panic
  226                          * if we're not connected.
  227                          */
  228                         if (type == T_BPTFLT) {
  229                                 printf("kgdb: ignored %s\n", trap_type[type]);
  230                                 return;
  231                         }
  232                 }
  233 #endif
  234 
  235 #ifdef DDB
  236                 if (kdb_trap(type, 0, &frame))
  237                         return;
  238 #endif
                      /* Only index trap_type[] with a number inside the table. */
  239                 if (frame.tf_trapno < trap_types)
  240                         printf("fatal %s (%d)", trap_type[frame.tf_trapno],
  241                                 frame.tf_trapno);
  242                 else
  243                         printf("unknown trap %d", frame.tf_trapno);
  244                 printf(" in %s mode\n", (type & T_USER) ? "user" : "supervisor");
  245                 printf("trap type %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n",
  246                     type, frame.tf_err, frame.tf_eip, frame.tf_cs, frame.tf_eflags, rcr2(), lapic_tpr);
  247 
  248                 panic("trap type %d, code=%x, pc=%x",
  249                     type, frame.tf_err, frame.tf_eip);
  250                 /*NOTREACHED*/
  251 
  252         case T_PROTFLT:
  253 #ifdef KVM86
  254                 if (KVM86MODE) {
  255                         kvm86_gpfault(&frame);
  256                         return;
  257                 }
  258 #endif
                      /* FALLTHROUGH */
  259         case T_SEGNPFLT:
  260         case T_ALIGNFLT:
  261                 /* Check for copyin/copyout fault. */
  262                 if (p && p->p_addr) {
  263                         pcb = &p->p_addr->u_pcb;
  264                         if (pcb->pcb_onfault != 0) {
  265                         copyfault:
                                      /*
                                       * Resume at the registered pcb_onfault
                                       * address instead of the faulting
                                       * instruction.  The kernel page fault
                                       * case also jumps here.
                                       */
  266                                 frame.tf_eip = (int)pcb->pcb_onfault;
  267                                 return;
  268                         }
  269                 }
  270 
  271                 /*
  272                  * Check for failure during return to user mode.
  273                  *
  274                  * We do this by looking at the instruction we faulted on.  The
  275                  * specific instructions we recognize only happen when
  276                  * returning from a trap, syscall, or interrupt.
  277                  *
  278                  * XXX
  279                  * The heuristic used here will currently fail for the case of
  280                  * one of the 2 pop instructions faulting when returning from a
  281                  * fast interrupt.  This should not be possible.  It can be
  282                  * fixed by rearranging the trap frame so that the stack format
  283                  * at this point is the same as on exit from a `slow'
  284                  * interrupt.
  285                  */
                      /*
                       * Each recognized opcode picks the matching resume_*
                       * address and positions vframe so that the named field
                       * (tf_eip, tf_ds, ...) lines up with &frame.tf_esp.
                       * NOTE(review): presumably vframe then overlays the
                       * frame the faulting iret/pop was restoring -- confirm
                       * against the assembly return path.
                       */
  286                 switch (*(u_char *)frame.tf_eip) {
  287                 case 0xcf:      /* iret */
  288                         vframe = (void *)((int)&frame.tf_esp -
  289                             offsetof(struct trapframe, tf_eip));
  290                         resume = (int)resume_iret;
  291                         break;
  292                 case 0x1f:      /* popl %ds */
  293                         vframe = (void *)((int)&frame.tf_esp -
  294                             offsetof(struct trapframe, tf_ds));
  295                         resume = (int)resume_pop_ds;
  296                         break;
  297                 case 0x07:      /* popl %es */
  298                         vframe = (void *)((int)&frame.tf_esp -
  299                             offsetof(struct trapframe, tf_es));
  300                         resume = (int)resume_pop_es;
  301                         break;
  302                 case 0x0f:      /* 0x0f prefix */
  303                         switch (*(u_char *)(frame.tf_eip+1)) {
  304                         case 0xa1:              /* popl %fs */
  305                                 vframe = (void *)((int)&frame.tf_esp -
  306                                     offsetof(struct trapframe, tf_fs));
  307                                 resume = (int)resume_pop_fs;
  308                                 break;
  309                         case 0xa9:              /* popl %gs */
  310                                 vframe = (void *)((int)&frame.tf_esp -
  311                                     offsetof(struct trapframe, tf_gs));
  312                                 resume = (int)resume_pop_gs;
  313                                 break;
  314                         default:
  315                                 goto we_re_toast;
  316                         }
  317                         break;
  318                 default:
  319                         goto we_re_toast;
  320                 }
                      /* A kernel-mode vframe means this was a genuine kernel fault. */
  321                 if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
  322                         goto we_re_toast;
  323 
  324                 frame.tf_eip = resume;
  325                 return;
  326 
  327         case T_PROTFLT|T_USER:          /* protection fault */
  328                 KERNEL_PROC_LOCK(p);
  329 #ifdef VM86
  330                 if (frame.tf_eflags & PSL_VM) {
  331                         vm86_gpfault(p, type & ~T_USER);
  332                         KERNEL_PROC_UNLOCK(p);
  333                         goto out;
  334                 }
  335 #endif
  336                 /* If pmap_exec_fixup does something, let's retry the trap. */
  337                 if (pmap_exec_fixup(&p->p_vmspace->vm_map, &frame,
  338                     &p->p_addr->u_pcb)) {
  339                         KERNEL_PROC_UNLOCK(p);
  340                         goto out;
  341                 }
  342 
  343                 sv.sival_int = frame.tf_eip;
  344                 trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv);
  345                 KERNEL_PROC_UNLOCK(p);
  346                 goto out;
  347 
              /*
               * The user-mode cases from here to T_ARITHTRAP (T_ASTFLT
               * excepted) share one shape: the signal value is the
               * faulting eip, and the signal is posted with trapsignal()
               * while the kernel proc lock is held.
               */
  348         case T_TSSFLT|T_USER:
  349                 sv.sival_int = frame.tf_eip;
  350                 KERNEL_PROC_LOCK(p);
  351                 trapsignal(p, SIGBUS, vftype, BUS_OBJERR, sv);
  352                 KERNEL_PROC_UNLOCK(p);
  353                 goto out;
  354 
  355         case T_SEGNPFLT|T_USER:
  356         case T_STKFLT|T_USER:
  357                 sv.sival_int = frame.tf_eip;
  358                 KERNEL_PROC_LOCK(p);
  359                 trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv);
  360                 KERNEL_PROC_UNLOCK(p);
  361                 goto out;
  362 
  363         case T_ALIGNFLT|T_USER:
  364                 sv.sival_int = frame.tf_eip;
  365                 KERNEL_PROC_LOCK(p);
  366                 trapsignal(p, SIGBUS, vftype, BUS_ADRALN, sv);
  367                 KERNEL_PROC_UNLOCK(p);
  368                 goto out;
  369 
  370         case T_PRIVINFLT|T_USER:        /* privileged instruction fault */
  371                 sv.sival_int = frame.tf_eip;
  372                 KERNEL_PROC_LOCK(p);
  373                 trapsignal(p, SIGILL, type &~ T_USER, ILL_PRVOPC, sv);
  374                 KERNEL_PROC_UNLOCK(p);
  375                 goto out;
  376 
  377         case T_FPOPFLT|T_USER:          /* coprocessor operand fault */
  378                 sv.sival_int = frame.tf_eip;
  379                 KERNEL_PROC_LOCK(p);
  380                 trapsignal(p, SIGILL, type &~ T_USER, ILL_COPROC, sv);
  381                 KERNEL_PROC_UNLOCK(p);
  382                 goto out;
  383 
  384         case T_ASTFLT|T_USER:           /* Allow process switch */
  385                 uvmexp.softs++;
  386                 if (p->p_flag & P_OWEUPC) {
  387                         KERNEL_PROC_LOCK(p);
  388                         ADDUPROF(p);
  389                         KERNEL_PROC_UNLOCK(p);
  390                 }
  391                 if (want_resched)
  392                         preempt(NULL);
  393                 goto out;
  394 
  395         case T_DNA|T_USER: {
  396                 printf("pid %d killed due to lack of floating point\n",
  397                     p->p_pid);
  398                 sv.sival_int = frame.tf_eip;
  399                 KERNEL_PROC_LOCK(p);
  400                 trapsignal(p, SIGKILL, type &~ T_USER, FPE_FLTINV, sv);
  401                 KERNEL_PROC_UNLOCK(p);
  402                 goto out;
  403         }
  404 
  405         case T_BOUND|T_USER:
  406                 sv.sival_int = frame.tf_eip;
  407                 KERNEL_PROC_LOCK(p);
  408                 trapsignal(p, SIGFPE, type &~ T_USER, FPE_FLTSUB, sv);
  409                 KERNEL_PROC_UNLOCK(p);
  410                 goto out;
  411         case T_OFLOW|T_USER:
  412                 sv.sival_int = frame.tf_eip;
  413                 KERNEL_PROC_LOCK(p);
  414                 trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTOVF, sv);
  415                 KERNEL_PROC_UNLOCK(p);
  416                 goto out;
  417         case T_DIVIDE|T_USER:
  418                 sv.sival_int = frame.tf_eip;
  419                 KERNEL_PROC_LOCK(p);
  420                 trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTDIV, sv);
  421                 KERNEL_PROC_UNLOCK(p);
  422                 goto out;
  423 
  424         case T_ARITHTRAP|T_USER:
  425                 sv.sival_int = frame.tf_eip;
  426                 KERNEL_PROC_LOCK(p);
                      /* Unlike the cases above, tf_err is what gets passed on here. */
  427                 trapsignal(p, SIGFPE, frame.tf_err, FPE_INTOVF, sv);
  428                 KERNEL_PROC_UNLOCK(p);
  429                 goto out;
  430 
  431         case T_PAGEFLT:                 /* allow page faults in kernel mode */
  432                 if (p == 0 || p->p_addr == 0)
  433                         goto we_re_toast;
  434 #ifdef LOCKDEBUG
  435                 /* If we page-fault while in scheduler, we're doomed. */
  436 #ifdef notyet
  437                 if (simple_lock_held(&sched_lock))
  438 #else
  439                 if (__mp_lock_held(&sched_lock))
  440 #endif
  441                         goto we_re_toast;
  442 #endif
  443 
  444                 pcb = &p->p_addr->u_pcb;
  445 #if 0
  446                 /* XXX - check only applies to 386's and 486's with WP off */
  447                 if (frame.tf_err & PGEX_P)
  448                         goto we_re_toast;
  449 #endif
                      /*
                       * Kernel-mode faults take KERNEL_LOCK() and join the
                       * user-mode code at faultcommon; the unlock calls
                       * there are chosen by testing type == T_PAGEFLT.
                       */
  450                 cr2 = rcr2();
  451                 KERNEL_LOCK();
  452                 goto faultcommon;
  453 
  454         case T_PAGEFLT|T_USER: {        /* page fault */
  455                 vaddr_t va, fa;
  456                 struct vmspace *vm;
  457                 struct vm_map *map;
  458                 int rv;
  459 
  460                 cr2 = rcr2();
  461                 KERNEL_PROC_LOCK(p);
  462         faultcommon:
  463                 vm = p->p_vmspace;
                      /*
                       * NOTE(review): the jumps to we_re_toast in this case
                       * are taken with the lock acquired above still held;
                       * this matters only if a debugger claims the trap and
                       * trap() returns -- confirm that is acceptable.
                       */
  464                 if (vm == NULL)
  465                         goto we_re_toast;
  466                 fa = (vaddr_t)cr2;
  467                 va = trunc_page(fa);
  468                 /*
  469                  * It is only a kernel address space fault iff:
  470                  *      1. (type & T_USER) == 0  and
  471                  *      2. pcb_onfault not set or
  472                  *      3. pcb_onfault set but supervisor space fault
  473                  * The last can occur during an exec() copyin where the
  474                  * argument space is lazy-allocated.
  475                  */
  476                 if (type == T_PAGEFLT && va >= KERNBASE)
  477                         map = kernel_map;
  478                 else
  479                         map = &vm->vm_map;
  480 
  481 #ifdef DIAGNOSTIC
  482                 if (map == kernel_map && va == 0) {
  483                         printf("trap: bad kernel access at %lx\n", va);
  484                         goto we_re_toast;
  485                 }
  486 #endif
  487 
                      /*
                       * pcb_onfault is cleared for the duration of
                       * uvm_fault() and put back afterwards.
                       */
  488                 onfault = p->p_addr->u_pcb.pcb_onfault;
  489                 p->p_addr->u_pcb.pcb_onfault = NULL;
  490                 rv = uvm_fault(map, va, 0, ftype);
  491                 p->p_addr->u_pcb.pcb_onfault = onfault;
  492 
                      /* rv == 0: the fault was handled. */
  493                 if (rv == 0) {
  494                         if (map != kernel_map)
  495                                 uvm_grow(p, va);
  496                         if (type == T_PAGEFLT) {
  497                                 KERNEL_UNLOCK();
  498                                 return;
  499                         }
  500                         KERNEL_PROC_UNLOCK(p);
  501                         goto out;
  502                 }
  503 
                      /*
                       * The fault could not be handled.  In kernel mode,
                       * recover through pcb_onfault if one is registered,
                       * otherwise this is fatal.  In user mode the process
                       * gets SIGSEGV with the faulting address as its value.
                       */
  504                 if (type == T_PAGEFLT) {
  505                         if (pcb->pcb_onfault != 0) {
  506                                 KERNEL_UNLOCK();
  507                                 goto copyfault;
  508                         }
  509                         printf("uvm_fault(%p, 0x%lx, 0, %d) -> %x\n",
  510                             map, va, ftype, rv);
  511                         goto we_re_toast;
  512                 }
  513                 sv.sival_int = fa;
  514                 trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv);
  515                 KERNEL_PROC_UNLOCK(p);
  516                 break;
  517         }
  518 
  519 #if 0  /* Should this be left out?  */
  520 #if !defined(DDB) && !defined(KGDB)
  521         /* XXX need to deal with this when DDB is present, too */
  522         case T_TRCTRAP: /* kernel trace trap; someone single stepping lcall's */
  523                         /* syscall has to turn off the trace bit itself */
  524                 return;
  525 #endif
  526 #endif
  527 
              /*
               * NOTE(review): the two SIGTRAP cases below take the signal
               * value from rcr2() rather than from frame.tf_eip as the
               * other signal cases do -- confirm this is intended.
               */
  528         case T_BPTFLT|T_USER:           /* bpt instruction fault */
  529                 sv.sival_int = rcr2();
  530                 KERNEL_PROC_LOCK(p);
  531                 trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_BRKPT, sv);
  532                 KERNEL_PROC_UNLOCK(p);
  533                 break;
  534         case T_TRCTRAP|T_USER:          /* trace trap */
  535                 sv.sival_int = rcr2();
  536                 KERNEL_PROC_LOCK(p);
  537                 trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_TRACE, sv);
  538                 KERNEL_PROC_UNLOCK(p);
  539                 break;
  540 
  541 #if     NISA > 0
  542         case T_NMI:
  543         case T_NMI|T_USER:
  544 #if defined(DDB) || defined(KGDB)
  545                 /* NMI can be hooked up to a pushbutton for debugging */
  546                 printf ("NMI ... going to debugger\n");
  547 #ifdef KGDB
  548                 if (kgdb_trap(type, &frame))
  549                         return;
  550 #endif
  551 #ifdef DDB
  552                 if (kdb_trap(type, 0, &frame))
  553                         return;
  554 #endif
                              /*
                               * With DDB or KGDB configured this return is
                               * unconditional, so the isa_nmi() check below
                               * is never reached in such kernels.
                               */
  555                         return;
  556 #endif /* DDB || KGDB */
  557                 /* machine/parity/power fail/"kitchen sink" faults */
  558                 if (isa_nmi() == 0)
  559                         return;
  560                 else
  561                         goto we_re_toast;
  562 #endif
  563         }
  564 
              /*
               * Cases that left the switch with break end up here: only
               * traps tagged T_USER go on to the user-return work.
               */
  565         if ((type & T_USER) == 0)
  566                 return;
  567 out:
  568         userret(p);
  569 }
  570 
  571 /*
  572  * syscall(frame):
  573  *      System call request from POSIX system call gate interface to kernel.
  574  * Like trap(), argument is call by reference.
  575  */
  576 /*ARGSUSED*/
  577 void
  578 syscall(struct trapframe frame)
  579 {
  580         caddr_t params;
  581         struct sysent *callp;
  582         struct proc *p;
  583         int orig_error, error, opc, nsys;
  584         size_t argsize;
  585         register_t code, args[8], rval[2];
  586 #ifdef DIAGNOSTIC
  587         int ocpl = lapic_tpr;
  588 #endif
  589 
  590         uvmexp.syscalls++;
  591 #ifdef DIAGNOSTIC
  592         if (!USERMODE(frame.tf_cs, frame.tf_eflags))
  593                 panic("syscall");
  594 #endif
  595         p = curproc;
  596         p->p_md.md_regs = &frame;
  597         opc = frame.tf_eip;
  598         code = frame.tf_eax;
  599 
  600         nsys = p->p_emul->e_nsysent;
  601         callp = p->p_emul->e_sysent;
  602 
  603 #ifdef COMPAT_IBCS2
  604         if (p->p_emul == &emul_ibcs2)
  605                 if (IBCS2_HIGH_SYSCALL(code))
  606                         code = IBCS2_CVT_HIGH_SYSCALL(code);
  607 #endif
  608         params = (caddr_t)frame.tf_esp + sizeof(int);
  609 
  610 #ifdef VM86
  611         /*
  612          * VM86 mode application found our syscall trap gate by accident; let
  613          * it get a SIGSYS and have the VM86 handler in the process take care
  614          * of it.
  615          */
  616         if (frame.tf_eflags & PSL_VM)
  617                 code = -1;
  618         else
  619 #endif
  620 
  621         switch (code) {
  622         case SYS_syscall:
  623 #ifdef COMPAT_LINUX
  624                 /* Linux has a special system setup call as number 0 */
  625                 if (p->p_emul == &emul_linux_aout ||
  626                     p->p_emul == &emul_linux_elf)
  627                         break;
  628 #endif
  629                 /*
  630                  * Code is first argument, followed by actual args.
  631                  */
  632                 copyin(params, &code, sizeof(int));
  633                 params += sizeof(int);
  634                 break;
  635         case SYS___syscall:
  636                 /*
  637                  * Like syscall, but code is a quad, so as to maintain
  638                  * quad alignment for the rest of the arguments.
  639                  */
  640                 if (callp != sysent
  641 #ifdef COMPAT_FREEBSD
  642                     && p->p_emul != &emul_freebsd_aout
  643                     && p->p_emul != &emul_freebsd_elf
  644 #endif
  645 #ifdef COMPAT_AOUT
  646                     && p->p_emul != &emul_aout
  647 #endif
  648 #ifdef COMPAT_BSDOS
  649                     && p->p_emul != &emul_bsdos
  650 #endif
  651                     )
  652                         break;
  653                 copyin(params + _QUAD_LOWWORD * sizeof(int), &code, sizeof(int));
  654                 params += sizeof(quad_t);
  655                 break;
  656         default:
  657                 break;
  658         }
  659         if (code < 0 || code >= nsys)
  660                 callp += p->p_emul->e_nosys;            /* illegal */
  661         else
  662                 callp += code;
  663         argsize = callp->sy_argsize;
  664 #ifdef COMPAT_LINUX
  665         /* XXX extra if() for every emul type.. */
  666         if (p->p_emul == &emul_linux_aout || p->p_emul == &emul_linux_elf) {
  667                 /*
  668                  * Linux passes the args in ebx, ecx, edx, esi, edi, ebp, in
  669                  * increasing order.
  670                  */
  671                 switch (argsize) {
  672                 case 24:
  673                         args[5] = frame.tf_ebp;
  674                 case 20:
  675                         args[4] = frame.tf_edi;
  676                 case 16:
  677                         args[3] = frame.tf_esi;
  678                 case 12:
  679                         args[2] = frame.tf_edx;
  680                 case 8:
  681                         args[1] = frame.tf_ecx;
  682                 case 4:
  683                         args[0] = frame.tf_ebx;
  684                 case 0:
  685                         break;
  686                 default:
  687                         panic("linux syscall with weird argument size %d",
  688                             argsize);
  689                         break;
  690                 }
  691                 error = 0;
  692         }
  693         else
  694 #endif
  695         if (argsize)
  696                 error = copyin(params, (caddr_t)args, argsize);
  697         else
  698                 error = 0;
  699         orig_error = error;
  700         KERNEL_PROC_LOCK(p);
  701 #ifdef SYSCALL_DEBUG
  702         scdebug_call(p, code, args);
  703 #endif
  704 #ifdef KTRACE
  705         if (KTRPOINT(p, KTR_SYSCALL))
  706                 ktrsyscall(p, code, argsize, args);
  707 #endif
  708         if (error) {
  709                 KERNEL_PROC_UNLOCK(p);
  710                 goto bad;
  711         }
  712         rval[0] = 0;
  713         rval[1] = frame.tf_edx;
  714 #if NSYSTRACE > 0
  715         if (ISSET(p->p_flag, P_SYSTRACE))
  716                 orig_error = error = systrace_redirect(code, p, args, rval);
  717         else
  718 #endif
  719                 orig_error = error = (*callp->sy_call)(p, args, rval);
  720         KERNEL_PROC_UNLOCK(p);
  721         switch (error) {
  722         case 0:
  723                 frame.tf_eax = rval[0];
  724                 frame.tf_edx = rval[1];
  725                 frame.tf_eflags &= ~PSL_C;      /* carry bit */
  726                 break;
  727         case ERESTART:
  728                 /*
  729                  * The offset to adjust the PC by depends on whether we entered
  730                  * the kernel through the trap or call gate.  We pushed the
  731                  * size of the instruction into tf_err on entry.
  732                  */
  733                 frame.tf_eip = opc - frame.tf_err;
  734                 break;
  735         case EJUSTRETURN:
  736                 /* nothing to do */
  737                 break;
  738         default:
  739         bad:
  740                 if (p->p_emul->e_errno)
  741                         error = p->p_emul->e_errno[error];
  742                 frame.tf_eax = error;
  743                 frame.tf_eflags |= PSL_C;       /* carry bit */
  744                 break;
  745         }
  746 
  747 #ifdef SYSCALL_DEBUG
  748         KERNEL_PROC_LOCK(p);
  749         scdebug_ret(p, code, orig_error, rval);
  750         KERNEL_PROC_UNLOCK(p);
  751 #endif
  752         userret(p);
  753 #ifdef KTRACE
  754         if (KTRPOINT(p, KTR_SYSRET)) {
  755                 KERNEL_PROC_LOCK(p);
  756                 ktrsysret(p, code, orig_error, rval[0]);
  757                 KERNEL_PROC_UNLOCK(p);
  758         }
  759 #endif
  760 #ifdef DIAGNOSTIC
  761         if (lapic_tpr != ocpl) {
  762                 printf("WARNING: SPL (0x%x) NOT LOWERED ON "
  763                     "syscall(0x%x, 0x%x, 0x%x, 0x%x...) EXIT, PID %d\n",
  764                     lapic_tpr, code, args[0], args[1], args[2], p->p_pid);
  765                 lapic_tpr = ocpl;
  766         }
  767 #endif
  768 }
  769 
  770 void
  771 child_return(void *arg)
  772 {
  773         struct proc *p = (struct proc *)arg;
  774         struct trapframe *tf = p->p_md.md_regs;
  775 
  776         tf->tf_eax = 0;
  777         tf->tf_eflags &= ~PSL_C;
  778 
  779         KERNEL_PROC_UNLOCK(p);
  780 
  781         userret(p);
  782 #ifdef KTRACE
  783         if (KTRPOINT(p, KTR_SYSRET)) {
  784                 KERNEL_PROC_LOCK(p);
  785                 ktrsysret(p,
  786                     (p->p_flag & P_PPWAIT) ? SYS_vfork : SYS_fork, 0, 0);
  787                 KERNEL_PROC_UNLOCK(p);
  788         }
  789 #endif
  790 }

/* [<][>][^][v][top][bottom][index][help] */