1 /* $OpenBSD: trap.c,v 1.85 2007/06/26 13:39:02 tom Exp $ */
2 /* $NetBSD: trap.c,v 1.95 1996/05/05 06:50:02 mycroft Exp $ */
3
4 /*-
5 * Copyright (c) 1995 Charles M. Hannum. All rights reserved.
6 * Copyright (c) 1990 The Regents of the University of California.
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the University of Utah, and William Jolitz.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)trap.c 7.4 (Berkeley) 5/13/91
37 */
38
39 /*
40 * 386 Trap and System call handling
41 */
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/proc.h>
46 #include <sys/signalvar.h>
47 #include <sys/user.h>
48 #include <sys/acct.h>
49 #include <sys/kernel.h>
50 #include <sys/signal.h>
51 #ifdef KTRACE
52 #include <sys/ktrace.h>
53 #endif
54 #include <sys/syscall.h>
55
56 #include "systrace.h"
57 #include <dev/systrace.h>
58
59 #include <uvm/uvm_extern.h>
60
61 #include <machine/cpu.h>
62 #include <machine/cpufunc.h>
63 #include <machine/psl.h>
64 #include <machine/reg.h>
65 #include <machine/trap.h>
66 #ifdef DDB
67 #include <machine/db_machdep.h>
68 #endif
69
70 #ifdef KGDB
71 #include <sys/kgdb.h>
72 #endif
73
74 #ifdef COMPAT_IBCS2
75 #include <compat/ibcs2/ibcs2_errno.h>
76 #include <compat/ibcs2/ibcs2_exec.h>
77 extern struct emul emul_ibcs2;
78 #endif
79 #include <sys/exec.h>
80 #ifdef COMPAT_LINUX
81 #include <compat/linux/linux_syscall.h>
82 extern struct emul emul_linux_aout, emul_linux_elf;
83 #endif
84 #ifdef COMPAT_FREEBSD
85 extern struct emul emul_freebsd_aout, emul_freebsd_elf;
86 #endif
87 #ifdef COMPAT_BSDOS
88 extern struct emul emul_bsdos;
89 #endif
90 #ifdef COMPAT_AOUT
91 extern struct emul emul_aout;
92 #endif
93 #ifdef KVM86
94 #include <machine/kvm86.h>
95 #define KVM86MODE (kvm86_incall)
96 #endif
97
98 #include "npx.h"
99
/*
 * Forward declarations for the routines defined in this file.
 * trap() and syscall() receive the trap frame by value.
 */
static __inline void	userret(struct proc *p);
void			trap(struct trapframe frame);
void			syscall(struct trapframe frame);
103
104 /*
105 * Define the code needed before returning to user mode, for
106 * trap and syscall.
107 */
108 static __inline void
109 userret(struct proc *p)
110 {
111 int sig;
112
113 /* take pending signals */
114 while ((sig = CURSIG(p)) != 0)
115 postsig(sig);
116
117 p->p_cpu->ci_schedstate.spc_curpriority = p->p_priority = p->p_usrpri;
118 }
119
/*
 * Printable names for the trap numbers, indexed by trap number.  The
 * number and the T_* name each entry stands for are noted in front of
 * the string.
 */
char *trap_type[] = {
	/*  0 T_PRIVINFLT */		"privileged instruction fault",
	/*  1 T_BPTFLT */		"breakpoint trap",
	/*  2 T_ARITHTRAP */		"arithmetic trap",
	/*  3 T_ASTFLT */		"asynchronous system trap",
	/*  4 T_PROTFLT */		"protection fault",
	/*  5 T_TRCTRAP */		"trace trap",
	/*  6 T_PAGEFLT */		"page fault",
	/*  7 T_ALIGNFLT */		"alignment fault",
	/*  8 T_DIVIDE */		"integer divide fault",
	/*  9 T_NMI */			"non-maskable interrupt",
	/* 10 T_OFLOW */		"overflow trap",
	/* 11 T_BOUND */		"bounds check fault",
	/* 12 T_DNA */			"FPU not available fault",
	/* 13 T_DOUBLEFLT */		"double fault",
	/* 14 T_FPOPFLT (![P]Pro) */	"FPU operand fetch fault",
	/* 15 T_TSSFLT */		"invalid TSS fault",
	/* 16 T_SEGNPFLT */		"segment not present fault",
	/* 17 T_STKFLT */		"stack fault",
	/* 18 T_MACHK ([P]Pro) */	"machine check",
	/* 19 T_XFTRAP */		"SIMD FP fault",
	/* 20 T_RESERVED */		"reserved trap",
};

/* Number of entries in trap_type[]; trap() checks trap numbers against it. */
int trap_types = sizeof(trap_type) / sizeof(trap_type[0]);

#if defined(DEBUG)
/* When non-zero, trap() prints the contents of every frame it is handed. */
int trapdebug = 0;
#endif
148
149 /*
150 * trap(frame):
151 * Exception, fault, and trap interface to BSD kernel. This
152 * common code is called from assembly language IDT gate entry
153 * routines that prepare a suitable stack frame, and restore this
154 * frame after the exception has been processed. Note that the
155 * effect is as if the arguments were passed call by reference.
 *
 * Summary of the flow below: the trap is counted in uvmexp.traps, the
 * access type (vftype/ftype) is derived from PGEX_W in tf_err, T_USER is
 * ORed into the trap type when the frame is not a kernel-mode one, and
 * the code then switches on the resulting type. Cases that handle a
 * user-mode trap finish through userret(); kernel-mode cases either
 * return to the interrupted code or end up at we_re_toast, which offers
 * the trap to KGDB/DDB and otherwise panics.
156 */
157 /*ARGSUSED*/
158 void
159 trap(struct trapframe frame)
160 {
161 struct proc *p = curproc;
162 int type = frame.tf_trapno;
163 struct pcb *pcb = NULL;
164 extern char resume_iret[], resume_pop_ds[], resume_pop_es[],
165 resume_pop_fs[], resume_pop_gs[];
166 struct trapframe *vframe;
167 int resume;
168 vm_prot_t vftype, ftype;
169 union sigval sv;
170 caddr_t onfault;
171 uint32_t cr2;
172
173 uvmexp.traps++;
174
175 /* SIGSEGV and SIGBUS need this */
/*
 * vftype is what gets passed to trapsignal() below; ftype is the access
 * type handed to uvm_fault(). A write fault asks uvm_fault() for
 * read+write but reports only VM_PROT_WRITE with the signal.
 */
176 if (frame.tf_err & PGEX_W) {
177 vftype = VM_PROT_WRITE;
178 ftype = VM_PROT_READ | VM_PROT_WRITE;
179 } else
180 ftype = vftype = VM_PROT_READ;
181
182 #ifdef DEBUG
183 if (trapdebug) {
184 printf("trap %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n",
185 frame.tf_trapno, frame.tf_err, frame.tf_eip, frame.tf_cs,
186 frame.tf_eflags, rcr2(), lapic_tpr);
187 printf("curproc %p\n", curproc);
188 }
189 #endif
190
/*
 * A trap that did not come from kernel mode is tagged with T_USER so it
 * selects the "|T_USER" cases of the switch, and the frame is recorded
 * in the process's md_regs.
 */
191 if (!KERNELMODE(frame.tf_cs, frame.tf_eflags)) {
192 type |= T_USER;
193 p->p_md.md_regs = &frame;
194 }
195
196 switch (type) {
197
198 /* trace trap */
/*
 * Kernel-mode trace trap. Without DDB/KGDB it is simply ignored. With a
 * debugger configured it is ignored only when eip lies between Xosyscall
 * and Xosyscall_end; otherwise control falls into the default case.
 */
199 case T_TRCTRAP: {
200 #if defined(DDB) || defined(KGDB)
201 /* Make sure nobody is single stepping into kernel land.
202 * The syscall has to turn off the trace bit itself. The
203 * easiest way, is to simply not call the debugger, until
204 * we are through the problematic "osyscall" stub. This
205 * is a hack, but it does seem to work.
206 */
207 extern int Xosyscall, Xosyscall_end;
208
209 if (frame.tf_eip >= (int)&Xosyscall &&
210 frame.tf_eip <= (int)&Xosyscall_end)
211 return;
212 #else
213 return; /* Just return if no DDB */
214 #endif
215 }
216 /* FALLTHROUGH */
217
/*
 * Unhandled or unrecoverable trap. Several other cases jump to
 * we_re_toast. Configured debuggers get the first chance to claim the
 * trap; if none does, the trap is described on the console and the
 * kernel panics.
 */
218 default:
219 we_re_toast:
220 #ifdef KGDB
221 if (kgdb_trap(type, &frame))
222 return;
223 else {
224 /*
225 * If this is a breakpoint, don't panic
226 * if we're not connected.
227 */
228 if (type == T_BPTFLT) {
229 printf("kgdb: ignored %s\n", trap_type[type]);
230 return;
231 }
232 }
233 #endif
234
235 #ifdef DDB
236 if (kdb_trap(type, 0, &frame))
237 return;
238 #endif
/* Only index trap_type[] when the trap number is below trap_types. */
239 if (frame.tf_trapno < trap_types)
240 printf("fatal %s (%d)", trap_type[frame.tf_trapno],
241 frame.tf_trapno);
242 else
243 printf("unknown trap %d", frame.tf_trapno);
244 printf(" in %s mode\n", (type & T_USER) ? "user" : "supervisor");
245 printf("trap type %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n",
246 type, frame.tf_err, frame.tf_eip, frame.tf_cs, frame.tf_eflags, rcr2(), lapic_tpr);
247
248 panic("trap type %d, code=%x, pc=%x",
249 type, frame.tf_err, frame.tf_eip);
250 /*NOTREACHED*/
251
/*
 * Kernel-mode protection, segment-not-present and alignment faults share
 * one recovery path; T_PROTFLT first gives kvm86 a chance when KVM86 is
 * configured.
 */
252 case T_PROTFLT:
253 #ifdef KVM86
254 if (KVM86MODE) {
255 kvm86_gpfault(&frame);
256 return;
257 }
258 #endif
/* FALLTHROUGH */
259 case T_SEGNPFLT:
260 case T_ALIGNFLT:
261 /* Check for copyin/copyout fault. */
262 if (p && p->p_addr) {
263 pcb = &p->p_addr->u_pcb;
264 if (pcb->pcb_onfault != 0) {
/*
 * Resume execution at the handler stored in pcb_onfault. The kernel
 * page fault path also jumps here when uvm_fault() fails.
 */
265 copyfault:
266 frame.tf_eip = (int)pcb->pcb_onfault;
267 return;
268 }
269 }
270
271 /*
272 * Check for failure during return to user mode.
273 *
274 * We do this by looking at the instruction we faulted on. The
275 * specific instructions we recognize only happen when
276 * returning from a trap, syscall, or interrupt.
277 *
278 * XXX
279 * The heuristic used here will currently fail for the case of
280 * one of the 2 pop instructions faulting when returning from a
281 * fast interrupt. This should not be possible. It can be
282 * fixed by rearranging the trap frame so that the stack format
283 * at this point is the same as on exit from a `slow'
284 * interrupt.
285 */
/*
 * In each case below vframe is positioned so that the trapframe member
 * named in the offsetof() coincides with &frame.tf_esp, and resume is
 * set to the matching resume_* address declared extern above.
 */
286 switch (*(u_char *)frame.tf_eip) {
287 case 0xcf: /* iret */
288 vframe = (void *)((int)&frame.tf_esp -
289 offsetof(struct trapframe, tf_eip));
290 resume = (int)resume_iret;
291 break;
292 case 0x1f: /* popl %ds */
293 vframe = (void *)((int)&frame.tf_esp -
294 offsetof(struct trapframe, tf_ds));
295 resume = (int)resume_pop_ds;
296 break;
297 case 0x07: /* popl %es */
298 vframe = (void *)((int)&frame.tf_esp -
299 offsetof(struct trapframe, tf_es));
300 resume = (int)resume_pop_es;
301 break;
302 case 0x0f: /* 0x0f prefix */
303 switch (*(u_char *)(frame.tf_eip+1)) {
304 case 0xa1: /* popl %fs */
305 vframe = (void *)((int)&frame.tf_esp -
306 offsetof(struct trapframe, tf_fs));
307 resume = (int)resume_pop_fs;
308 break;
309 case 0xa9: /* popl %gs */
310 vframe = (void *)((int)&frame.tf_esp -
311 offsetof(struct trapframe, tf_gs));
312 resume = (int)resume_pop_gs;
313 break;
314 default:
315 goto we_re_toast;
316 }
317 break;
318 default:
319 goto we_re_toast;
320 }
/* Recover only if the frame seen through vframe is not a kernel-mode one. */
321 if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
322 goto we_re_toast;
323
324 frame.tf_eip = resume;
325 return;
326
/*
 * User-mode faults from here on. Each case wraps its work in
 * KERNEL_PROC_LOCK/KERNEL_PROC_UNLOCK and leaves through `out'.
 */
327 case T_PROTFLT|T_USER: /* protection fault */
328 KERNEL_PROC_LOCK(p);
329 #ifdef VM86
330 if (frame.tf_eflags & PSL_VM) {
331 vm86_gpfault(p, type & ~T_USER);
332 KERNEL_PROC_UNLOCK(p);
333 goto out;
334 }
335 #endif
336 /* If pmap_exec_fixup does something, let's retry the trap. */
337 if (pmap_exec_fixup(&p->p_vmspace->vm_map, &frame,
338 &p->p_addr->u_pcb)) {
339 KERNEL_PROC_UNLOCK(p);
340 goto out;
341 }
342
343 sv.sival_int = frame.tf_eip;
344 trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv);
345 KERNEL_PROC_UNLOCK(p);
346 goto out;
347
348 case T_TSSFLT|T_USER:
349 sv.sival_int = frame.tf_eip;
350 KERNEL_PROC_LOCK(p);
351 trapsignal(p, SIGBUS, vftype, BUS_OBJERR, sv);
352 KERNEL_PROC_UNLOCK(p);
353 goto out;
354
355 case T_SEGNPFLT|T_USER:
356 case T_STKFLT|T_USER:
357 sv.sival_int = frame.tf_eip;
358 KERNEL_PROC_LOCK(p);
359 trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv);
360 KERNEL_PROC_UNLOCK(p);
361 goto out;
362
363 case T_ALIGNFLT|T_USER:
364 sv.sival_int = frame.tf_eip;
365 KERNEL_PROC_LOCK(p);
366 trapsignal(p, SIGBUS, vftype, BUS_ADRALN, sv);
367 KERNEL_PROC_UNLOCK(p);
368 goto out;
369
370 case T_PRIVINFLT|T_USER: /* privileged instruction fault */
371 sv.sival_int = frame.tf_eip;
372 KERNEL_PROC_LOCK(p);
373 trapsignal(p, SIGILL, type &~ T_USER, ILL_PRVOPC, sv);
374 KERNEL_PROC_UNLOCK(p);
375 goto out;
376
377 case T_FPOPFLT|T_USER: /* coprocessor operand fault */
378 sv.sival_int = frame.tf_eip;
379 KERNEL_PROC_LOCK(p);
380 trapsignal(p, SIGILL, type &~ T_USER, ILL_COPROC, sv);
381 KERNEL_PROC_UNLOCK(p);
382 goto out;
383
/*
 * AST: counted in uvmexp.softs. Charges owed profiling ticks when
 * P_OWEUPC is set and calls preempt() when want_resched is set.
 */
384 case T_ASTFLT|T_USER: /* Allow process switch */
385 uvmexp.softs++;
386 if (p->p_flag & P_OWEUPC) {
387 KERNEL_PROC_LOCK(p);
388 ADDUPROF(p);
389 KERNEL_PROC_UNLOCK(p);
390 }
391 if (want_resched)
392 preempt(NULL);
393 goto out;
394
/* The process is sent SIGKILL after the message below is printed. */
395 case T_DNA|T_USER: {
396 printf("pid %d killed due to lack of floating point\n",
397 p->p_pid);
398 sv.sival_int = frame.tf_eip;
399 KERNEL_PROC_LOCK(p);
400 trapsignal(p, SIGKILL, type &~ T_USER, FPE_FLTINV, sv);
401 KERNEL_PROC_UNLOCK(p);
402 goto out;
403 }
404
405 case T_BOUND|T_USER:
406 sv.sival_int = frame.tf_eip;
407 KERNEL_PROC_LOCK(p);
408 trapsignal(p, SIGFPE, type &~ T_USER, FPE_FLTSUB, sv);
409 KERNEL_PROC_UNLOCK(p);
410 goto out;
411 case T_OFLOW|T_USER:
412 sv.sival_int = frame.tf_eip;
413 KERNEL_PROC_LOCK(p);
414 trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTOVF, sv);
415 KERNEL_PROC_UNLOCK(p);
416 goto out;
417 case T_DIVIDE|T_USER:
418 sv.sival_int = frame.tf_eip;
419 KERNEL_PROC_LOCK(p);
420 trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTDIV, sv);
421 KERNEL_PROC_UNLOCK(p);
422 goto out;
423
/* Unlike the cases above, this one passes frame.tf_err to trapsignal(). */
424 case T_ARITHTRAP|T_USER:
425 sv.sival_int = frame.tf_eip;
426 KERNEL_PROC_LOCK(p);
427 trapsignal(p, SIGFPE, frame.tf_err, FPE_INTOVF, sv);
428 KERNEL_PROC_UNLOCK(p);
429 goto out;
430
/*
 * Kernel-mode page fault. Requires a current process with a u-area;
 * pcb is set here because the failure path after faultcommon reads
 * pcb->pcb_onfault. This path takes KERNEL_LOCK (not KERNEL_PROC_LOCK)
 * and then jumps into the user-mode page fault block below.
 */
431 case T_PAGEFLT: /* allow page faults in kernel mode */
432 if (p == 0 || p->p_addr == 0)
433 goto we_re_toast;
434 #ifdef LOCKDEBUG
435 /* If we page-fault while in scheduler, we're doomed. */
436 #ifdef notyet
437 if (simple_lock_held(&sched_lock))
438 #else
439 if (__mp_lock_held(&sched_lock))
440 #endif
441 goto we_re_toast;
442 #endif
443
444 pcb = &p->p_addr->u_pcb;
445 #if 0
446 /* XXX - check only applies to 386's and 486's with WP off */
447 if (frame.tf_err & PGEX_P)
448 goto we_re_toast;
449 #endif
450 cr2 = rcr2();
451 KERNEL_LOCK();
452 goto faultcommon;
453
454 case T_PAGEFLT|T_USER: { /* page fault */
455 vaddr_t va, fa;
456 struct vmspace *vm;
457 struct vm_map *map;
458 int rv;
459
460 cr2 = rcr2();
461 KERNEL_PROC_LOCK(p);
/*
 * Common page fault handling. The kernel-mode case above enters this
 * block through the goto, i.e. past the declarations at its top; every
 * one of those variables is assigned below before it is read.
 */
462 faultcommon:
463 vm = p->p_vmspace;
464 if (vm == NULL)
465 goto we_re_toast;
/* fa is the faulting address read from %cr2; va is its page base. */
466 fa = (vaddr_t)cr2;
467 va = trunc_page(fa);
468 /*
469 * It is only a kernel address space fault iff:
470 * 1. (type & T_USER) == 0 and
471 * 2. pcb_onfault not set or
472 * 3. pcb_onfault set but supervisor space fault
473 * The last can occur during an exec() copyin where the
474 * argument space is lazy-allocated.
475 */
476 if (type == T_PAGEFLT && va >= KERNBASE)
477 map = kernel_map;
478 else
479 map = &vm->vm_map;
480
481 #ifdef DIAGNOSTIC
482 if (map == kernel_map && va == 0) {
483 printf("trap: bad kernel access at %lx\n", va);
484 goto we_re_toast;
485 }
486 #endif
487
/*
 * pcb_onfault is cleared for the duration of uvm_fault() and restored
 * immediately afterwards.
 */
488 onfault = p->p_addr->u_pcb.pcb_onfault;
489 p->p_addr->u_pcb.pcb_onfault = NULL;
490 rv = uvm_fault(map, va, 0, ftype);
491 p->p_addr->u_pcb.pcb_onfault = onfault;
492
/*
 * rv == 0: the fault was resolved. uvm_grow() is called for faults that
 * were not handled in kernel_map. The lock released matches the one
 * taken on entry: KERNEL_UNLOCK for the kernel-mode path,
 * KERNEL_PROC_UNLOCK for the user-mode path.
 */
493 if (rv == 0) {
494 if (map != kernel_map)
495 uvm_grow(p, va);
496 if (type == T_PAGEFLT) {
497 KERNEL_UNLOCK();
498 return;
499 }
500 KERNEL_PROC_UNLOCK(p);
501 goto out;
502 }
503
/*
 * uvm_fault() failed. In kernel mode, recover through pcb_onfault when
 * one is registered, otherwise report and go to we_re_toast.
 * NOTE(review): the we_re_toast jump is taken with KERNEL_LOCK still
 * held.
 */
504 if (type == T_PAGEFLT) {
505 if (pcb->pcb_onfault != 0) {
506 KERNEL_UNLOCK();
507 goto copyfault;
508 }
509 printf("uvm_fault(%p, 0x%lx, 0, %d) -> %x\n",
510 map, va, ftype, rv);
511 goto we_re_toast;
512 }
/* User mode: deliver SIGSEGV carrying the faulting address. */
513 sv.sival_int = fa;
514 trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv);
515 KERNEL_PROC_UNLOCK(p);
516 break;
517 }
518
519 #if 0 /* Should this be left out? */
520 #if !defined(DDB) && !defined(KGDB)
521 /* XXX need to deal with this when DDB is present, too */
522 case T_TRCTRAP: /* kernel trace trap; someone single stepping lcall's */
523 /* syscall has to turn off the trace bit itself */
524 return;
525 #endif
526 #endif
527
/*
 * NOTE(review): both SIGTRAP cases below report rcr2() as the signal
 * value, whereas the other user-mode cases report frame.tf_eip.
 */
528 case T_BPTFLT|T_USER: /* bpt instruction fault */
529 sv.sival_int = rcr2();
530 KERNEL_PROC_LOCK(p);
531 trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_BRKPT, sv);
532 KERNEL_PROC_UNLOCK(p);
533 break;
534 case T_TRCTRAP|T_USER: /* trace trap */
535 sv.sival_int = rcr2();
536 KERNEL_PROC_LOCK(p);
537 trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_TRACE, sv);
538 KERNEL_PROC_UNLOCK(p);
539 break;
540
/*
 * NMI, from either mode. When DDB or KGDB is configured the debugger
 * section ends in an unconditional return, so the isa_nmi() check after
 * it is only reached in kernels built without either debugger.
 */
541 #if NISA > 0
542 case T_NMI:
543 case T_NMI|T_USER:
544 #if defined(DDB) || defined(KGDB)
545 /* NMI can be hooked up to a pushbutton for debugging */
546 printf ("NMI ... going to debugger\n");
547 #ifdef KGDB
548 if (kgdb_trap(type, &frame))
549 return;
550 #endif
551 #ifdef DDB
552 if (kdb_trap(type, 0, &frame))
553 return;
554 #endif
555 return;
556 #endif /* DDB || KGDB */
557 /* machine/parity/power fail/"kitchen sink" faults */
558 if (isa_nmi() == 0)
559 return;
560 else
561 goto we_re_toast;
562 #endif
563 }
564
/*
 * Reached by the cases that `break' out of the switch. Only traps taken
 * from user mode go on to userret().
 */
565 if ((type & T_USER) == 0)
566 return;
567 out:
568 userret(p);
569 }
570
571 /*
572 * syscall(frame):
573 * System call request from POSIX system call gate interface to kernel.
574 * Like trap(), argument is call by reference.
 *
 * Summary of the flow below: the system call number is taken from
 * tf_eax and used to pick an entry from the current emulation's sysent
 * table (the e_nosys entry when the number is out of range). Arguments
 * are copied in from user memory, or taken from registers for the Linux
 * emulations, and the handler is called with the process, the argument
 * array and rval[]. On success rval[0]/rval[1] are stored in
 * tf_eax/tf_edx and the carry flag is cleared; on failure the error
 * number is stored in tf_eax and the carry flag is set. userret() runs
 * before returning.
575 */
576 /*ARGSUSED*/
577 void
578 syscall(struct trapframe frame)
579 {
580 caddr_t params;
581 struct sysent *callp;
582 struct proc *p;
583 int orig_error, error, opc, nsys;
584 size_t argsize;
585 register_t code, args[8], rval[2];
586 #ifdef DIAGNOSTIC
/* Value of lapic_tpr on entry; compared again just before returning. */
587 int ocpl = lapic_tpr;
588 #endif
589
590 uvmexp.syscalls++;
591 #ifdef DIAGNOSTIC
592 if (!USERMODE(frame.tf_cs, frame.tf_eflags))
593 panic("syscall");
594 #endif
595 p = curproc;
596 p->p_md.md_regs = &frame;
/* opc keeps the entry eip so the ERESTART case can back it up. */
597 opc = frame.tf_eip;
598 code = frame.tf_eax;
599
600 nsys = p->p_emul->e_nsysent;
601 callp = p->p_emul->e_sysent;
602
603 #ifdef COMPAT_IBCS2
604 if (p->p_emul == &emul_ibcs2)
605 if (IBCS2_HIGH_SYSCALL(code))
606 code = IBCS2_CVT_HIGH_SYSCALL(code);
607 #endif
/*
 * Arguments are read from user memory starting one int above the saved
 * user stack pointer (presumably skipping a return address pushed by the
 * caller -- confirm against the user-side syscall stubs).
 */
608 params = (caddr_t)frame.tf_esp + sizeof(int);
609
610 #ifdef VM86
611 /*
612 * VM86 mode application found our syscall trap gate by accident; let
613 * it get a SIGSYS and have the VM86 handler in the process take care
614 * of it.
615 */
/*
 * code = -1 fails the range check after the switch and so selects the
 * e_nosys entry. Note that the `else' below governs the whole switch
 * statement that follows the #endif.
 */
616 if (frame.tf_eflags & PSL_VM)
617 code = -1;
618 else
619 #endif
620
/*
 * Indirect system calls: the real number is the first argument on the
 * user stack, and params is advanced past it.
 * NOTE(review): the return value of both copyin() calls in this switch
 * is ignored; if one fails, code keeps the value it had on entry to the
 * switch.
 */
621 switch (code) {
622 case SYS_syscall:
623 #ifdef COMPAT_LINUX
624 /* Linux has a special system setup call as number 0 */
625 if (p->p_emul == &emul_linux_aout ||
626 p->p_emul == &emul_linux_elf)
627 break;
628 #endif
629 /*
630 * Code is first argument, followed by actual args.
631 */
632 copyin(params, &code, sizeof(int));
633 params += sizeof(int);
634 break;
635 case SYS___syscall:
636 /*
637 * Like syscall, but code is a quad, so as to maintain
638 * quad alignment for the rest of the arguments.
639 */
/*
 * Indirection applies only when the process uses the native sysent
 * table or one of the emulations listed below; any other emulation
 * keeps code unchanged.
 */
640 if (callp != sysent
641 #ifdef COMPAT_FREEBSD
642 && p->p_emul != &emul_freebsd_aout
643 && p->p_emul != &emul_freebsd_elf
644 #endif
645 #ifdef COMPAT_AOUT
646 && p->p_emul != &emul_aout
647 #endif
648 #ifdef COMPAT_BSDOS
649 && p->p_emul != &emul_bsdos
650 #endif
651 )
652 break;
653 copyin(params + _QUAD_LOWWORD * sizeof(int), &code, sizeof(int));
654 params += sizeof(quad_t);
655 break;
656 default:
657 break;
658 }
/* Out-of-range numbers are routed to the emulation's e_nosys entry. */
659 if (code < 0 || code >= nsys)
660 callp += p->p_emul->e_nosys; /* illegal */
661 else
662 callp += code;
663 argsize = callp->sy_argsize;
664 #ifdef COMPAT_LINUX
665 /* XXX extra if() for every emul type.. */
666 if (p->p_emul == &emul_linux_aout || p->p_emul == &emul_linux_elf) {
667 /*
668 * Linux passes the args in ebx, ecx, edx, esi, edi, ebp, in
669 * increasing order.
670 */
/*
 * Every case deliberately falls through to the ones after it, so an
 * argsize of 4*k fills args[k-1] down to args[0].
 */
671 switch (argsize) {
672 case 24:
673 args[5] = frame.tf_ebp;
674 case 20:
675 args[4] = frame.tf_edi;
676 case 16:
677 args[3] = frame.tf_esi;
678 case 12:
679 args[2] = frame.tf_edx;
680 case 8:
681 args[1] = frame.tf_ecx;
682 case 4:
683 args[0] = frame.tf_ebx;
684 case 0:
685 break;
686 default:
687 panic("linux syscall with weird argument size %d",
688 argsize);
689 break;
690 }
691 error = 0;
692 }
693 else
694 #endif
695 if (argsize)
696 error = copyin(params, (caddr_t)args, argsize);
697 else
698 error = 0;
/*
 * orig_error keeps the untranslated error for the debug/ktrace hooks,
 * since the `bad' path may map error through the emulation's e_errno
 * table. A copyin failure is acted on only after the call-side hooks
 * below have run.
 */
699 orig_error = error;
700 KERNEL_PROC_LOCK(p);
701 #ifdef SYSCALL_DEBUG
702 scdebug_call(p, code, args);
703 #endif
704 #ifdef KTRACE
705 if (KTRPOINT(p, KTR_SYSCALL))
706 ktrsyscall(p, code, argsize, args);
707 #endif
708 if (error) {
709 KERNEL_PROC_UNLOCK(p);
710 goto bad;
711 }
/*
 * Return values default to 0 and the caller's current edx; the success
 * case below writes both back to the frame.
 */
712 rval[0] = 0;
713 rval[1] = frame.tf_edx;
714 #if NSYSTRACE > 0
715 if (ISSET(p->p_flag, P_SYSTRACE))
716 orig_error = error = systrace_redirect(code, p, args, rval);
717 else
718 #endif
719 orig_error = error = (*callp->sy_call)(p, args, rval);
720 KERNEL_PROC_UNLOCK(p);
721 switch (error) {
722 case 0:
723 frame.tf_eax = rval[0];
724 frame.tf_edx = rval[1];
725 frame.tf_eflags &= ~PSL_C; /* carry bit */
726 break;
727 case ERESTART:
728 /*
729 * The offset to adjust the PC by depends on whether we entered
730 * the kernel through the trap or call gate. We pushed the
731 * size of the instruction into tf_err on entry.
732 */
733 frame.tf_eip = opc - frame.tf_err;
734 break;
735 case EJUSTRETURN:
736 /* nothing to do */
737 break;
738 default:
/*
 * Also entered by the goto from the argument copyin failure above, in
 * which case rval[] has not been initialized. The error is translated
 * through the emulation's e_errno table when one is provided.
 */
739 bad:
740 if (p->p_emul->e_errno)
741 error = p->p_emul->e_errno[error];
742 frame.tf_eax = error;
743 frame.tf_eflags |= PSL_C; /* carry bit */
744 break;
745 }
746
747 #ifdef SYSCALL_DEBUG
748 KERNEL_PROC_LOCK(p);
749 scdebug_ret(p, code, orig_error, rval);
750 KERNEL_PROC_UNLOCK(p);
751 #endif
752 userret(p);
753 #ifdef KTRACE
754 if (KTRPOINT(p, KTR_SYSRET)) {
755 KERNEL_PROC_LOCK(p);
756 ktrsysret(p, code, orig_error, rval[0]);
757 KERNEL_PROC_UNLOCK(p);
758 }
759 #endif
760 #ifdef DIAGNOSTIC
/*
 * Warn if lapic_tpr differs from its value on entry, then restore the
 * entry value.
 * NOTE(review): args[0..2] are printed even when the call had fewer
 * arguments, so they may be uninitialized here.
 */
761 if (lapic_tpr != ocpl) {
762 printf("WARNING: SPL (0x%x) NOT LOWERED ON "
763 "syscall(0x%x, 0x%x, 0x%x, 0x%x...) EXIT, PID %d\n",
764 lapic_tpr, code, args[0], args[1], args[2], p->p_pid);
765 lapic_tpr = ocpl;
766 }
767 #endif
768 }
769
770 void
771 child_return(void *arg)
772 {
773 struct proc *p = (struct proc *)arg;
774 struct trapframe *tf = p->p_md.md_regs;
775
776 tf->tf_eax = 0;
777 tf->tf_eflags &= ~PSL_C;
778
779 KERNEL_PROC_UNLOCK(p);
780
781 userret(p);
782 #ifdef KTRACE
783 if (KTRPOINT(p, KTR_SYSRET)) {
784 KERNEL_PROC_LOCK(p);
785 ktrsysret(p,
786 (p->p_flag & P_PPWAIT) ? SYS_vfork : SYS_fork, 0, 0);
787 KERNEL_PROC_UNLOCK(p);
788 }
789 #endif
790 }