This source file includes the following definitions.
- userret
- trap
- syscall
- child_return
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/proc.h>
46 #include <sys/signalvar.h>
47 #include <sys/user.h>
48 #include <sys/acct.h>
49 #include <sys/kernel.h>
50 #include <sys/signal.h>
51 #ifdef KTRACE
52 #include <sys/ktrace.h>
53 #endif
54 #include <sys/syscall.h>
55
56 #include "systrace.h"
57 #include <dev/systrace.h>
58
59 #include <uvm/uvm_extern.h>
60
61 #include <machine/cpu.h>
62 #include <machine/cpufunc.h>
63 #include <machine/psl.h>
64 #include <machine/reg.h>
65 #include <machine/trap.h>
66 #ifdef DDB
67 #include <machine/db_machdep.h>
68 #endif
69
70 #ifdef KGDB
71 #include <sys/kgdb.h>
72 #endif
73
74 #ifdef COMPAT_IBCS2
75 #include <compat/ibcs2/ibcs2_errno.h>
76 #include <compat/ibcs2/ibcs2_exec.h>
77 extern struct emul emul_ibcs2;
78 #endif
79 #include <sys/exec.h>
80 #ifdef COMPAT_LINUX
81 #include <compat/linux/linux_syscall.h>
82 extern struct emul emul_linux_aout, emul_linux_elf;
83 #endif
84 #ifdef COMPAT_FREEBSD
85 extern struct emul emul_freebsd_aout, emul_freebsd_elf;
86 #endif
87 #ifdef COMPAT_BSDOS
88 extern struct emul emul_bsdos;
89 #endif
90 #ifdef COMPAT_AOUT
91 extern struct emul emul_aout;
92 #endif
93 #ifdef KVM86
94 #include <machine/kvm86.h>
95 #define KVM86MODE (kvm86_incall)
96 #endif
97
98 #include "npx.h"
99
100 static __inline void userret(struct proc *);
101 void trap(struct trapframe);
102 void syscall(struct trapframe);
103
104
105
106
107
108 static __inline void
109 userret(struct proc *p)
110 {
111 int sig;
112
113
114 while ((sig = CURSIG(p)) != 0)
115 postsig(sig);
116
117 p->p_cpu->ci_schedstate.spc_curpriority = p->p_priority = p->p_usrpri;
118 }
119
/*
 * Human-readable trap names, indexed by hardware trap number
 * (frame.tf_trapno).  trap() uses the table when reporting a fatal trap.
 * NOTE(review): the order presumably mirrors the T_* constants in
 * <machine/trap.h> -- keep the two in sync.
 */
char *trap_type[] = {
	"privileged instruction fault",		/*  0 */
	"breakpoint trap",			/*  1 */
	"arithmetic trap",			/*  2 */
	"asynchronous system trap",		/*  3 */
	"protection fault",			/*  4 */
	"trace trap",				/*  5 */
	"page fault",				/*  6 */
	"alignment fault",			/*  7 */
	"integer divide fault",			/*  8 */
	"non-maskable interrupt",		/*  9 */
	"overflow trap",			/* 10 */
	"bounds check fault",			/* 11 */
	"FPU not available fault",		/* 12 */
	"double fault",				/* 13 */
	"FPU operand fetch fault",		/* 14 */
	"invalid TSS fault",			/* 15 */
	"segment not present fault",		/* 16 */
	"stack fault",				/* 17 */
	"machine check",			/* 18 */
	"SIMD FP fault",			/* 19 */
	"reserved trap",			/* 20 */
};
/* Number of entries in trap_type[]; used to bounds-check the index. */
int trap_types = sizeof trap_type / sizeof trap_type[0];

#ifdef DEBUG
/* When non-zero, trap() prints the trap frame of every trap it handles. */
int trapdebug = 0;
#endif
148
149
150
151
152
153
154
155
156
157
158 void
159 trap(struct trapframe frame)
160 {
161 struct proc *p = curproc;
162 int type = frame.tf_trapno;
163 struct pcb *pcb = NULL;
164 extern char resume_iret[], resume_pop_ds[], resume_pop_es[],
165 resume_pop_fs[], resume_pop_gs[];
166 struct trapframe *vframe;
167 int resume;
168 vm_prot_t vftype, ftype;
169 union sigval sv;
170 caddr_t onfault;
171 uint32_t cr2;
172
173 uvmexp.traps++;
174
175
176 if (frame.tf_err & PGEX_W) {
177 vftype = VM_PROT_WRITE;
178 ftype = VM_PROT_READ | VM_PROT_WRITE;
179 } else
180 ftype = vftype = VM_PROT_READ;
181
182 #ifdef DEBUG
183 if (trapdebug) {
184 printf("trap %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n",
185 frame.tf_trapno, frame.tf_err, frame.tf_eip, frame.tf_cs,
186 frame.tf_eflags, rcr2(), lapic_tpr);
187 printf("curproc %p\n", curproc);
188 }
189 #endif
190
191 if (!KERNELMODE(frame.tf_cs, frame.tf_eflags)) {
192 type |= T_USER;
193 p->p_md.md_regs = &frame;
194 }
195
196 switch (type) {
197
198
199 case T_TRCTRAP: {
200 #if defined(DDB) || defined(KGDB)
201
202
203
204
205
206
207 extern int Xosyscall, Xosyscall_end;
208
209 if (frame.tf_eip >= (int)&Xosyscall &&
210 frame.tf_eip <= (int)&Xosyscall_end)
211 return;
212 #else
213 return;
214 #endif
215 }
216
217
218 default:
219 we_re_toast:
220 #ifdef KGDB
221 if (kgdb_trap(type, &frame))
222 return;
223 else {
224
225
226
227
228 if (type == T_BPTFLT) {
229 printf("kgdb: ignored %s\n", trap_type[type]);
230 return;
231 }
232 }
233 #endif
234
235 #ifdef DDB
236 if (kdb_trap(type, 0, &frame))
237 return;
238 #endif
239 if (frame.tf_trapno < trap_types)
240 printf("fatal %s (%d)", trap_type[frame.tf_trapno],
241 frame.tf_trapno);
242 else
243 printf("unknown trap %d", frame.tf_trapno);
244 printf(" in %s mode\n", (type & T_USER) ? "user" : "supervisor");
245 printf("trap type %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n",
246 type, frame.tf_err, frame.tf_eip, frame.tf_cs, frame.tf_eflags, rcr2(), lapic_tpr);
247
248 panic("trap type %d, code=%x, pc=%x",
249 type, frame.tf_err, frame.tf_eip);
250
251
252 case T_PROTFLT:
253 #ifdef KVM86
254 if (KVM86MODE) {
255 kvm86_gpfault(&frame);
256 return;
257 }
258 #endif
259 case T_SEGNPFLT:
260 case T_ALIGNFLT:
261
262 if (p && p->p_addr) {
263 pcb = &p->p_addr->u_pcb;
264 if (pcb->pcb_onfault != 0) {
265 copyfault:
266 frame.tf_eip = (int)pcb->pcb_onfault;
267 return;
268 }
269 }
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286 switch (*(u_char *)frame.tf_eip) {
287 case 0xcf:
288 vframe = (void *)((int)&frame.tf_esp -
289 offsetof(struct trapframe, tf_eip));
290 resume = (int)resume_iret;
291 break;
292 case 0x1f:
293 vframe = (void *)((int)&frame.tf_esp -
294 offsetof(struct trapframe, tf_ds));
295 resume = (int)resume_pop_ds;
296 break;
297 case 0x07:
298 vframe = (void *)((int)&frame.tf_esp -
299 offsetof(struct trapframe, tf_es));
300 resume = (int)resume_pop_es;
301 break;
302 case 0x0f:
303 switch (*(u_char *)(frame.tf_eip+1)) {
304 case 0xa1:
305 vframe = (void *)((int)&frame.tf_esp -
306 offsetof(struct trapframe, tf_fs));
307 resume = (int)resume_pop_fs;
308 break;
309 case 0xa9:
310 vframe = (void *)((int)&frame.tf_esp -
311 offsetof(struct trapframe, tf_gs));
312 resume = (int)resume_pop_gs;
313 break;
314 default:
315 goto we_re_toast;
316 }
317 break;
318 default:
319 goto we_re_toast;
320 }
321 if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
322 goto we_re_toast;
323
324 frame.tf_eip = resume;
325 return;
326
327 case T_PROTFLT|T_USER:
328 KERNEL_PROC_LOCK(p);
329 #ifdef VM86
330 if (frame.tf_eflags & PSL_VM) {
331 vm86_gpfault(p, type & ~T_USER);
332 KERNEL_PROC_UNLOCK(p);
333 goto out;
334 }
335 #endif
336
337 if (pmap_exec_fixup(&p->p_vmspace->vm_map, &frame,
338 &p->p_addr->u_pcb)) {
339 KERNEL_PROC_UNLOCK(p);
340 goto out;
341 }
342
343 sv.sival_int = frame.tf_eip;
344 trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv);
345 KERNEL_PROC_UNLOCK(p);
346 goto out;
347
348 case T_TSSFLT|T_USER:
349 sv.sival_int = frame.tf_eip;
350 KERNEL_PROC_LOCK(p);
351 trapsignal(p, SIGBUS, vftype, BUS_OBJERR, sv);
352 KERNEL_PROC_UNLOCK(p);
353 goto out;
354
355 case T_SEGNPFLT|T_USER:
356 case T_STKFLT|T_USER:
357 sv.sival_int = frame.tf_eip;
358 KERNEL_PROC_LOCK(p);
359 trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv);
360 KERNEL_PROC_UNLOCK(p);
361 goto out;
362
363 case T_ALIGNFLT|T_USER:
364 sv.sival_int = frame.tf_eip;
365 KERNEL_PROC_LOCK(p);
366 trapsignal(p, SIGBUS, vftype, BUS_ADRALN, sv);
367 KERNEL_PROC_UNLOCK(p);
368 goto out;
369
370 case T_PRIVINFLT|T_USER:
371 sv.sival_int = frame.tf_eip;
372 KERNEL_PROC_LOCK(p);
373 trapsignal(p, SIGILL, type &~ T_USER, ILL_PRVOPC, sv);
374 KERNEL_PROC_UNLOCK(p);
375 goto out;
376
377 case T_FPOPFLT|T_USER:
378 sv.sival_int = frame.tf_eip;
379 KERNEL_PROC_LOCK(p);
380 trapsignal(p, SIGILL, type &~ T_USER, ILL_COPROC, sv);
381 KERNEL_PROC_UNLOCK(p);
382 goto out;
383
384 case T_ASTFLT|T_USER:
385 uvmexp.softs++;
386 if (p->p_flag & P_OWEUPC) {
387 KERNEL_PROC_LOCK(p);
388 ADDUPROF(p);
389 KERNEL_PROC_UNLOCK(p);
390 }
391 if (want_resched)
392 preempt(NULL);
393 goto out;
394
395 case T_DNA|T_USER: {
396 printf("pid %d killed due to lack of floating point\n",
397 p->p_pid);
398 sv.sival_int = frame.tf_eip;
399 KERNEL_PROC_LOCK(p);
400 trapsignal(p, SIGKILL, type &~ T_USER, FPE_FLTINV, sv);
401 KERNEL_PROC_UNLOCK(p);
402 goto out;
403 }
404
405 case T_BOUND|T_USER:
406 sv.sival_int = frame.tf_eip;
407 KERNEL_PROC_LOCK(p);
408 trapsignal(p, SIGFPE, type &~ T_USER, FPE_FLTSUB, sv);
409 KERNEL_PROC_UNLOCK(p);
410 goto out;
411 case T_OFLOW|T_USER:
412 sv.sival_int = frame.tf_eip;
413 KERNEL_PROC_LOCK(p);
414 trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTOVF, sv);
415 KERNEL_PROC_UNLOCK(p);
416 goto out;
417 case T_DIVIDE|T_USER:
418 sv.sival_int = frame.tf_eip;
419 KERNEL_PROC_LOCK(p);
420 trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTDIV, sv);
421 KERNEL_PROC_UNLOCK(p);
422 goto out;
423
424 case T_ARITHTRAP|T_USER:
425 sv.sival_int = frame.tf_eip;
426 KERNEL_PROC_LOCK(p);
427 trapsignal(p, SIGFPE, frame.tf_err, FPE_INTOVF, sv);
428 KERNEL_PROC_UNLOCK(p);
429 goto out;
430
431 case T_PAGEFLT:
432 if (p == 0 || p->p_addr == 0)
433 goto we_re_toast;
434 #ifdef LOCKDEBUG
435
436 #ifdef notyet
437 if (simple_lock_held(&sched_lock))
438 #else
439 if (__mp_lock_held(&sched_lock))
440 #endif
441 goto we_re_toast;
442 #endif
443
444 pcb = &p->p_addr->u_pcb;
445 #if 0
446
447 if (frame.tf_err & PGEX_P)
448 goto we_re_toast;
449 #endif
450 cr2 = rcr2();
451 KERNEL_LOCK();
452 goto faultcommon;
453
454 case T_PAGEFLT|T_USER: {
455 vaddr_t va, fa;
456 struct vmspace *vm;
457 struct vm_map *map;
458 int rv;
459
460 cr2 = rcr2();
461 KERNEL_PROC_LOCK(p);
462 faultcommon:
463 vm = p->p_vmspace;
464 if (vm == NULL)
465 goto we_re_toast;
466 fa = (vaddr_t)cr2;
467 va = trunc_page(fa);
468
469
470
471
472
473
474
475
476 if (type == T_PAGEFLT && va >= KERNBASE)
477 map = kernel_map;
478 else
479 map = &vm->vm_map;
480
481 #ifdef DIAGNOSTIC
482 if (map == kernel_map && va == 0) {
483 printf("trap: bad kernel access at %lx\n", va);
484 goto we_re_toast;
485 }
486 #endif
487
488 onfault = p->p_addr->u_pcb.pcb_onfault;
489 p->p_addr->u_pcb.pcb_onfault = NULL;
490 rv = uvm_fault(map, va, 0, ftype);
491 p->p_addr->u_pcb.pcb_onfault = onfault;
492
493 if (rv == 0) {
494 if (map != kernel_map)
495 uvm_grow(p, va);
496 if (type == T_PAGEFLT) {
497 KERNEL_UNLOCK();
498 return;
499 }
500 KERNEL_PROC_UNLOCK(p);
501 goto out;
502 }
503
504 if (type == T_PAGEFLT) {
505 if (pcb->pcb_onfault != 0) {
506 KERNEL_UNLOCK();
507 goto copyfault;
508 }
509 printf("uvm_fault(%p, 0x%lx, 0, %d) -> %x\n",
510 map, va, ftype, rv);
511 goto we_re_toast;
512 }
513 sv.sival_int = fa;
514 trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv);
515 KERNEL_PROC_UNLOCK(p);
516 break;
517 }
518
519 #if 0
520 #if !defined(DDB) && !defined(KGDB)
521
522 case T_TRCTRAP:
523
524 return;
525 #endif
526 #endif
527
528 case T_BPTFLT|T_USER:
529 sv.sival_int = rcr2();
530 KERNEL_PROC_LOCK(p);
531 trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_BRKPT, sv);
532 KERNEL_PROC_UNLOCK(p);
533 break;
534 case T_TRCTRAP|T_USER:
535 sv.sival_int = rcr2();
536 KERNEL_PROC_LOCK(p);
537 trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_TRACE, sv);
538 KERNEL_PROC_UNLOCK(p);
539 break;
540
541 #if NISA > 0
542 case T_NMI:
543 case T_NMI|T_USER:
544 #if defined(DDB) || defined(KGDB)
545
546 printf ("NMI ... going to debugger\n");
547 #ifdef KGDB
548 if (kgdb_trap(type, &frame))
549 return;
550 #endif
551 #ifdef DDB
552 if (kdb_trap(type, 0, &frame))
553 return;
554 #endif
555 return;
556 #endif
557
558 if (isa_nmi() == 0)
559 return;
560 else
561 goto we_re_toast;
562 #endif
563 }
564
565 if ((type & T_USER) == 0)
566 return;
567 out:
568 userret(p);
569 }
570
571
572
573
574
575
576
577 void
578 syscall(struct trapframe frame)
579 {
580 caddr_t params;
581 struct sysent *callp;
582 struct proc *p;
583 int orig_error, error, opc, nsys;
584 size_t argsize;
585 register_t code, args[8], rval[2];
586 #ifdef DIAGNOSTIC
587 int ocpl = lapic_tpr;
588 #endif
589
590 uvmexp.syscalls++;
591 #ifdef DIAGNOSTIC
592 if (!USERMODE(frame.tf_cs, frame.tf_eflags))
593 panic("syscall");
594 #endif
595 p = curproc;
596 p->p_md.md_regs = &frame;
597 opc = frame.tf_eip;
598 code = frame.tf_eax;
599
600 nsys = p->p_emul->e_nsysent;
601 callp = p->p_emul->e_sysent;
602
603 #ifdef COMPAT_IBCS2
604 if (p->p_emul == &emul_ibcs2)
605 if (IBCS2_HIGH_SYSCALL(code))
606 code = IBCS2_CVT_HIGH_SYSCALL(code);
607 #endif
608 params = (caddr_t)frame.tf_esp + sizeof(int);
609
610 #ifdef VM86
611
612
613
614
615
616 if (frame.tf_eflags & PSL_VM)
617 code = -1;
618 else
619 #endif
620
621 switch (code) {
622 case SYS_syscall:
623 #ifdef COMPAT_LINUX
624
625 if (p->p_emul == &emul_linux_aout ||
626 p->p_emul == &emul_linux_elf)
627 break;
628 #endif
629
630
631
632 copyin(params, &code, sizeof(int));
633 params += sizeof(int);
634 break;
635 case SYS___syscall:
636
637
638
639
640 if (callp != sysent
641 #ifdef COMPAT_FREEBSD
642 && p->p_emul != &emul_freebsd_aout
643 && p->p_emul != &emul_freebsd_elf
644 #endif
645 #ifdef COMPAT_AOUT
646 && p->p_emul != &emul_aout
647 #endif
648 #ifdef COMPAT_BSDOS
649 && p->p_emul != &emul_bsdos
650 #endif
651 )
652 break;
653 copyin(params + _QUAD_LOWWORD * sizeof(int), &code, sizeof(int));
654 params += sizeof(quad_t);
655 break;
656 default:
657 break;
658 }
659 if (code < 0 || code >= nsys)
660 callp += p->p_emul->e_nosys;
661 else
662 callp += code;
663 argsize = callp->sy_argsize;
664 #ifdef COMPAT_LINUX
665
666 if (p->p_emul == &emul_linux_aout || p->p_emul == &emul_linux_elf) {
667
668
669
670
671 switch (argsize) {
672 case 24:
673 args[5] = frame.tf_ebp;
674 case 20:
675 args[4] = frame.tf_edi;
676 case 16:
677 args[3] = frame.tf_esi;
678 case 12:
679 args[2] = frame.tf_edx;
680 case 8:
681 args[1] = frame.tf_ecx;
682 case 4:
683 args[0] = frame.tf_ebx;
684 case 0:
685 break;
686 default:
687 panic("linux syscall with weird argument size %d",
688 argsize);
689 break;
690 }
691 error = 0;
692 }
693 else
694 #endif
695 if (argsize)
696 error = copyin(params, (caddr_t)args, argsize);
697 else
698 error = 0;
699 orig_error = error;
700 KERNEL_PROC_LOCK(p);
701 #ifdef SYSCALL_DEBUG
702 scdebug_call(p, code, args);
703 #endif
704 #ifdef KTRACE
705 if (KTRPOINT(p, KTR_SYSCALL))
706 ktrsyscall(p, code, argsize, args);
707 #endif
708 if (error) {
709 KERNEL_PROC_UNLOCK(p);
710 goto bad;
711 }
712 rval[0] = 0;
713 rval[1] = frame.tf_edx;
714 #if NSYSTRACE > 0
715 if (ISSET(p->p_flag, P_SYSTRACE))
716 orig_error = error = systrace_redirect(code, p, args, rval);
717 else
718 #endif
719 orig_error = error = (*callp->sy_call)(p, args, rval);
720 KERNEL_PROC_UNLOCK(p);
721 switch (error) {
722 case 0:
723 frame.tf_eax = rval[0];
724 frame.tf_edx = rval[1];
725 frame.tf_eflags &= ~PSL_C;
726 break;
727 case ERESTART:
728
729
730
731
732
733 frame.tf_eip = opc - frame.tf_err;
734 break;
735 case EJUSTRETURN:
736
737 break;
738 default:
739 bad:
740 if (p->p_emul->e_errno)
741 error = p->p_emul->e_errno[error];
742 frame.tf_eax = error;
743 frame.tf_eflags |= PSL_C;
744 break;
745 }
746
747 #ifdef SYSCALL_DEBUG
748 KERNEL_PROC_LOCK(p);
749 scdebug_ret(p, code, orig_error, rval);
750 KERNEL_PROC_UNLOCK(p);
751 #endif
752 userret(p);
753 #ifdef KTRACE
754 if (KTRPOINT(p, KTR_SYSRET)) {
755 KERNEL_PROC_LOCK(p);
756 ktrsysret(p, code, orig_error, rval[0]);
757 KERNEL_PROC_UNLOCK(p);
758 }
759 #endif
760 #ifdef DIAGNOSTIC
761 if (lapic_tpr != ocpl) {
762 printf("WARNING: SPL (0x%x) NOT LOWERED ON "
763 "syscall(0x%x, 0x%x, 0x%x, 0x%x...) EXIT, PID %d\n",
764 lapic_tpr, code, args[0], args[1], args[2], p->p_pid);
765 lapic_tpr = ocpl;
766 }
767 #endif
768 }
769
770 void
771 child_return(void *arg)
772 {
773 struct proc *p = (struct proc *)arg;
774 struct trapframe *tf = p->p_md.md_regs;
775
776 tf->tf_eax = 0;
777 tf->tf_eflags &= ~PSL_C;
778
779 KERNEL_PROC_UNLOCK(p);
780
781 userret(p);
782 #ifdef KTRACE
783 if (KTRPOINT(p, KTR_SYSRET)) {
784 KERNEL_PROC_LOCK(p);
785 ktrsysret(p,
786 (p->p_flag & P_PPWAIT) ? SYS_vfork : SYS_fork, 0, 0);
787 KERNEL_PROC_UNLOCK(p);
788 }
789 #endif
790 }