1 /* $OpenBSD: npx.c,v 1.42 2006/10/18 19:48:32 tom Exp $ */
2 /* $NetBSD: npx.c,v 1.57 1996/05/12 23:12:24 mycroft Exp $ */
3
/*
 * Debug tracing hook.  IPRINTF() takes one fully parenthesized printf
 * argument list, e.g. IPRINTF(("%s: x\n", name)).  Tracing is compiled
 * out; flip the condition below to route the calls to printf.
 */
#if 1
#define	IPRINTF(x)
#else
#define	IPRINTF(x)	printf x
#endif
9
10 /*-
11 * Copyright (c) 1994, 1995 Charles M. Hannum. All rights reserved.
12 * Copyright (c) 1990 William Jolitz.
13 * Copyright (c) 1991 The Regents of the University of California.
14 * All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)npx.c 7.2 (Berkeley) 5/12/91
41 */
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/conf.h>
46 #include <sys/file.h>
47 #include <sys/proc.h>
48 #include <sys/signalvar.h>
49 #include <sys/user.h>
50 #include <sys/ioctl.h>
51 #include <sys/device.h>
52
53 #include <uvm/uvm_extern.h>
54
55 #include <machine/cpu.h>
56 #include <machine/intr.h>
57 #include <machine/npx.h>
58 #include <machine/pio.h>
59 #include <machine/cpufunc.h>
60 #include <machine/pcb.h>
61 #include <machine/trap.h>
62 #include <machine/specialreg.h>
63 #include <machine/i8259.h>
64
65 #include <dev/isa/isareg.h>
66 #include <dev/isa/isavar.h>
67
68 /*
69 * 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
70 *
71 * We do lazy initialization and switching using the TS bit in cr0 and the
72 * MDP_USEDFPU bit in mdproc.
73 *
74 * DNA exceptions are handled like this:
75 *
76 * 1) If there is no NPX, return and go to the emulator.
77 * 2) If someone else has used the NPX, save its state into that process's PCB.
78 * 3a) If MDP_USEDFPU is not set, set it and initialize the NPX.
79 * 3b) Otherwise, reload the process's previous NPX state.
80 *
81 * When a process is created or exec()s, its saved cr0 image has the TS bit
82 * set and the MDP_USEDFPU bit clear. The MDP_USEDFPU bit is set when the
83 * process first gets a DNA and the NPX is initialized. The TS bit is turned
84 * off when the NPX is used, and turned on again later when the process's NPX
85 * state is saved.
86 */
87
/*
 * One-instruction wrappers around the x87 and CR0 operations used below.
 *
 * Macros taking "addr" expect a pointer to the memory operand; the
 * argument is parenthesized so any pointer expression may be passed.
 * Every asm is marked __volatile: these instructions change FPU state
 * (fnsave also reinitializes the FPU), so they must never be removed or
 * merged even when their memory output looks unused.
 */
#define	fldcw(addr)		__asm __volatile("fldcw %0" : : "m" (*(addr)))
#define	fnclex()		__asm __volatile("fnclex")
#define	fninit()		__asm __volatile("fninit")
#define	fnsave(addr)		__asm __volatile("fnsave %0" : "=m" (*(addr)))
#define	fnstcw(addr)		__asm __volatile("fnstcw %0" : "=m" (*(addr)))
#define	fnstsw(addr)		__asm __volatile("fnstsw %0" : "=m" (*(addr)))
#define	fp_divide_by_0()	__asm __volatile("fldz; fld1; fdiv %st,%st(1); fwait")
#define	frstor(addr)		__asm __volatile("frstor %0" : : "m" (*(addr)))
#define	fwait()			__asm __volatile("fwait")
#define	clts()			__asm __volatile("clts")
/* Set CR0_TS by read-modify-write of cr0. */
#define	stts()			lcr0(rcr0() | CR0_TS)
99
100 int npxintr(void *);
101 static int npxprobe1(struct isa_attach_args *);
102 static int x86fpflags_to_siginfo(u_int32_t);
103
104
105 struct npx_softc {
106 struct device sc_dev;
107 void *sc_ih;
108 };
109
110 int npxprobe(struct device *, void *, void *);
111 void npxattach(struct device *, struct device *, void *);
112
113 struct cfattach npx_ca = {
114 sizeof(struct npx_softc), npxprobe, npxattach
115 };
116
117 struct cfdriver npx_cd = {
118 NULL, "npx", DV_DULL
119 };
120
/*
 * Outcome of the probe: whether a coprocessor was found and which
 * mechanism it uses to report errors.
 */
enum npx_type {
	NPX_NONE	= 0,	/* no usable FPU */
	NPX_INTERRUPT	= 1,	/* errors are delivered through IRQ13 */
	NPX_EXCEPTION	= 2,	/* errors are delivered through exception 16 */
	NPX_BROKEN	= 3,	/* FPU answered, but no error was delivered */
	NPX_CPUID	= 4	/* FPU presence reported by CPUID */
};
128
129 static enum npx_type npx_type;
130 static volatile u_int npx_intrs_while_probing;
131 static volatile u_int npx_traps_while_probing;
132
133 extern int i386_fpu_present;
134 extern int i386_fpu_exception;
135 extern int i386_fpu_fdivbug;
136
#ifdef I686_CPU
/*
 * FXSAVE/FXRSTOR wrappers; "addr" points at the save area.  The argument
 * is parenthesized and the asm is __volatile so the instruction is always
 * emitted, even if the compiler believes the stored image is unused.
 */
#define	fxsave(addr)		__asm __volatile("fxsave %0" : "=m" (*(addr)))
#define	fxrstor(addr)		__asm __volatile("fxrstor %0" : : "m" (*(addr)))
#endif /* I686_CPU */
141
142 static __inline void
143 fpu_save(union savefpu *addr)
144 {
145
146 #ifdef I686_CPU
147 if (i386_use_fxsave) {
148 fxsave(&addr->sv_xmm);
149 /* FXSAVE doesn't FNINIT like FNSAVE does -- so do it here. */
150 fninit();
151 } else
152 #endif /* I686_CPU */
153 fnsave(&addr->sv_87);
154 }
155
/*
 * Initial value of npxdna_func, in place until npxattach() stores a real
 * handler.  It panics unconditionally.
 */
static int
npxdna_notset(struct cpu_info *ci)
{
	panic("npxdna vector not initialized");
}

/* DNA handler implementations; npxattach() selects one of them. */
int	npxdna_s87(struct cpu_info *);
#ifdef I686_CPU
int	npxdna_xmm(struct cpu_info *);
#endif /* I686_CPU */

/* Currently installed DNA handler. */
int	(*npxdna_func)(struct cpu_info *) = npxdna_notset;

void	npxexit(void);
168
/*
 * Special interrupt handlers.  Someday intr0-intr15 will be used to count
 * interrupts.  We'll still need a special exception 16 handler.  The busy
 * latch stuff in probeintr() can be moved to npxprobe().
 */
/*
 * Temporary interrupt handler installed by npxprobe().  It counts the
 * interrupt in npx_intrs_while_probing, sends EOI to both interrupt
 * controllers and clears the BUSY# latch.  Port numbers are written as
 * literals because cpp macros are not expanded inside asm strings.
 */
void	probeintr(void);
__asm(
	".text\n"
	"probeintr:\n"
	"\tss\n"
	"\tincl\tnpx_intrs_while_probing\n"
	"\tpushl\t%eax\n"
	"\tmovb\t$0x20,%al\n"		/* EOI */
	"\toutb\t%al,$0xa0\n"		/* IO_ICU2 */
	"\toutb\t%al,$0x20\n"		/* IO_ICU1 */
	"\tmovb\t$0,%al\n"
	"\toutb\t%al,$0xf0\n"		/* clear BUSY# latch */
	"\tpopl\t%eax\n"
	"\tiret\n");
187
/*
 * Temporary exception 16 handler installed by npxprobe().  It counts the
 * trap in npx_traps_while_probing and clears the pending FPU exception
 * before returning.
 */
void	probetrap(void);
__asm(
	".text\n"
	"probetrap:\n"
	"\tss\n"
	"\tincl\tnpx_traps_while_probing\n"
	"\tfnclex\n"
	"\tiret\n");
195
196 static inline int
197 npxprobe1(struct isa_attach_args *ia)
198 {
199 int control;
200 int status;
201
202 ia->ia_iosize = 16;
203 ia->ia_msize = 0;
204
205 /*
206 * Finish resetting the coprocessor, if any. If there is an error
207 * pending, then we may get a bogus IRQ13, but probeintr() will handle
208 * it OK. Bogus halts have never been observed, but we enabled
209 * IRQ13 and cleared the BUSY# latch early to handle them anyway.
210 */
211 fninit();
212 delay(1000); /* wait for any IRQ13 (fwait might hang) */
213
214 /*
215 * Check for a status of mostly zero.
216 */
217 status = 0x5a5a;
218 fnstsw(&status);
219 if ((status & 0xb8ff) == 0) {
220 /*
221 * Good, now check for a proper control word.
222 */
223 control = 0x5a5a;
224 fnstcw(&control);
225 if ((control & 0x1f3f) == 0x033f) {
226 /*
227 * We have an npx, now divide by 0 to see if exception
228 * 16 works.
229 */
230 control &= ~(1 << 2); /* enable divide by 0 trap */
231 fldcw(&control);
232 npx_traps_while_probing = npx_intrs_while_probing = 0;
233 fp_divide_by_0();
234 delay(1);
235 if (npx_traps_while_probing != 0) {
236 /*
237 * Good, exception 16 works.
238 */
239 npx_type = NPX_EXCEPTION;
240 ia->ia_irq = IRQUNK; /* zap the interrupt */
241 i386_fpu_exception = 1;
242 } else if (npx_intrs_while_probing != 0) {
243 /*
244 * Bad, we are stuck with IRQ13.
245 */
246 npx_type = NPX_INTERRUPT;
247 } else {
248 /*
249 * Worse, even IRQ13 is broken.
250 */
251 npx_type = NPX_BROKEN;
252 ia->ia_irq = IRQUNK;
253 }
254 return 1;
255 }
256 }
257
258 /*
259 * Probe failed. There is no usable FPU.
260 */
261 npx_type = NPX_NONE;
262 return 0;
263 }
264
265 /*
266 * Probe routine. Initialize cr0 to give correct behaviour for [f]wait
267 * whether the device exists or not (XXX should be elsewhere). Set flags
268 * to tell npxattach() what to do. Modify device struct if npx doesn't
269 * need to use interrupts. Return 1 if device exists.
270 */
271 int
272 npxprobe(struct device *parent, void *match, void *aux)
273 {
274 struct isa_attach_args *ia = aux;
275 int irq;
276 int result;
277 u_long save_eflags;
278 unsigned save_imen;
279 struct gate_descriptor save_idt_npxintr;
280 struct gate_descriptor save_idt_npxtrap;
281
282 if (cpu_feature & CPUID_FPU) {
283 npx_type = NPX_CPUID;
284 i386_fpu_exception = 1;
285 ia->ia_irq = IRQUNK; /* Don't want the interrupt vector */
286 ia->ia_iosize = 16;
287 ia->ia_msize = 0;
288 return 1;
289 }
290
291 /*
292 * This routine is now just a wrapper for npxprobe1(), to install
293 * special npx interrupt and trap handlers, to enable npx interrupts
294 * and to disable other interrupts. Someday isa_configure() will
295 * install suitable handlers and run with interrupts enabled so we
296 * won't need to do so much here.
297 */
298 irq = NRSVIDT + ia->ia_irq;
299 save_eflags = read_eflags();
300 disable_intr();
301 save_idt_npxintr = idt[irq];
302 save_idt_npxtrap = idt[16];
303 setgate(&idt[irq], probeintr, 0, SDT_SYS386IGT, SEL_KPL, GICODE_SEL);
304 setgate(&idt[16], probetrap, 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL);
305 save_imen = imen;
306 imen = ~((1 << IRQ_SLAVE) | (1 << ia->ia_irq));
307 SET_ICUS();
308
309 /*
310 * Partially reset the coprocessor, if any. Some BIOS's don't reset
311 * it after a warm boot.
312 */
313 outb(0xf1, 0); /* full reset on some systems, NOP on others */
314 delay(1000);
315 outb(0xf0, 0); /* clear BUSY# latch */
316
317 /*
318 * We set CR0 in locore to trap all ESC and WAIT instructions.
319 * We have to turn off the CR0_EM bit temporarily while probing.
320 */
321 lcr0(rcr0() & ~(CR0_EM|CR0_TS));
322 enable_intr();
323 result = npxprobe1(ia);
324 disable_intr();
325 lcr0(rcr0() | (CR0_EM|CR0_TS));
326
327 imen = save_imen;
328 SET_ICUS();
329 idt[irq] = save_idt_npxintr;
330 idt[16] = save_idt_npxtrap;
331 write_eflags(save_eflags);
332 return (result);
333 }
334
/*
 * int npx586bug1(int x, int y)
 *
 * Evaluates x - (x / y) * y on the FPU and returns the result converted
 * to an integer.  npxinit() treats a nonzero result as evidence of the
 * Pentium FDIV bug.
 */
int	npx586bug1(int, int);
__asm(
	".text\n"
	"npx586bug1:\n"
	"\tfildl\t4(%esp)\n"		/* x */
	"\tfildl\t8(%esp)\n"		/* y */
	"\tfld\t%st(1)\n"
	"\tfdiv\t%st(1),%st\n"		/* x/y */
	"\tfmulp\t%st,%st(1)\n"		/* (x/y)*y */
	"\tfsubrp\t%st,%st(1)\n"	/* x-(x/y)*y */
	"\tpushl\t$0\n"
	"\tfistpl\t(%esp)\n"
	"\tpopl\t%eax\n"
	"\tret\n");
348
349 void
350 npxinit(struct cpu_info *ci)
351 {
352 lcr0(rcr0() & ~(CR0_EM|CR0_TS));
353 fninit();
354 if (npx586bug1(4195835, 3145727) != 0) {
355 i386_fpu_fdivbug = 1;
356 printf("%s: WARNING: Pentium FDIV bug detected!\n",
357 ci->ci_dev.dv_xname);
358 }
359 lcr0(rcr0() | (CR0_TS));
360 }
361
362 /*
363 * Attach routine - announce which it is, and wire into system
364 */
365 void
366 npxattach(struct device *parent, struct device *self, void *aux)
367 {
368 struct npx_softc *sc = (void *)self;
369 struct isa_attach_args *ia = aux;
370
371 switch (npx_type) {
372 case NPX_INTERRUPT:
373 printf("\n");
374 lcr0(rcr0() & ~CR0_NE);
375 sc->sc_ih = isa_intr_establish(ia->ia_ic, ia->ia_irq,
376 IST_EDGE, IPL_NONE, npxintr, 0, sc->sc_dev.dv_xname);
377 break;
378 case NPX_EXCEPTION:
379 printf(": using exception 16\n");
380 break;
381 case NPX_CPUID:
382 printf(": reported by CPUID; using exception 16\n");
383 npx_type = NPX_EXCEPTION;
384 break;
385 case NPX_BROKEN:
386 printf(": error reporting broken; not using\n");
387 npx_type = NPX_NONE;
388 return;
389 case NPX_NONE:
390 return;
391 }
392
393 npxinit(&cpu_info_primary);
394 i386_fpu_present = 1;
395
396 #ifdef I686_CPU
397 if (i386_use_fxsave)
398 npxdna_func = npxdna_xmm;
399 else
400 #endif /* I686_CPU */
401 npxdna_func = npxdna_s87;
402 }
403
404 /*
405 * Record the FPU state and reinitialize it all except for the control word.
406 * Then generate a SIGFPE.
407 *
408 * Reinitializing the state allows naive SIGFPE handlers to longjmp without
409 * doing any fixups.
410 *
411 * XXX there is currently no way to pass the full error state to signal
412 * handlers, and if this is a nested interrupt there is no way to pass even
413 * a status code! So there is no way to have a non-naive SIGFPE handler. At
414 * best a handler could do an fninit followed by an fldcw of a static value.
415 * fnclex would be of little use because it would leave junk on the FPU stack.
416 * Returning from the handler would be even less safe than usual because
417 * IRQ13 exception handling makes exceptions even less precise than usual.
418 */
419 int
420 npxintr(void *arg)
421 {
422 struct cpu_info *ci = curcpu();
423 struct proc *p = ci->ci_fpcurproc;
424 union savefpu *addr;
425 struct intrframe *frame = arg;
426 int code;
427 union sigval sv;
428
429 uvmexp.traps++;
430 IPRINTF(("%s: fp intr\n", ci->ci_dev.dv_xname));
431
432 if (p == NULL || npx_type == NPX_NONE) {
433 /* XXX no %p in stand/printf.c. Cast to quiet gcc -Wall. */
434 printf("npxintr: p = %lx, curproc = %lx, npx_type = %d\n",
435 (u_long) p, (u_long) curproc, npx_type);
436 panic("npxintr from nowhere");
437 }
438 /*
439 * Clear the interrupt latch.
440 */
441 outb(0xf0, 0);
442 /*
443 * If we're saving, ignore the interrupt. The FPU will happily
444 * generate another one when we restore the state later.
445 */
446 if (ci->ci_fpsaving)
447 return (1);
448
449 #ifdef DIAGNOSTIC
450 /*
451 * At this point, fpcurproc should be curproc. If it wasn't, the TS
452 * bit should be set, and we should have gotten a DNA exception.
453 */
454 if (p != curproc)
455 panic("npxintr: wrong process");
456 #endif
457
458 /*
459 * Find the address of fpcurproc's saved FPU state. (Given the
460 * invariant above, this is always the one in curpcb.)
461 */
462 addr = &p->p_addr->u_pcb.pcb_savefpu;
463 /*
464 * Save state. This does an implied fninit. It had better not halt
465 * the cpu or we'll hang.
466 */
467 fpu_save(addr);
468 fwait();
469 /*
470 * Restore control word (was clobbered by fpu_save).
471 */
472 if (i386_use_fxsave) {
473 fldcw(&addr->sv_xmm.sv_env.en_cw);
474 /*
475 * FNINIT doesn't affect MXCSR or the XMM registers;
476 * no need to re-load MXCSR here.
477 */
478 } else
479 fldcw(&addr->sv_87.sv_env.en_cw);
480 fwait();
481 /*
482 * Remember the exception status word and tag word. The current
483 * (almost fninit'ed) fpu state is in the fpu and the exception
484 * state just saved will soon be junk. However, the implied fninit
485 * doesn't change the error pointers or register contents, and we
486 * preserved the control word and will copy the status and tag
487 * words, so the complete exception state can be recovered.
488 */
489 if (i386_use_fxsave) {
490 addr->sv_xmm.sv_ex_sw = addr->sv_xmm.sv_env.en_sw;
491 addr->sv_xmm.sv_ex_tw = addr->sv_xmm.sv_env.en_tw;
492 } else {
493 addr->sv_87.sv_ex_sw = addr->sv_87.sv_env.en_sw;
494 addr->sv_87.sv_ex_tw = addr->sv_87.sv_env.en_tw;
495 }
496
497 /*
498 * Pass exception to process. If it's the current process, try to do
499 * it immediately.
500 */
501 if (p == curproc && USERMODE(frame->if_cs, frame->if_eflags)) {
502 /*
503 * Interrupt is essentially a trap, so we can afford to call
504 * the SIGFPE handler (if any) as soon as the interrupt
505 * returns.
506 *
507 * XXX little or nothing is gained from this, and plenty is
508 * lost - the interrupt frame has to contain the trap frame
509 * (this is otherwise only necessary for the rescheduling trap
510 * in doreti, and the frame for that could easily be set up
511 * just before it is used).
512 */
513 p->p_md.md_regs = (struct trapframe *)&frame->if_fs;
514
515 /*
516 * Encode the appropriate code for detailed information on
517 * this exception.
518 */
519 if (i386_use_fxsave)
520 code = x86fpflags_to_siginfo(addr->sv_xmm.sv_ex_sw);
521 else
522 code = x86fpflags_to_siginfo(addr->sv_87.sv_ex_sw);
523 sv.sival_int = frame->if_eip;
524 trapsignal(p, SIGFPE, T_ARITHTRAP, code, sv);
525 } else {
526 /*
527 * Nested interrupt. These losers occur when:
528 * o an IRQ13 is bogusly generated at a bogus time, e.g.:
529 * o immediately after an fnsave or frstor of an
530 * error state.
531 * o a couple of 386 instructions after
532 * "fstpl _memvar" causes a stack overflow.
533 * These are especially nasty when combined with a
534 * trace trap.
535 * o an IRQ13 occurs at the same time as another higher-
536 * priority interrupt.
537 *
538 * Treat them like a true async interrupt.
539 */
540 psignal(p, SIGFPE);
541 }
542
543 return (1);
544 }
545
/*
 * Translate x87 status-word exception flags into a SIGFPE code.
 *
 * Bits 0-6 of flags are examined from the lowest bit upward and the code
 * for the first set bit is returned.  If none of those bits is set the
 * function punts and returns FPE_FLTINV.
 *
 * NOTE(review): the caller passes the raw status word, so a sticky flag
 * left over from an earlier masked exception can win over the one that
 * actually trapped -- confirm whether masking with the control word is
 * wanted.
 */
static int
x86fpflags_to_siginfo(u_int32_t flags)
{
	/* Indexed by status-word bit number; read-only. */
	static const int x86fp_siginfo_table[] = {
		FPE_FLTINV, /* bit 0 - invalid operation */
		FPE_FLTRES, /* bit 1 - denormal operand */
		FPE_FLTDIV, /* bit 2 - divide by zero */
		FPE_FLTOVF, /* bit 3 - fp overflow */
		FPE_FLTUND, /* bit 4 - fp underflow */
		FPE_FLTRES, /* bit 5 - fp precision */
		FPE_FLTINV, /* bit 6 - stack fault */
	};
	unsigned int i;

	/* Unsigned index and mask: no signed/unsigned comparison or shift. */
	for (i = 0;
	    i < sizeof(x86fp_siginfo_table) / sizeof(x86fp_siginfo_table[0]);
	    i++) {
		if (flags & (1U << i))
			return (x86fp_siginfo_table[i]);
	}
	/* punt if flags not set */
	return (FPE_FLTINV);
}
567
568 /*
569 * Implement device not available (DNA) exception
570 *
571 * If we were the last process to use the FPU, we can simply return.
572 * Otherwise, we save the previous state, if necessary, and restore our last
573 * saved state.
574 */
575
576 /*
577 * XXX It is unclear if the code below is correct in the multiprocessor
578 * XXX case. Check the NetBSD sources once again to be sure.
579 */
#ifdef I686_CPU
/*
 * DNA handler for the FXSAVE save-area format.
 *
 * Evicts whatever FPU state this CPU currently holds (or just resets the
 * FPU if it holds none), makes the current process the owner of the FPU
 * on ci, and then either loads the process's initial control word (first
 * use) or restores its saved state.
 *
 * Returns 1 when the trap was handled, 0 if it arrived while a save was
 * in progress.
 */
int
npxdna_xmm(struct cpu_info *ci)
{
	struct proc *p;
	int ipl;

	if (ci->ci_fpsaving) {
		printf("recursive npx trap; cr0=%x\n", rcr0());
		return (0);
	}

	ipl = splipi();		/* lock out IPI's while we clean house.. */

#ifdef MULTIPROCESSOR
	p = ci->ci_curproc;
#else
	p = curproc;
#endif

	IPRINTF(("%s: dna for %lx%s\n", ci->ci_dev.dv_xname, (u_long)p,
	    (p->p_md.md_flags & MDP_USEDFPU) ? " (used fpu)" : ""));

	/*
	 * XXX should have a fast-path here when no save/restore is necessary
	 */
	/*
	 * With no previous owner, initialize the FPU to clear any
	 * exceptions.  Otherwise save the owner's state, which does an
	 * implicit initialization.
	 */
	if (ci->ci_fpcurproc == NULL) {
		clts();
		IPRINTF(("%s: fp init\n", ci->ci_dev.dv_xname));
		fninit();
		fwait();
		stts();
	} else {
		IPRINTF(("%s: fp save %lx\n", ci->ci_dev.dv_xname,
		    (u_long)ci->ci_fpcurproc));
		npxsave_cpu(ci, 1);
	}
	splx(ipl);

	IPRINTF(("%s: done saving\n", ci->ci_dev.dv_xname));
	KDASSERT(ci->ci_fpcurproc == NULL);
#ifdef MULTIPROCESSOR
	/* Our state may still be held by another CPU; fetch it first. */
	if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
		npxsave_proc(p, 1);
#else
	KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL);
#endif

	/* Let p use the FPU and record the ownership in both directions. */
	p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
	clts();
	ipl = splipi();
	ci->ci_fpcurproc = p;
	p->p_addr->u_pcb.pcb_fpcpu = ci;
	splx(ipl);
	uvmexp.fpswtch++;

	if (p->p_md.md_flags & MDP_USEDFPU) {
		static double zero = 0.0;

		/*
		 * amd fpu does not restore fip, fdp, fop on fxrstor
		 * thus leaking other process's execution history.
		 */
		fnclex();
		__asm __volatile("ffree %%st(7)\n\tfld %0" : : "m" (zero));
		fxrstor(&p->p_addr->u_pcb.pcb_savefpu.sv_xmm);
	} else {
		/* First FPU use: load the control word from the save area. */
		fldcw(&p->p_addr->u_pcb.pcb_savefpu.sv_xmm.sv_env.en_cw);
		p->p_md.md_flags |= MDP_USEDFPU;
	}

	return (1);
}
#endif /* I686_CPU */
658
659 int
660 npxdna_s87(struct cpu_info *ci)
661 {
662 struct proc *p;
663 int s;
664
665 KDASSERT(i386_use_fxsave == 0);
666
667 if (ci->ci_fpsaving) {
668 printf("recursive npx trap; cr0=%x\n", rcr0());
669 return (0);
670 }
671
672 s = splipi(); /* lock out IPI's while we clean house.. */
673 #ifdef MULTIPROCESSOR
674 p = ci->ci_curproc;
675 #else
676 p = curproc;
677 #endif
678
679 IPRINTF(("%s: dna for %lx%s\n", ci->ci_dev.dv_xname, (u_long)p,
680 (p->p_md.md_flags & MDP_USEDFPU) ? " (used fpu)" : ""));
681
682 /*
683 * If someone else was using our FPU, save their state (which does an
684 * implicit initialization); otherwise, initialize the FPU state to
685 * clear any exceptions.
686 */
687 if (ci->ci_fpcurproc != NULL) {
688 IPRINTF(("%s: fp save %lx\n", ci->ci_dev.dv_xname,
689 (u_long)ci->ci_fpcurproc));
690 npxsave_cpu(ci, 1);
691 } else {
692 clts();
693 IPRINTF(("%s: fp init\n", ci->ci_dev.dv_xname));
694 fninit();
695 fwait();
696 stts();
697 }
698 splx(s);
699
700 IPRINTF(("%s: done saving\n", ci->ci_dev.dv_xname));
701 KDASSERT(ci->ci_fpcurproc == NULL);
702 #ifndef MULTIPROCESSOR
703 KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL);
704 #else
705 if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
706 npxsave_proc(p, 1);
707 #endif
708 p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
709 clts();
710 s = splipi();
711 ci->ci_fpcurproc = p;
712 p->p_addr->u_pcb.pcb_fpcpu = ci;
713 splx(s);
714 uvmexp.fpswtch++;
715
716 if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
717 fldcw(&p->p_addr->u_pcb.pcb_savefpu.sv_87.sv_env.en_cw);
718 p->p_md.md_flags |= MDP_USEDFPU;
719 } else {
720 /*
721 * The following frstor may cause an IRQ13 when the state being
722 * restored has a pending error. The error will appear to have
723 * been triggered by the current (npx) user instruction even
724 * when that instruction is a no-wait instruction that should
725 * not trigger an error (e.g., fnclex). On at least one 486
726 * system all of the no-wait instructions are broken the same
727 * as frstor, so our treatment does not amplify the breakage.
728 * On at least one 386/Cyrix 387 system, fnclex works correctly
729 * while frstor and fnsave are broken, so our treatment breaks
730 * fnclex if it is the first FPU instruction after a context
731 * switch.
732 */
733 frstor(&p->p_addr->u_pcb.pcb_savefpu.sv_87);
734 }
735
736 return (1);
737 }
738
739 /*
740 * The FNSAVE instruction clears the FPU state. Rather than reloading the FPU
741 * immediately, we clear fpcurproc and turn on CR0_TS to force a DNA and a
742 * reload of the FPU state the next time we try to use it. This routine
743 * is only called when forking, core dumping, or debugging, or swapping,
744 * so the lazy reload at worst forces us to trap once per fork(), and at best
745 * saves us a reload once per fork().
746 */
747 void
748 npxsave_cpu(struct cpu_info *ci, int save)
749 {
750 struct proc *p;
751 int s;
752
753 KDASSERT(ci == curcpu());
754
755 p = ci->ci_fpcurproc;
756 if (p == NULL)
757 return;
758
759 IPRINTF(("%s: fp cpu %s %lx\n", ci->ci_dev.dv_xname,
760 save ? "save" : "flush", (u_long)p));
761
762 if (save) {
763 #ifdef DIAGNOSTIC
764 if (ci->ci_fpsaving != 0)
765 panic("npxsave_cpu: recursive save!");
766 #endif
767 /*
768 * Set ci->ci_fpsaving, so that any pending exception will be
769 * thrown away. (It will be caught again if/when the FPU
770 * state is restored.)
771 *
772 * XXX on i386 and earlier, this routine should always be
773 * called at spl0; if it might called with the NPX interrupt
774 * masked, it would be necessary to forcibly unmask the NPX
775 * interrupt so that it could succeed.
776 * XXX this is irrelevant on 486 and above (systems
777 * which report FP failures via traps rather than irq13).
778 * XXX punting for now..
779 */
780 clts();
781 ci->ci_fpsaving = 1;
782 fpu_save(&p->p_addr->u_pcb.pcb_savefpu);
783 ci->ci_fpsaving = 0;
784 /* It is unclear if this is needed. */
785 fwait();
786 }
787
788 /*
789 * We set the TS bit in the saved CR0 for this process, so that it
790 * will get a DNA exception on any FPU instruction and force a reload.
791 */
792 stts();
793 p->p_addr->u_pcb.pcb_cr0 |= CR0_TS;
794
795 s = splipi();
796 p->p_addr->u_pcb.pcb_fpcpu = NULL;
797 ci->ci_fpcurproc = NULL;
798 splx(s);
799 }
800
801 /*
802 * Save p's FPU state, which may be on this processor or another processor.
803 */
804 void
805 npxsave_proc(struct proc *p, int save)
806 {
807 struct cpu_info *ci = curcpu();
808 struct cpu_info *oci;
809
810 KDASSERT(p->p_addr != NULL);
811
812 oci = p->p_addr->u_pcb.pcb_fpcpu;
813 if (oci == NULL)
814 return;
815
816 IPRINTF(("%s: fp proc %s %lx\n", ci->ci_dev.dv_xname,
817 save ? "save" : "flush", (u_long)p));
818
819 #if defined(MULTIPROCESSOR)
820 if (oci == ci) {
821 int s = splipi();
822 npxsave_cpu(ci, save);
823 splx(s);
824 } else {
825 #ifdef DIAGNOSTIC
826 int spincount;
827 #endif
828
829 IPRINTF(("%s: fp ipi to %s %s %lx\n", ci->ci_dev.dv_xname,
830 oci->ci_dev.dv_xname, save ? "save" : "flush", (u_long)p));
831
832 i386_send_ipi(oci,
833 save ? I386_IPI_SYNCH_FPU : I386_IPI_FLUSH_FPU);
834
835 #ifdef DIAGNOSTIC
836 spincount = 0;
837 #endif
838 while (p->p_addr->u_pcb.pcb_fpcpu != NULL) {
839 SPINLOCK_SPIN_HOOK;
840 #ifdef DIAGNOSTIC
841 if (spincount++ > 100000000)
842 panic("%s: fp_save ipi didn't (%s)",
843 ci->ci_dev.dv_xname, oci->ci_dev.dv_xname);
844 #endif
845 }
846 }
847 #else
848 KASSERT(ci->ci_fpcurproc == p);
849 npxsave_cpu(ci, save);
850 #endif
851 }