1 /* $OpenBSD: amd64errata.c,v 1.1 2007/02/17 17:38:37 tom Exp $ */
2 /* $NetBSD: errata.c,v 1.6 2007/02/05 21:05:45 ad Exp $ */
3
4 /*-
5 * Copyright (c) 2007 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Andrew Doran.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 /*
41 * Detect, report on, and work around known errata with AMD amd64 CPUs.
42 *
43 * This is generalised because there are quite a few problems that the
44 * BIOS can patch via MSR, but it is not known if the OS can patch these
45 * yet. The list is expected to grow over time.
46 *
47 * The data here are from: Revision Guide for AMD Athlon 64 and
48 * AMD Opteron Processors, Publication #25759, Revision: 3.69,
49 * Issue Date: September 2006
50 */
51
52 #include <sys/param.h>
53
54 #include <sys/types.h>
55 #include <sys/systm.h>
56
57 #include <machine/cpu.h>
58 #include <machine/cpufunc.h>
59 #include <machine/specialreg.h>
60
61
62 typedef struct errata {
63 u_short e_num;
64 u_short e_reported;
65 u_int e_data1;
66 const uint8_t *e_set;
67 int (*e_act)(struct cpu_info *, struct errata *);
68 uint64_t e_data2;
69 } errata_t;
70
/*
 * Processor revisions distinguished by this file.  The values are stored
 * in uint8_t arrays, and OINK doubles as the end-of-list marker in every
 * table that is built from them, so it must stay last.
 */
enum cpurev {
	BH_E4,
	CH_CG,
	CH_D0,
	DH_CG,
	DH_D0,
	DH_E3,
	DH_E6,
	JH_E1,
	JH_E6,
	SH_B0,
	SH_B3,
	SH_C0,
	SH_CG,
	SH_D0,
	SH_E4,
	SH_E5,
	OINK		/* list terminator, not a real revision */
};
typedef enum cpurev cpurev_t;
76
77 static const u_int cpurevs[] = {
78 BH_E4, 0x0020fb1, CH_CG, 0x0000f82, CH_CG, 0x0000fb2,
79 CH_D0, 0x0010f80, CH_D0, 0x0010fb0, DH_CG, 0x0000fc0,
80 DH_CG, 0x0000fe0, DH_CG, 0x0000ff0, DH_D0, 0x0010fc0,
81 DH_D0, 0x0010ff0, DH_E3, 0x0020fc0, DH_E3, 0x0020ff0,
82 DH_E6, 0x0020fc2, DH_E6, 0x0020ff2, JH_E1, 0x0020f10,
83 JH_E6, 0x0020f12, JH_E6, 0x0020f32, SH_B0, 0x0000f40,
84 SH_B3, 0x0000f51, SH_C0, 0x0000f48, SH_C0, 0x0000f58,
85 SH_CG, 0x0000f4a, SH_CG, 0x0000f5a, SH_CG, 0x0000f7a,
86 SH_D0, 0x0010f40, SH_D0, 0x0010f50, SH_D0, 0x0010f70,
87 SH_E4, 0x0020f51, SH_E4, 0x0020f71, SH_E5, 0x0020f42,
88 OINK
89 };
90
91 static const uint8_t amd64_errata_set1[] = {
92 SH_B3, SH_C0, SH_CG, DH_CG, CH_CG, OINK
93 };
94
95 static const uint8_t amd64_errata_set2[] = {
96 SH_B3, SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, OINK
97 };
98
99 static const uint8_t amd64_errata_set3[] = {
100 JH_E1, DH_E3, OINK
101 };
102
103 static const uint8_t amd64_errata_set4[] = {
104 SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, JH_E1,
105 DH_E3, SH_E4, BH_E4, SH_E5, DH_E6, JH_E6, OINK
106 };
107
108 static const uint8_t amd64_errata_set5[] = {
109 SH_B3, OINK
110 };
111
112 static const uint8_t amd64_errata_set6[] = {
113 SH_C0, SH_CG, DH_CG, CH_CG, OINK
114 };
115
116 static const uint8_t amd64_errata_set7[] = {
117 SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, OINK
118 };
119
120 static const uint8_t amd64_errata_set8[] = {
121 BH_E4, CH_CG, CH_CG, CH_D0, CH_D0, DH_CG, DH_CG, DH_CG,
122 DH_D0, DH_D0, DH_E3, DH_E3, DH_E6, DH_E6, JH_E1, JH_E6,
123 JH_E6, SH_B0, SH_B3, SH_C0, SH_C0, SH_CG, SH_CG, SH_CG,
124 SH_D0, SH_D0, SH_D0, SH_E4, SH_E4, SH_E5, OINK
125 };
126
127 static int amd64_errata_setmsr(struct cpu_info *, errata_t *);
128 static int amd64_errata_testmsr(struct cpu_info *, errata_t *);
129
130 static errata_t errata[] = {
131 /*
132 * 81: Cache Coherency Problem with Hardware Prefetching
133 * and Streaming Stores
134 */
135 {
136 81, 0, MSR_DC_CFG, amd64_errata_set5,
137 amd64_errata_testmsr, DC_CFG_DIS_SMC_CHK_BUF
138 },
139 /*
140 * 86: DRAM Data Masking Feature Can Cause ECC Failures
141 */
142 {
143 86, 0, MSR_NB_CFG, amd64_errata_set1,
144 amd64_errata_testmsr, NB_CFG_DISDATMSK
145 },
146 /*
147 * 89: Potential Deadlock With Locked Transactions
148 */
149 {
150 89, 0, MSR_NB_CFG, amd64_errata_set8,
151 amd64_errata_testmsr, NB_CFG_DISIOREQLOCK
152 },
153 /*
154 * 94: Sequential Prefetch Feature May Cause Incorrect
155 * Processor Operation
156 */
157 {
158 94, 0, MSR_IC_CFG, amd64_errata_set1,
159 amd64_errata_testmsr, IC_CFG_DIS_SEQ_PREFETCH
160 },
161 /*
162 * 97: 128-Bit Streaming Stores May Cause Coherency
163 * Failure
164 *
165 * XXX "This workaround must not be applied to processors
166 * prior to revision C0." We don't apply it, but if it
167 * can't be applied, it shouldn't be reported.
168 */
169 {
170 97, 0, MSR_DC_CFG, amd64_errata_set6,
171 amd64_errata_testmsr, DC_CFG_DIS_CNV_WC_SSO
172 },
173 /*
174 * 104: DRAM Data Masking Feature Causes ChipKill ECC
175 * Failures When Enabled With x8/x16 DRAM Devices
176 */
177 {
178 104, 0, MSR_NB_CFG, amd64_errata_set7,
179 amd64_errata_testmsr, NB_CFG_DISDATMSK
180 },
181 /*
182 * 113: Enhanced Write-Combining Feature Causes System Hang
183 */
184 {
185 113, 0, MSR_BU_CFG, amd64_errata_set3,
186 amd64_errata_setmsr, BU_CFG_WBENHWSBDIS
187 },
188 #ifdef MULTIPROCESSOR
189 /*
190 * 69: Multiprocessor Coherency Problem with Hardware
191 * Prefetch Mechanism
192 */
193 {
194 69, 0, MSR_BU_CFG, amd64_errata_set5,
195 amd64_errata_setmsr, BU_CFG_WBPFSMCCHKDIS
196 },
197 /*
198 * 101: DRAM Scrubber May Cause Data Corruption When Using
199 * Node-Interleaved Memory
200 */
201 {
202 101, 0, 0, amd64_errata_set2,
203 NULL, 0
204 },
205 /*
206 * 106: Potential Deadlock with Tightly Coupled Semaphores
207 * in an MP System
208 */
209 {
210 106, 0, MSR_LS_CFG, amd64_errata_set2,
211 amd64_errata_testmsr, LS_CFG_DIS_LS2_SQUISH
212 },
213 /*
214 * 107: Possible Multiprocessor Coherency Problem with
215 * Setting Page Table A/D Bits
216 */
217 {
218 107, 0, MSR_BU_CFG, amd64_errata_set2,
219 amd64_errata_testmsr, BU_CFG_THRL2IDXCMPDIS
220 },
221 #if 0
222 /*
223 * 122: TLB Flush Filter May Cause Coherency Problem in
224 * Multiprocessor Systems
225 */
226 {
227 122, 0, MSR_HWCR, amd64_errata_set4,
228 amd64_errata_setmsr, HWCR_FFDIS
229 },
230 #endif
231 #endif /* MULTIPROCESSOR */
232 };
233
234 static int
235 amd64_errata_testmsr(struct cpu_info *ci, errata_t *e)
236 {
237 uint64_t val;
238
239 (void)ci;
240
241 val = rdmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE);
242 if ((val & e->e_data2) != 0)
243 return 0; /* not found */
244
245 e->e_reported = 1;
246 return 1; /* found */
247 }
248
249 static int
250 amd64_errata_setmsr(struct cpu_info *ci, errata_t *e)
251 {
252 uint64_t val;
253
254 (void)ci;
255
256 val = rdmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE);
257 if ((val & e->e_data2) != 0)
258 return 0; /* not found */
259
260 wrmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE, val | e->e_data2);
261
262 #ifdef ERRATA_DEBUG
263 printf("ERRATA: writing a fix\n");
264 val = rdmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE);
265 if ((val & e->e_data2) != 0)
266 printf("ERRATA: fix seems to have worked!\n");
267 #endif
268
269 e->e_reported = 1;
270 return 2; /* found and fixed */
271 }
272
273 void
274 amd64_errata(struct cpu_info *ci)
275 {
276 errata_t *e, *ex;
277 cpurev_t rev;
278 int i, j;
279 int rc;
280 int found = 0;
281 int corrected = 0;
282 u_int32_t regs[4];
283
284 cpuid(0x80000001, regs);
285
286 for (i = 0; ; i += 2) {
287 if ((rev = cpurevs[i]) == OINK) {
288 #ifdef ERRATA_DEBUG
289 printf("ERRATA: this CPU ok\n");
290 #endif
291 return;
292 }
293 if (cpurevs[i + 1] == regs[0]) {
294 #ifdef ERRATA_DEBUG
295 printf("ERRATA: this CPU has errata\n");
296 #endif
297 break;
298 }
299 }
300
301 ex = errata + sizeof(errata) / sizeof(errata[0]);
302
303 /* Reset e_reporteds (for multiple CPUs) */
304 for (e = errata; e < ex; e++)
305 e->e_reported = 0;
306
307 for (e = errata; e < ex; e++) {
308 if (e->e_reported)
309 continue;
310 if (e->e_set != NULL) {
311 for (j = 0; e->e_set[j] != OINK; j++)
312 if (e->e_set[j] == rev)
313 break;
314 if (e->e_set[j] == OINK)
315 continue;
316 }
317
318 #ifdef ERRATA_DEBUG
319 printf("%s: testing for erratum %d\n",
320 ci->ci_dev.dv_xname, e->e_num);
321 #endif
322
323 /*
324 * If we have an action routine, call it, otherwise
325 * the default is that this erratum is present.
326 */
327 rc = (e->e_act == NULL) ? 1 : (*e->e_act)(ci, e);
328
329 if (rc == 0) /* not found */
330 continue;
331 if (rc == 1)
332 found++;
333 if (rc == 2)
334 corrected++;
335
336 e->e_reported = rc;
337
338 #ifdef ERRATA_DEBUG
339 printf("%s: erratum %d present%s\n",
340 ci->ci_dev.dv_xname, e->e_num,
341 (rc == 2) ? " and patched" : "");
342 #endif
343 }
344
345 #define ERRATA_VERBOSE
346 #ifdef ERRATA_VERBOSE
347 if (corrected) {
348 int first = 1;
349
350 /* Print out found and corrected */
351 printf("%s: AMD %s", ci->ci_dev.dv_xname,
352 (corrected == 1) ? "erratum" : "errata");
353 for (e = errata; e < ex; e++) {
354 if (e->e_reported == 2) {
355 if (! first)
356 printf(",");
357 printf(" %d", e->e_num);
358 first = 0;
359 }
360 }
361 printf(" detected and fixed\n");
362 }
363 #endif
364
365 if (found) {
366 int first = 1;
367
368 /* Print out found but not corrected */
369 printf("%s: AMD %s", ci->ci_dev.dv_xname,
370 (found == 1) ? "erratum" : "errata");
371 for (e = errata; e < ex; e++) {
372 if (e->e_reported == 1) {
373 if (! first)
374 printf(",");
375 printf(" %d", e->e_num);
376 first = 0;
377 }
378 }
379 printf(" present, BIOS upgrade may be required\n");
380 }
381 }