1 | /* |
2 | * Intel 3200/3210 Memory Controller kernel module |
3 | * Copyright (C) 2008-2009 Akamai Technologies, Inc. |
4 | * Portions by Hitoshi Mitake <h.mitake@gmail.com>. |
5 | * |
6 | * This file may be distributed under the terms of the |
7 | * GNU General Public License. |
8 | */ |
9 | |
10 | #include <linux/module.h> |
11 | #include <linux/init.h> |
12 | #include <linux/pci.h> |
13 | #include <linux/pci_ids.h> |
14 | #include <linux/edac.h> |
15 | #include <linux/io.h> |
16 | #include "edac_module.h" |
17 | |
18 | #include <linux/io-64-nonatomic-lo-hi.h> |
19 | |
/* Module name reported to the EDAC core and used as the PCI driver name. */
#define EDAC_MOD_STR			"i3200_edac"

/* PCI device ID of the Intel 3200 host bridge this driver binds to. */
#define PCI_DEVICE_ID_INTEL_3200_HB	0x29f0

/* Topology limits used when sizing the EDAC layers and the DRB table. */
#define I3200_DIMMS			4
#define I3200_RANKS			8
#define I3200_RANKS_PER_CHANNEL		4
#define I3200_CHANNELS			2
28 | |
/* Intel 3200 register addresses - device 0 function 0 - DRAM Controller */

/* MCH Memory Mapped Register BAR, split across two 32-bit config dwords. */
#define I3200_MCHBAR_LOW	0x48
#define I3200_MCHBAR_HIGH	0x4c
#define I3200_MCHBAR_MASK	0xfffffc000ULL	/* bits 35:14 */
#define I3200_MMR_WINDOW_SIZE	16384

/*
 * Top of Memory (16b)
 *
 * 15:10 reserved
 *  9:0  total populated physical memory
 */
#define I3200_TOM		0xa0
#define I3200_TOM_MASK		0x3ff	/* bits 9:0 */
#define I3200_TOM_SHIFT		26	/* 64MiB grain */

/*
 * Error Status Register (16b)
 *
 * 15    reserved
 * 14    Isochronous TBWRR Run Behind FIFO Full (ITCV)
 * 13    Isochronous TBWRR Run Behind FIFO Put (ITSTV)
 * 12    reserved
 * 11    MCH Thermal Sensor Event for SMI/SCI/SERR (GTSE)
 * 10    reserved
 *  9    LOCK to non-DRAM Memory Flag (LCKF)
 *  8    reserved
 *  7    DRAM Throttle Flag (DTF)
 *  6:2  reserved
 *  1    Multi-bit DRAM ECC Error Flag (DMERR)
 *  0    Single-bit DRAM ECC Error Flag (DSERR)
 */
#define I3200_ERRSTS		0xc8
#define I3200_ERRSTS_UE		0x0002
#define I3200_ERRSTS_CE		0x0001
#define I3200_ERRSTS_BITS	(I3200_ERRSTS_UE | I3200_ERRSTS_CE)
65 | |
66 | |
/* Intel MMIO register space - device 0 function 0 - MMR space */

/*
 * Channel 0 DRAM Rank Boundary (16b x 4)
 *
 * 15:10 reserved
 *  9:0  Channel 0 DRAM Rank Boundary Address
 */
#define I3200_C0DRB	0x200
#define I3200_C1DRB	0x600	/* Channel 1 DRAM Rank Boundary (16b x 4) */
#define I3200_DRB_MASK	0x3ff	/* bits 9:0 */
#define I3200_DRB_SHIFT	26	/* 64MiB grain */

/*
 * Channel 0 ECC Error Log (64b)
 *
 * 63:48 Error Column Address (ERRCOL)
 * 47:32 Error Row Address (ERRROW)
 * 31:29 Error Bank Address (ERRBANK)
 * 28:27 Error Rank Address (ERRRANK)
 * 26:24 reserved
 * 23:16 Error Syndrome (ERRSYND)
 * 15: 2 reserved
 *  1    Multiple Bit Error Status (MERRSTS)
 *  0    Correctable Error Status (CERRSTS)
 */
#define I3200_C0ECCERRLOG		0x280
#define I3200_C1ECCERRLOG		0x680	/* Chan 1 ECC Error Log (64b) */
#define I3200_ECCERRLOG_CE		0x1
#define I3200_ECCERRLOG_UE		0x2
#define I3200_ECCERRLOG_RANK_BITS	0x18000000
#define I3200_ECCERRLOG_RANK_SHIFT	27
#define I3200_ECCERRLOG_SYNDROME_BITS	0xff0000
#define I3200_ECCERRLOG_SYNDROME_SHIFT	16

/*
 * Capability register; P.95 of spec for details.  Unlike the registers
 * above, this one is read through PCI config space by
 * how_many_channels(), not through the MMR window.
 */
#define I3200_CAPID0			0xe0
98 | |
99 | struct i3200_priv { |
100 | void __iomem *window; |
101 | }; |
102 | |
103 | static int nr_channels; |
104 | |
105 | static int how_many_channels(struct pci_dev *pdev) |
106 | { |
107 | int n_channels; |
108 | |
109 | unsigned char capid0_8b; /* 8th byte of CAPID0 */ |
110 | |
111 | pci_read_config_byte(dev: pdev, I3200_CAPID0 + 8, val: &capid0_8b); |
112 | |
113 | if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */ |
114 | edac_dbg(0, "In single channel mode\n" ); |
115 | n_channels = 1; |
116 | } else { |
117 | edac_dbg(0, "In dual channel mode\n" ); |
118 | n_channels = 2; |
119 | } |
120 | |
121 | if (capid0_8b & 0x10) /* check if both channels are filled */ |
122 | edac_dbg(0, "2 DIMMS per channel disabled\n" ); |
123 | else |
124 | edac_dbg(0, "2 DIMMS per channel enabled\n" ); |
125 | |
126 | return n_channels; |
127 | } |
128 | |
129 | static unsigned long eccerrlog_syndrome(u64 log) |
130 | { |
131 | return (log & I3200_ECCERRLOG_SYNDROME_BITS) >> |
132 | I3200_ECCERRLOG_SYNDROME_SHIFT; |
133 | } |
134 | |
135 | static int eccerrlog_row(int channel, u64 log) |
136 | { |
137 | u64 rank = ((log & I3200_ECCERRLOG_RANK_BITS) >> |
138 | I3200_ECCERRLOG_RANK_SHIFT); |
139 | return rank | (channel * I3200_RANKS_PER_CHANNEL); |
140 | } |
141 | |
/* Index into i3200_devs[]; also stored as driver_data in the PCI ID table. */
enum i3200_chips {
	I3200 = 0,
};
145 | |
/* Static description of one supported chipset variant. */
struct i3200_dev_info {
	const char *ctl_name;	/* copied into mci->ctl_name at probe time */
};
149 | |
150 | struct i3200_error_info { |
151 | u16 errsts; |
152 | u16 errsts2; |
153 | u64 eccerrlog[I3200_CHANNELS]; |
154 | }; |
155 | |
156 | static const struct i3200_dev_info i3200_devs[] = { |
157 | [I3200] = { |
158 | .ctl_name = "i3200" |
159 | }, |
160 | }; |
161 | |
/*
 * Host bridge device the driver operates on.  Filled in by
 * i3200_init_one() or, when that did not happen during driver
 * registration, by i3200_init() via pci_get_device().
 */
static struct pci_dev *mci_pdev;

/*
 * Cleared by i3200_init() when it had to look up and initialise the device
 * itself; i3200_exit() then tears the device down by hand.
 */
static int i3200_registered = 1;
164 | |
165 | |
166 | static void i3200_clear_error_info(struct mem_ctl_info *mci) |
167 | { |
168 | struct pci_dev *pdev; |
169 | |
170 | pdev = to_pci_dev(mci->pdev); |
171 | |
172 | /* |
173 | * Clear any error bits. |
174 | * (Yes, we really clear bits by writing 1 to them.) |
175 | */ |
176 | pci_write_bits16(pdev, I3200_ERRSTS, I3200_ERRSTS_BITS, |
177 | I3200_ERRSTS_BITS); |
178 | } |
179 | |
180 | static void i3200_get_and_clear_error_info(struct mem_ctl_info *mci, |
181 | struct i3200_error_info *info) |
182 | { |
183 | struct pci_dev *pdev; |
184 | struct i3200_priv *priv = mci->pvt_info; |
185 | void __iomem *window = priv->window; |
186 | |
187 | pdev = to_pci_dev(mci->pdev); |
188 | |
189 | /* |
190 | * This is a mess because there is no atomic way to read all the |
191 | * registers at once and the registers can transition from CE being |
192 | * overwritten by UE. |
193 | */ |
194 | pci_read_config_word(dev: pdev, I3200_ERRSTS, val: &info->errsts); |
195 | if (!(info->errsts & I3200_ERRSTS_BITS)) |
196 | return; |
197 | |
198 | info->eccerrlog[0] = readq(addr: window + I3200_C0ECCERRLOG); |
199 | if (nr_channels == 2) |
200 | info->eccerrlog[1] = readq(addr: window + I3200_C1ECCERRLOG); |
201 | |
202 | pci_read_config_word(dev: pdev, I3200_ERRSTS, val: &info->errsts2); |
203 | |
204 | /* |
205 | * If the error is the same for both reads then the first set |
206 | * of reads is valid. If there is a change then there is a CE |
207 | * with no info and the second set of reads is valid and |
208 | * should be UE info. |
209 | */ |
210 | if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) { |
211 | info->eccerrlog[0] = readq(addr: window + I3200_C0ECCERRLOG); |
212 | if (nr_channels == 2) |
213 | info->eccerrlog[1] = readq(addr: window + I3200_C1ECCERRLOG); |
214 | } |
215 | |
216 | i3200_clear_error_info(mci); |
217 | } |
218 | |
219 | static void i3200_process_error_info(struct mem_ctl_info *mci, |
220 | struct i3200_error_info *info) |
221 | { |
222 | int channel; |
223 | u64 log; |
224 | |
225 | if (!(info->errsts & I3200_ERRSTS_BITS)) |
226 | return; |
227 | |
228 | if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) { |
229 | edac_mc_handle_error(type: HW_EVENT_ERR_UNCORRECTED, mci, error_count: 1, page_frame_number: 0, offset_in_page: 0, syndrome: 0, |
230 | top_layer: -1, mid_layer: -1, low_layer: -1, msg: "UE overwrote CE" , other_detail: "" ); |
231 | info->errsts = info->errsts2; |
232 | } |
233 | |
234 | for (channel = 0; channel < nr_channels; channel++) { |
235 | log = info->eccerrlog[channel]; |
236 | if (log & I3200_ECCERRLOG_UE) { |
237 | edac_mc_handle_error(type: HW_EVENT_ERR_UNCORRECTED, mci, error_count: 1, |
238 | page_frame_number: 0, offset_in_page: 0, syndrome: 0, |
239 | top_layer: eccerrlog_row(channel, log), |
240 | mid_layer: -1, low_layer: -1, |
241 | msg: "i3000 UE" , other_detail: "" ); |
242 | } else if (log & I3200_ECCERRLOG_CE) { |
243 | edac_mc_handle_error(type: HW_EVENT_ERR_CORRECTED, mci, error_count: 1, |
244 | page_frame_number: 0, offset_in_page: 0, syndrome: eccerrlog_syndrome(log), |
245 | top_layer: eccerrlog_row(channel, log), |
246 | mid_layer: -1, low_layer: -1, |
247 | msg: "i3000 CE" , other_detail: "" ); |
248 | } |
249 | } |
250 | } |
251 | |
252 | static void i3200_check(struct mem_ctl_info *mci) |
253 | { |
254 | struct i3200_error_info info; |
255 | |
256 | i3200_get_and_clear_error_info(mci, info: &info); |
257 | i3200_process_error_info(mci, info: &info); |
258 | } |
259 | |
260 | static void __iomem *i3200_map_mchbar(struct pci_dev *pdev) |
261 | { |
262 | union { |
263 | u64 mchbar; |
264 | struct { |
265 | u32 mchbar_low; |
266 | u32 mchbar_high; |
267 | }; |
268 | } u; |
269 | void __iomem *window; |
270 | |
271 | pci_read_config_dword(dev: pdev, I3200_MCHBAR_LOW, val: &u.mchbar_low); |
272 | pci_read_config_dword(dev: pdev, I3200_MCHBAR_HIGH, val: &u.mchbar_high); |
273 | u.mchbar &= I3200_MCHBAR_MASK; |
274 | |
275 | if (u.mchbar != (resource_size_t)u.mchbar) { |
276 | printk(KERN_ERR |
277 | "i3200: mmio space beyond accessible range (0x%llx)\n" , |
278 | (unsigned long long)u.mchbar); |
279 | return NULL; |
280 | } |
281 | |
282 | window = ioremap(offset: u.mchbar, I3200_MMR_WINDOW_SIZE); |
283 | if (!window) |
284 | printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n" , |
285 | (unsigned long long)u.mchbar); |
286 | |
287 | return window; |
288 | } |
289 | |
290 | |
291 | static void i3200_get_drbs(void __iomem *window, |
292 | u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL]) |
293 | { |
294 | int i; |
295 | |
296 | for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) { |
297 | drbs[0][i] = readw(addr: window + I3200_C0DRB + 2*i) & I3200_DRB_MASK; |
298 | drbs[1][i] = readw(addr: window + I3200_C1DRB + 2*i) & I3200_DRB_MASK; |
299 | |
300 | edac_dbg(0, "drb[0][%d] = %d, drb[1][%d] = %d\n" , i, drbs[0][i], i, drbs[1][i]); |
301 | } |
302 | } |
303 | |
304 | static bool i3200_is_stacked(struct pci_dev *pdev, |
305 | u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL]) |
306 | { |
307 | u16 tom; |
308 | |
309 | pci_read_config_word(dev: pdev, I3200_TOM, val: &tom); |
310 | tom &= I3200_TOM_MASK; |
311 | |
312 | return drbs[I3200_CHANNELS - 1][I3200_RANKS_PER_CHANNEL - 1] == tom; |
313 | } |
314 | |
315 | static unsigned long drb_to_nr_pages( |
316 | u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL], bool stacked, |
317 | int channel, int rank) |
318 | { |
319 | int n; |
320 | |
321 | n = drbs[channel][rank]; |
322 | if (!n) |
323 | return 0; |
324 | |
325 | if (rank > 0) |
326 | n -= drbs[channel][rank - 1]; |
327 | if (stacked && (channel == 1) && |
328 | drbs[channel][rank] == drbs[channel][I3200_RANKS_PER_CHANNEL - 1]) |
329 | n -= drbs[0][I3200_RANKS_PER_CHANNEL - 1]; |
330 | |
331 | n <<= (I3200_DRB_SHIFT - PAGE_SHIFT); |
332 | return n; |
333 | } |
334 | |
335 | static int i3200_probe1(struct pci_dev *pdev, int dev_idx) |
336 | { |
337 | int rc; |
338 | int i, j; |
339 | struct mem_ctl_info *mci = NULL; |
340 | struct edac_mc_layer layers[2]; |
341 | u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL]; |
342 | bool stacked; |
343 | void __iomem *window; |
344 | struct i3200_priv *priv; |
345 | |
346 | edac_dbg(0, "MC:\n" ); |
347 | |
348 | window = i3200_map_mchbar(pdev); |
349 | if (!window) |
350 | return -ENODEV; |
351 | |
352 | i3200_get_drbs(window, drbs); |
353 | nr_channels = how_many_channels(pdev); |
354 | |
355 | layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; |
356 | layers[0].size = I3200_DIMMS; |
357 | layers[0].is_virt_csrow = true; |
358 | layers[1].type = EDAC_MC_LAYER_CHANNEL; |
359 | layers[1].size = nr_channels; |
360 | layers[1].is_virt_csrow = false; |
361 | mci = edac_mc_alloc(mc_num: 0, ARRAY_SIZE(layers), layers, |
362 | sz_pvt: sizeof(struct i3200_priv)); |
363 | if (!mci) |
364 | return -ENOMEM; |
365 | |
366 | edac_dbg(3, "MC: init mci\n" ); |
367 | |
368 | mci->pdev = &pdev->dev; |
369 | mci->mtype_cap = MEM_FLAG_DDR2; |
370 | |
371 | mci->edac_ctl_cap = EDAC_FLAG_SECDED; |
372 | mci->edac_cap = EDAC_FLAG_SECDED; |
373 | |
374 | mci->mod_name = EDAC_MOD_STR; |
375 | mci->ctl_name = i3200_devs[dev_idx].ctl_name; |
376 | mci->dev_name = pci_name(pdev); |
377 | mci->edac_check = i3200_check; |
378 | mci->ctl_page_to_phys = NULL; |
379 | priv = mci->pvt_info; |
380 | priv->window = window; |
381 | |
382 | stacked = i3200_is_stacked(pdev, drbs); |
383 | |
384 | /* |
385 | * The dram rank boundary (DRB) reg values are boundary addresses |
386 | * for each DRAM rank with a granularity of 64MB. DRB regs are |
387 | * cumulative; the last one will contain the total memory |
388 | * contained in all ranks. |
389 | */ |
390 | for (i = 0; i < I3200_DIMMS; i++) { |
391 | unsigned long nr_pages; |
392 | |
393 | for (j = 0; j < nr_channels; j++) { |
394 | struct dimm_info *dimm = edac_get_dimm(mci, layer0: i, layer1: j, layer2: 0); |
395 | |
396 | nr_pages = drb_to_nr_pages(drbs, stacked, channel: j, rank: i); |
397 | if (nr_pages == 0) |
398 | continue; |
399 | |
400 | edac_dbg(0, "csrow %d, channel %d%s, size = %ld MiB\n" , i, j, |
401 | stacked ? " (stacked)" : "" , PAGES_TO_MiB(nr_pages)); |
402 | |
403 | dimm->nr_pages = nr_pages; |
404 | dimm->grain = nr_pages << PAGE_SHIFT; |
405 | dimm->mtype = MEM_DDR2; |
406 | dimm->dtype = DEV_UNKNOWN; |
407 | dimm->edac_mode = EDAC_UNKNOWN; |
408 | } |
409 | } |
410 | |
411 | i3200_clear_error_info(mci); |
412 | |
413 | rc = -ENODEV; |
414 | if (edac_mc_add_mc(mci)) { |
415 | edac_dbg(3, "MC: failed edac_mc_add_mc()\n" ); |
416 | goto fail; |
417 | } |
418 | |
419 | /* get this far and it's successful */ |
420 | edac_dbg(3, "MC: success\n" ); |
421 | return 0; |
422 | |
423 | fail: |
424 | iounmap(addr: window); |
425 | if (mci) |
426 | edac_mc_free(mci); |
427 | |
428 | return rc; |
429 | } |
430 | |
431 | static int i3200_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) |
432 | { |
433 | int rc; |
434 | |
435 | edac_dbg(0, "MC:\n" ); |
436 | |
437 | if (pci_enable_device(dev: pdev) < 0) |
438 | return -EIO; |
439 | |
440 | rc = i3200_probe1(pdev, dev_idx: ent->driver_data); |
441 | if (!mci_pdev) |
442 | mci_pdev = pci_dev_get(dev: pdev); |
443 | |
444 | return rc; |
445 | } |
446 | |
447 | static void i3200_remove_one(struct pci_dev *pdev) |
448 | { |
449 | struct mem_ctl_info *mci; |
450 | struct i3200_priv *priv; |
451 | |
452 | edac_dbg(0, "\n" ); |
453 | |
454 | mci = edac_mc_del_mc(dev: &pdev->dev); |
455 | if (!mci) |
456 | return; |
457 | |
458 | priv = mci->pvt_info; |
459 | iounmap(addr: priv->window); |
460 | |
461 | edac_mc_free(mci); |
462 | |
463 | pci_disable_device(dev: pdev); |
464 | } |
465 | |
466 | static const struct pci_device_id i3200_pci_tbl[] = { |
467 | { |
468 | PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, |
469 | I3200}, |
470 | { |
471 | 0, |
472 | } /* 0 terminated list. */ |
473 | }; |
474 | |
475 | MODULE_DEVICE_TABLE(pci, i3200_pci_tbl); |
476 | |
477 | static struct pci_driver i3200_driver = { |
478 | .name = EDAC_MOD_STR, |
479 | .probe = i3200_init_one, |
480 | .remove = i3200_remove_one, |
481 | .id_table = i3200_pci_tbl, |
482 | }; |
483 | |
484 | static int __init i3200_init(void) |
485 | { |
486 | int pci_rc; |
487 | |
488 | edac_dbg(3, "MC:\n" ); |
489 | |
490 | /* Ensure that the OPSTATE is set correctly for POLL or NMI */ |
491 | opstate_init(); |
492 | |
493 | pci_rc = pci_register_driver(&i3200_driver); |
494 | if (pci_rc < 0) |
495 | goto fail0; |
496 | |
497 | if (!mci_pdev) { |
498 | i3200_registered = 0; |
499 | mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL, |
500 | PCI_DEVICE_ID_INTEL_3200_HB, NULL); |
501 | if (!mci_pdev) { |
502 | edac_dbg(0, "i3200 pci_get_device fail\n" ); |
503 | pci_rc = -ENODEV; |
504 | goto fail1; |
505 | } |
506 | |
507 | pci_rc = i3200_init_one(pdev: mci_pdev, ent: i3200_pci_tbl); |
508 | if (pci_rc < 0) { |
509 | edac_dbg(0, "i3200 init fail\n" ); |
510 | pci_rc = -ENODEV; |
511 | goto fail1; |
512 | } |
513 | } |
514 | |
515 | return 0; |
516 | |
517 | fail1: |
518 | pci_unregister_driver(dev: &i3200_driver); |
519 | |
520 | fail0: |
521 | pci_dev_put(dev: mci_pdev); |
522 | |
523 | return pci_rc; |
524 | } |
525 | |
526 | static void __exit i3200_exit(void) |
527 | { |
528 | edac_dbg(3, "MC:\n" ); |
529 | |
530 | pci_unregister_driver(dev: &i3200_driver); |
531 | if (!i3200_registered) { |
532 | i3200_remove_one(pdev: mci_pdev); |
533 | pci_dev_put(dev: mci_pdev); |
534 | } |
535 | } |
536 | |
537 | module_init(i3200_init); |
538 | module_exit(i3200_exit); |
539 | |
540 | MODULE_LICENSE("GPL" ); |
541 | MODULE_AUTHOR("Akamai Technologies, Inc." ); |
542 | MODULE_DESCRIPTION("MC support for Intel 3200 memory hub controllers" ); |
543 | |
544 | module_param(edac_op_state, int, 0444); |
545 | MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI" ); |
546 | |