edac_core.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529
  1. /*
  2. * Defines, structures, APIs for edac_core module
  3. *
  4. * (C) 2007 Linux Networx (http://lnxi.com)
  5. * This file may be distributed under the terms of the
  6. * GNU General Public License.
  7. *
  8. * Written by Thayne Harbaugh
  9. * Based on work by Dan Hollis <goemon at anime dot net> and others.
  10. * http://www.anime.net/~goemon/linux-ecc/
  11. *
  12. * NMI handling support added by
  13. * Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>
  14. *
  15. * Refactored for multi-source files:
  16. * Doug Thompson <norsk5@xmission.com>
  17. *
  18. */
  19. #ifndef _EDAC_CORE_H_
  20. #define _EDAC_CORE_H_
  21. #include <linux/kernel.h>
  22. #include <linux/types.h>
  23. #include <linux/module.h>
  24. #include <linux/spinlock.h>
  25. #include <linux/smp.h>
  26. #include <linux/pci.h>
  27. #include <linux/time.h>
  28. #include <linux/nmi.h>
  29. #include <linux/rcupdate.h>
  30. #include <linux/completion.h>
  31. #include <linux/kobject.h>
  32. #include <linux/platform_device.h>
  33. #include <linux/workqueue.h>
  34. #include <linux/edac.h>
  35. #define EDAC_DEVICE_NAME_LEN 31
  36. #define EDAC_ATTRIB_VALUE_LEN 15
  37. #if PAGE_SHIFT < 20
  38. #define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT))
  39. #define MiB_TO_PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
  40. #else /* PAGE_SHIFT > 20 */
  41. #define PAGES_TO_MiB(pages) ((pages) << (PAGE_SHIFT - 20))
  42. #define MiB_TO_PAGES(mb) ((mb) >> (PAGE_SHIFT - 20))
  43. #endif
  44. #define edac_printk(level, prefix, fmt, arg...) \
  45. printk(level "EDAC " prefix ": " fmt, ##arg)
  46. #define edac_mc_printk(mci, level, fmt, arg...) \
  47. printk(level "EDAC MC%d: " fmt, mci->mc_idx, ##arg)
  48. #define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \
  49. printk(level "EDAC " prefix " MC%d: " fmt, mci->mc_idx, ##arg)
  50. #define edac_device_printk(ctl, level, fmt, arg...) \
  51. printk(level "EDAC DEVICE%d: " fmt, ctl->dev_idx, ##arg)
  52. #define edac_pci_printk(ctl, level, fmt, arg...) \
  53. printk(level "EDAC PCI%d: " fmt, ctl->pci_idx, ##arg)
  54. /* prefixes for edac_printk() and edac_mc_printk() */
  55. #define EDAC_MC "MC"
  56. #define EDAC_PCI "PCI"
  57. #define EDAC_DEBUG "DEBUG"
  58. extern const char *edac_mem_types[];
  59. #ifdef CONFIG_EDAC_DEBUG
  60. extern int edac_debug_level;
  61. #define edac_debug_printk(level, fmt, arg...) \
  62. do { \
  63. if (level <= edac_debug_level) \
  64. edac_printk(KERN_DEBUG, EDAC_DEBUG, \
  65. "%s: " fmt, __func__, ##arg); \
  66. } while (0)
  67. #define debugf0( ... ) edac_debug_printk(0, __VA_ARGS__ )
  68. #define debugf1( ... ) edac_debug_printk(1, __VA_ARGS__ )
  69. #define debugf2( ... ) edac_debug_printk(2, __VA_ARGS__ )
  70. #define debugf3( ... ) edac_debug_printk(3, __VA_ARGS__ )
  71. #define debugf4( ... ) edac_debug_printk(4, __VA_ARGS__ )
  72. #else /* !CONFIG_EDAC_DEBUG */
  73. #define debugf0( ... )
  74. #define debugf1( ... )
  75. #define debugf2( ... )
  76. #define debugf3( ... )
  77. #define debugf4( ... )
  78. #endif /* !CONFIG_EDAC_DEBUG */
  79. #define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \
  80. PCI_DEVICE_ID_ ## vend ## _ ## dev
  81. #define edac_dev_name(dev) (dev)->dev_name
  82. /*
  83. * The following are the structures to provide for a generic
  84. * or abstract 'edac_device'. This set of structures and the
  85. * code that implements the APIs for the same, provide for
  86. * registering EDAC type devices which are NOT standard memory.
  87. *
  88. * CPU caches (L1 and L2)
  89. * DMA engines
  90. * Core CPU switches
  91. * Fabric switch units
  92. * PCIe interface controllers
  93. * other EDAC/ECC type devices that can be monitored for
  94. * errors, etc.
  95. *
  96. * It allows for a 2 level hierarchy. For example:
  97. *
  98. * cache could be composed of L1, L2 and L3 levels of cache.
  99. * Each CPU core would have its own L1 cache, while sharing
  100. * L2 and maybe L3 caches.
  101. *
  102. * View them arranged, via the sysfs presentation:
  103. * /sys/devices/system/edac/..
  104. *
  105. * mc/ <existing memory device directory>
  106. * cpu/cpu0/.. <L1 and L2 block directory>
  107. * /L1-cache/ce_count
  108. * /ue_count
  109. * /L2-cache/ce_count
  110. * /ue_count
  111. * cpu/cpu1/.. <L1 and L2 block directory>
  112. * /L1-cache/ce_count
  113. * /ue_count
  114. * /L2-cache/ce_count
  115. * /ue_count
  116. * ...
  117. *
  118. * the L1 and L2 directories would be "edac_device_block's"
  119. */
  120. struct edac_device_counter {
  121. u32 ue_count;
  122. u32 ce_count;
  123. };
  124. /* forward reference */
  125. struct edac_device_ctl_info;
  126. struct edac_device_block;
  127. /* edac_dev_sysfs_attribute structure
  128. * used for driver sysfs attributes in mem_ctl_info
  129. * for extra controls and attributes:
  130. * like high level error Injection controls
  131. */
  132. struct edac_dev_sysfs_attribute {
  133. struct attribute attr;
  134. ssize_t (*show)(struct edac_device_ctl_info *, char *);
  135. ssize_t (*store)(struct edac_device_ctl_info *, const char *, size_t);
  136. };
  137. /* edac_dev_sysfs_block_attribute structure
  138. *
  139. * used in leaf 'block' nodes for adding controls/attributes
  140. *
  141. * each block in each instance of the containing control structure
  142. * can have an array of the following. The show and store functions
  143. * will be filled in with the show/store function in the
  144. * low level driver.
  145. *
  146. * The 'value' field will be the actual value field used for
  147. * counting
  148. */
  149. struct edac_dev_sysfs_block_attribute {
  150. struct attribute attr;
  151. ssize_t (*show)(struct kobject *, struct attribute *, char *);
  152. ssize_t (*store)(struct kobject *, struct attribute *,
  153. const char *, size_t);
  154. struct edac_device_block *block;
  155. unsigned int value;
  156. };
  157. /* device block control structure */
  158. struct edac_device_block {
  159. struct edac_device_instance *instance; /* Up Pointer */
  160. char name[EDAC_DEVICE_NAME_LEN + 1];
  161. struct edac_device_counter counters; /* basic UE and CE counters */
  162. int nr_attribs; /* how many attributes */
  163. /* this block's attributes, could be NULL */
  164. struct edac_dev_sysfs_block_attribute *block_attributes;
  165. /* edac sysfs device control */
  166. struct kobject kobj;
  167. };
  168. /* device instance control structure */
  169. struct edac_device_instance {
  170. struct edac_device_ctl_info *ctl; /* Up pointer */
  171. char name[EDAC_DEVICE_NAME_LEN + 4];
  172. struct edac_device_counter counters; /* instance counters */
  173. u32 nr_blocks; /* how many blocks */
  174. struct edac_device_block *blocks; /* block array */
  175. /* edac sysfs device control */
  176. struct kobject kobj;
  177. };
  178. /*
  179. * Abstract edac_device control info structure
  180. *
  181. */
  182. struct edac_device_ctl_info {
  183. /* for global list of edac_device_ctl_info structs */
  184. struct list_head link;
  185. struct module *owner; /* Module owner of this control struct */
  186. int dev_idx;
  187. /* Per instance controls for this edac_device */
  188. int log_ue; /* boolean for logging UEs */
  189. int log_ce; /* boolean for logging CEs */
  190. int panic_on_ue; /* boolean for panic'ing on an UE */
  191. unsigned poll_msec; /* number of milliseconds to poll interval */
  192. unsigned long delay; /* number of jiffies for poll_msec */
  193. /* Additional top controller level attributes, but specified
  194. * by the low level driver.
  195. *
  196. * Set by the low level driver to provide attributes at the
  197. * controller level, same level as 'ue_count' and 'ce_count' above.
  198. * An array of structures, NULL terminated
  199. *
  200. * If attributes are desired, then set to array of attributes
  201. * If no attributes are desired, leave NULL
  202. */
  203. struct edac_dev_sysfs_attribute *sysfs_attributes;
  204. /* pointer to main 'edac' subsys in sysfs */
  205. struct bus_type *edac_subsys;
  206. /* the internal state of this controller instance */
  207. int op_state;
  208. /* work struct for this instance */
  209. struct delayed_work work;
  210. /* pointer to edac polling checking routine:
  211. * If NOT NULL: points to polling check routine
  212. * If NULL: Then assumes INTERRUPT operation, where
  213. * MC driver will receive events
  214. */
  215. void (*edac_check) (struct edac_device_ctl_info * edac_dev);
  216. struct device *dev; /* pointer to device structure */
  217. const char *mod_name; /* module name */
  218. const char *ctl_name; /* edac controller name */
  219. const char *dev_name; /* pci/platform/etc... name */
  220. void *pvt_info; /* pointer to 'private driver' info */
  221. unsigned long start_time; /* edac_device load start time (jiffies) */
  222. struct completion removal_complete;
  223. /* sysfs top name under 'edac' directory
  224. * and instance name:
  225. * cpu/cpu0/...
  226. * cpu/cpu1/...
  227. * cpu/cpu2/...
  228. * ...
  229. */
  230. char name[EDAC_DEVICE_NAME_LEN + 1];
  231. /* Number of instances supported on this control structure
  232. * and the array of those instances
  233. */
  234. u32 nr_instances;
  235. struct edac_device_instance *instances;
  236. /* Event counters for the this whole EDAC Device */
  237. struct edac_device_counter counters;
  238. /* edac sysfs device control for the 'name'
  239. * device this structure controls
  240. */
  241. struct kobject kobj;
  242. };
  243. /* To get from the instance's wq to the beginning of the ctl structure */
  244. #define to_edac_mem_ctl_work(w) \
  245. container_of(w, struct mem_ctl_info, work)
  246. #define to_edac_device_ctl_work(w) \
  247. container_of(w,struct edac_device_ctl_info,work)
  248. /*
  249. * The alloc() and free() functions for the 'edac_device' control info
  250. * structure. A MC driver will allocate one of these for each edac_device
  251. * it is going to control/register with the EDAC CORE.
  252. */
  253. extern struct edac_device_ctl_info *edac_device_alloc_ctl_info(
  254. unsigned sizeof_private,
  255. char *edac_device_name, unsigned nr_instances,
  256. char *edac_block_name, unsigned nr_blocks,
  257. unsigned offset_value,
  258. struct edac_dev_sysfs_block_attribute *block_attributes,
  259. unsigned nr_attribs,
  260. int device_index);
  261. /* The offset value can be:
  262. * -1 indicating no offset value
  263. * 0 for zero-based block numbers
  264. * 1 for 1-based block number
  265. * other for other-based block number
  266. */
  267. #define BLOCK_OFFSET_VALUE_OFF ((unsigned) -1)
  268. extern void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info);
  269. #ifdef CONFIG_PCI
  270. struct edac_pci_counter {
  271. atomic_t pe_count;
  272. atomic_t npe_count;
  273. };
  274. /*
  275. * Abstract edac_pci control info structure
  276. *
  277. */
  278. struct edac_pci_ctl_info {
  279. /* for global list of edac_pci_ctl_info structs */
  280. struct list_head link;
  281. int pci_idx;
  282. struct bus_type *edac_subsys; /* pointer to subsystem */
  283. /* the internal state of this controller instance */
  284. int op_state;
  285. /* work struct for this instance */
  286. struct delayed_work work;
  287. /* pointer to edac polling checking routine:
  288. * If NOT NULL: points to polling check routine
  289. * If NULL: Then assumes INTERRUPT operation, where
  290. * MC driver will receive events
  291. */
  292. void (*edac_check) (struct edac_pci_ctl_info * edac_dev);
  293. struct device *dev; /* pointer to device structure */
  294. const char *mod_name; /* module name */
  295. const char *ctl_name; /* edac controller name */
  296. const char *dev_name; /* pci/platform/etc... name */
  297. void *pvt_info; /* pointer to 'private driver' info */
  298. unsigned long start_time; /* edac_pci load start time (jiffies) */
  299. struct completion complete;
  300. /* sysfs top name under 'edac' directory
  301. * and instance name:
  302. * cpu/cpu0/...
  303. * cpu/cpu1/...
  304. * cpu/cpu2/...
  305. * ...
  306. */
  307. char name[EDAC_DEVICE_NAME_LEN + 1];
  308. /* Event counters for the this whole EDAC Device */
  309. struct edac_pci_counter counters;
  310. /* edac sysfs device control for the 'name'
  311. * device this structure controls
  312. */
  313. struct kobject kobj;
  314. struct completion kobj_complete;
  315. };
  316. #define to_edac_pci_ctl_work(w) \
  317. container_of(w, struct edac_pci_ctl_info,work)
  318. /* write all or some bits in a byte-register*/
  319. static inline void pci_write_bits8(struct pci_dev *pdev, int offset, u8 value,
  320. u8 mask)
  321. {
  322. if (mask != 0xff) {
  323. u8 buf;
  324. pci_read_config_byte(pdev, offset, &buf);
  325. value &= mask;
  326. buf &= ~mask;
  327. value |= buf;
  328. }
  329. pci_write_config_byte(pdev, offset, value);
  330. }
  331. /* write all or some bits in a word-register*/
  332. static inline void pci_write_bits16(struct pci_dev *pdev, int offset,
  333. u16 value, u16 mask)
  334. {
  335. if (mask != 0xffff) {
  336. u16 buf;
  337. pci_read_config_word(pdev, offset, &buf);
  338. value &= mask;
  339. buf &= ~mask;
  340. value |= buf;
  341. }
  342. pci_write_config_word(pdev, offset, value);
  343. }
  344. /*
  345. * pci_write_bits32
  346. *
  347. * edac local routine to do pci_write_config_dword, but adds
  348. * a mask parameter. If mask is all ones, ignore the mask.
  349. * Otherwise utilize the mask to isolate specified bits
  350. *
  351. * write all or some bits in a dword-register
  352. */
  353. static inline void pci_write_bits32(struct pci_dev *pdev, int offset,
  354. u32 value, u32 mask)
  355. {
  356. if (mask != 0xffffffff) {
  357. u32 buf;
  358. pci_read_config_dword(pdev, offset, &buf);
  359. value &= mask;
  360. buf &= ~mask;
  361. value |= buf;
  362. }
  363. pci_write_config_dword(pdev, offset, value);
  364. }
  365. #endif /* CONFIG_PCI */
  366. extern struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
  367. unsigned nr_chans, int edac_index);
  368. extern int edac_mc_add_mc(struct mem_ctl_info *mci);
  369. extern void edac_mc_free(struct mem_ctl_info *mci);
  370. extern struct mem_ctl_info *edac_mc_find(int idx);
  371. extern struct mem_ctl_info *find_mci_by_dev(struct device *dev);
  372. extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev);
  373. extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
  374. unsigned long page);
  375. /*
  376. * The no info errors are used when error overflows are reported.
  377. * There are a limited number of error logging registers that can
  378. * be exausted. When all registers are exhausted and an additional
  379. * error occurs then an error overflow register records that an
  380. * error occurred and the type of error, but doesn't have any
  381. * further information. The ce/ue versions make for cleaner
  382. * reporting logic and function interface - reduces conditional
  383. * statement clutter and extra function arguments.
  384. */
  385. extern void edac_mc_handle_ce(struct mem_ctl_info *mci,
  386. unsigned long page_frame_number,
  387. unsigned long offset_in_page,
  388. unsigned long syndrome, int row, int channel,
  389. const char *msg);
  390. extern void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci,
  391. const char *msg);
  392. extern void edac_mc_handle_ue(struct mem_ctl_info *mci,
  393. unsigned long page_frame_number,
  394. unsigned long offset_in_page, int row,
  395. const char *msg);
  396. extern void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci,
  397. const char *msg);
  398. extern void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, unsigned int csrow,
  399. unsigned int channel0, unsigned int channel1,
  400. char *msg);
  401. extern void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, unsigned int csrow,
  402. unsigned int channel, char *msg);
  403. /*
  404. * edac_device APIs
  405. */
  406. extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
  407. extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev);
  408. extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
  409. int inst_nr, int block_nr, const char *msg);
  410. extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
  411. int inst_nr, int block_nr, const char *msg);
  412. extern int edac_device_alloc_index(void);
  413. /*
  414. * edac_pci APIs
  415. */
  416. extern struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
  417. const char *edac_pci_name);
  418. extern void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci);
  419. extern void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci,
  420. unsigned long value);
  421. extern int edac_pci_alloc_index(void);
  422. extern int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx);
  423. extern struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev);
  424. extern struct edac_pci_ctl_info *edac_pci_create_generic_ctl(
  425. struct device *dev,
  426. const char *mod_name);
  427. extern void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci);
  428. extern int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci);
  429. extern void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci);
  430. /*
  431. * edac misc APIs
  432. */
  433. extern char *edac_op_state_to_string(int op_state);
  434. #endif /* _EDAC_CORE_H_ */