edac_mc.h 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. /*
  2. * Defines, structures, APIs for edac_mc module
  3. *
  4. * (C) 2007 Linux Networx (http://lnxi.com)
  5. * This file may be distributed under the terms of the
  6. * GNU General Public License.
  7. *
  8. * Written by Thayne Harbaugh
  9. * Based on work by Dan Hollis <goemon at anime dot net> and others.
  10. * http://www.anime.net/~goemon/linux-ecc/
  11. *
  12. * NMI handling support added by
  13. * Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>
  14. *
  15. * Refactored for multi-source files:
  16. * Doug Thompson <norsk5@xmission.com>
  17. *
  18. * Please look at Documentation/driver-api/edac.rst for more info about
  19. * EDAC core structs and functions.
  20. */
  21. #ifndef _EDAC_MC_H_
  22. #define _EDAC_MC_H_
  23. #include <linux/kernel.h>
  24. #include <linux/types.h>
  25. #include <linux/module.h>
  26. #include <linux/spinlock.h>
  27. #include <linux/smp.h>
  28. #include <linux/pci.h>
  29. #include <linux/time.h>
  30. #include <linux/nmi.h>
  31. #include <linux/rcupdate.h>
  32. #include <linux/completion.h>
  33. #include <linux/kobject.h>
  34. #include <linux/platform_device.h>
  35. #include <linux/workqueue.h>
  36. #include <linux/edac.h>
  37. #if PAGE_SHIFT < 20
  38. #define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT))
  39. #define MiB_TO_PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
  40. #else /* PAGE_SHIFT > 20 */
  41. #define PAGES_TO_MiB(pages) ((pages) << (PAGE_SHIFT - 20))
  42. #define MiB_TO_PAGES(mb) ((mb) >> (PAGE_SHIFT - 20))
  43. #endif
  44. #define edac_printk(level, prefix, fmt, arg...) \
  45. printk(level "EDAC " prefix ": " fmt, ##arg)
  46. #define edac_mc_printk(mci, level, fmt, arg...) \
  47. printk(level "EDAC MC%d: " fmt, mci->mc_idx, ##arg)
  48. #define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \
  49. printk(level "EDAC " prefix " MC%d: " fmt, mci->mc_idx, ##arg)
  50. #define edac_device_printk(ctl, level, fmt, arg...) \
  51. printk(level "EDAC DEVICE%d: " fmt, ctl->dev_idx, ##arg)
  52. #define edac_pci_printk(ctl, level, fmt, arg...) \
  53. printk(level "EDAC PCI%d: " fmt, ctl->pci_idx, ##arg)
  54. /* prefixes for edac_printk() and edac_mc_printk() */
  55. #define EDAC_MC "MC"
  56. #define EDAC_PCI "PCI"
  57. #define EDAC_DEBUG "DEBUG"
  58. extern const char * const edac_mem_types[];
  59. #ifdef CONFIG_EDAC_DEBUG
  60. extern int edac_debug_level;
  61. #define edac_dbg(level, fmt, ...) \
  62. do { \
  63. if (level <= edac_debug_level) \
  64. edac_printk(KERN_DEBUG, EDAC_DEBUG, \
  65. "%s: " fmt, __func__, ##__VA_ARGS__); \
  66. } while (0)
  67. #else /* !CONFIG_EDAC_DEBUG */
  68. #define edac_dbg(level, fmt, ...) \
  69. do { \
  70. if (0) \
  71. edac_printk(KERN_DEBUG, EDAC_DEBUG, \
  72. "%s: " fmt, __func__, ##__VA_ARGS__); \
  73. } while (0)
  74. #endif /* !CONFIG_EDAC_DEBUG */
  75. #define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \
  76. PCI_DEVICE_ID_ ## vend ## _ ## dev
  77. #define edac_dev_name(dev) (dev)->dev_name
  78. #define to_mci(k) container_of(k, struct mem_ctl_info, dev)
  79. /**
  80. * edac_mc_alloc() - Allocate and partially fill a struct &mem_ctl_info.
  81. *
  82. * @mc_num: Memory controller number
  83. * @n_layers: Number of MC hierarchy layers
  84. * @layers: Describes each layer as seen by the Memory Controller
  85. * @sz_pvt: size of private storage needed
  86. *
  87. *
  88. * Everything is kmalloc'ed as one big chunk - more efficient.
  89. * Only can be used if all structures have the same lifetime - otherwise
  90. * you have to allocate and initialize your own structures.
  91. *
  92. * Use edac_mc_free() to free mc structures allocated by this function.
  93. *
  94. * .. note::
  95. *
  96. * drivers handle multi-rank memories in different ways: in some
  97. * drivers, one multi-rank memory stick is mapped as one entry, while, in
  98. * others, a single multi-rank memory stick would be mapped into several
  99. * entries. Currently, this function will allocate multiple struct dimm_info
  100. * on such scenarios, as grouping the multiple ranks require drivers change.
  101. *
  102. * Returns:
  103. * On success, return a pointer to struct mem_ctl_info pointer;
  104. * %NULL otherwise
  105. */
  106. struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
  107. unsigned n_layers,
  108. struct edac_mc_layer *layers,
  109. unsigned sz_pvt);
  110. /**
  111. * edac_mc_add_mc_with_groups() - Insert the @mci structure into the mci
  112. * global list and create sysfs entries associated with @mci structure.
  113. *
  114. * @mci: pointer to the mci structure to be added to the list
  115. * @groups: optional attribute groups for the driver-specific sysfs entries
  116. *
  117. * Returns:
  118. * 0 on Success, or an error code on failure
  119. */
  120. extern int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
  121. const struct attribute_group **groups);
  122. #define edac_mc_add_mc(mci) edac_mc_add_mc_with_groups(mci, NULL)
  123. /**
  124. * edac_mc_free() - Frees a previously allocated @mci structure
  125. *
  126. * @mci: pointer to a struct mem_ctl_info structure
  127. */
  128. extern void edac_mc_free(struct mem_ctl_info *mci);
  129. /**
  130. * edac_has_mcs() - Check if any MCs have been allocated.
  131. *
  132. * Returns:
  133. * True if MC instances have been registered successfully.
  134. * False otherwise.
  135. */
  136. extern bool edac_has_mcs(void);
  137. /**
  138. * edac_mc_find() - Search for a mem_ctl_info structure whose index is @idx.
  139. *
  140. * @idx: index to be seek
  141. *
  142. * If found, return a pointer to the structure.
  143. * Else return NULL.
  144. */
  145. extern struct mem_ctl_info *edac_mc_find(int idx);
  146. /**
  147. * find_mci_by_dev() - Scan list of controllers looking for the one that
  148. * manages the @dev device.
  149. *
  150. * @dev: pointer to a struct device related with the MCI
  151. *
  152. * Returns: on success, returns a pointer to struct &mem_ctl_info;
  153. * %NULL otherwise.
  154. */
  155. extern struct mem_ctl_info *find_mci_by_dev(struct device *dev);
  156. /**
  157. * edac_mc_del_mc() - Remove sysfs entries for mci structure associated with
  158. * @dev and remove mci structure from global list.
  159. *
  160. * @dev: Pointer to struct &device representing mci structure to remove.
  161. *
  162. * Returns: pointer to removed mci structure, or %NULL if device not found.
  163. */
  164. extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev);
  165. /**
  166. * edac_mc_find_csrow_by_page() - Ancillary routine to identify what csrow
  167. * contains a memory page.
  168. *
  169. * @mci: pointer to a struct mem_ctl_info structure
  170. * @page: memory page to find
  171. *
  172. * Returns: on success, returns the csrow. -1 if not found.
  173. */
  174. extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
  175. unsigned long page);
  176. /**
  177. * edac_raw_mc_handle_error() - Reports a memory event to userspace without
  178. * doing anything to discover the error location.
  179. *
  180. * @type: severity of the error (CE/UE/Fatal)
  181. * @mci: a struct mem_ctl_info pointer
  182. * @e: error description
  183. *
  184. * This raw function is used internally by edac_mc_handle_error(). It should
  185. * only be called directly when the hardware error come directly from BIOS,
  186. * like in the case of APEI GHES driver.
  187. */
  188. void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
  189. struct mem_ctl_info *mci,
  190. struct edac_raw_error_desc *e);
  191. /**
  192. * edac_mc_handle_error() - Reports a memory event to userspace.
  193. *
  194. * @type: severity of the error (CE/UE/Fatal)
  195. * @mci: a struct mem_ctl_info pointer
  196. * @error_count: Number of errors of the same type
  197. * @page_frame_number: mem page where the error occurred
  198. * @offset_in_page: offset of the error inside the page
  199. * @syndrome: ECC syndrome
  200. * @top_layer: Memory layer[0] position
  201. * @mid_layer: Memory layer[1] position
  202. * @low_layer: Memory layer[2] position
  203. * @msg: Message meaningful to the end users that
  204. * explains the event
  205. * @other_detail: Technical details about the event that
  206. * may help hardware manufacturers and
  207. * EDAC developers to analyse the event
  208. */
  209. void edac_mc_handle_error(const enum hw_event_mc_err_type type,
  210. struct mem_ctl_info *mci,
  211. const u16 error_count,
  212. const unsigned long page_frame_number,
  213. const unsigned long offset_in_page,
  214. const unsigned long syndrome,
  215. const int top_layer,
  216. const int mid_layer,
  217. const int low_layer,
  218. const char *msg,
  219. const char *other_detail);
  220. /*
  221. * edac misc APIs
  222. */
  223. extern char *edac_op_state_to_string(int op_state);
  224. #endif /* _EDAC_MC_H_ */