1
2EDAC(3) EDAC error reporting library EDAC(3)
3
4
5
7 libedac - EDAC error reporting library
8
9
11 #include <edac.h>
12
13 cc ... -ledac
14
15 edac_handle * edac_handle_create (void);
16
17 void edac_handle_destroy (edac_handle *edac);
18
19 int edac_handle_init (edac_handle *edac);
20
21 unsigned int edac_mc_count (edac_handle *edac);
22
23 int edac_handle_reset (edac_handle *edac);
24
25 int edac_error_totals (edac_handle *edac, struct edac_totals *totals);
26
27 edac_mc * edac_next_mc (edac_handle *edac);
28
29 int edac_mc_get_info (edac_mc *mc, struct edac_mc_info *info);
30
31 edac_mc *edac_next_mc_info (edac_handle *edac,
32 struct edac_mc_info *info);
33
34 int edac_mc_reset (struct edac_mc *mc);
35
36 edac_csrow * edac_next_csrow (struct edac_mc *mc);
37
38 int edac_csrow_get_info (edac_csrow *csrow,
39 struct edac_csrow_info *info);
40
41 edac_csrow * edac_next_csrow_info (edac_mc *mc,
42 struct edac_csrow_info *info);
43
44 const char * edac_strerror (edac_handle *edac);
45
46 edac_for_each_mc_info (edac_handle *edac, edac_mc *mc,
47 struct edac_mc_info *info) { ... }
48
49 edac_for_each_csrow_info (edac_mc *mc, edac_csrow *csrow,
50 struct edac_csrow_info *info) { ... }
51
52
54 The libedac library offers a very simple programming interface to the
55 information exported from in-kernel EDAC (Error Detection and Correc‐
56 tion) drivers in sysfs. The edac-util(8) utility uses libedac to report
57 errors in a user-friendly manner from the command line.
58
59 EDAC errors for most systems are recorded in sysfs on a per memory con‐
60 troller (MC) basis. Memory controllers are further subdivided by csrow
61 and channel. The libedac library provides a method to loop through mul‐
62 tiple MCs, and their corresponding csrows, obtaining information about
63 each component from sysfs along the way. There is also a simple single
64 call to retrieve the total error counts for a given machine.
65
66 In order to use libedac an edac_handle must first be opened via the
67 call edac_handle_create(). Once the handle is created, sysfs data can
68 be loaded into the handle with edac_handle_init(). A final call to
69 edac_handle_destroy() will free all memory and open files associated
70 with the edac handle.
71
72 edac_handle_create() will return NULL on failure to allocate memory.
73
74 The edac_strerror function will return a descriptive string representa‐
75 tion of the last error for the libedac handle edac.
76
77 The edac_error_totals() function will return the total counts of memory
78 and PCI errors in the totals structure passed to the function. The
79 totals structure is of type edac_totals which has the form:
80
81 struct edac_totals {
82 unsigned int ce_total; /* Total corrected errors */
83 unsigned int ue_total; /* Total uncorrected errors */
84 unsigned int pci_parity_total; /* Total PCI Parity errors */
85 };
86
88 Systems may have one or more memory controllers (MCs) with EDAC infor‐
89 mation. The number of MCs detected by EDAC drivers may be queried with
90 the edac_mc_count() function, while the edac_next_mc function will
91 return a handle to the next memory controller in the libedac handle's
92 internal list. This memory controller is represented by the opaque
93 edac_mc type.
94
95 edac_next_mc will return NULL when there are no further memory con‐
96 trollers to return. Thus the following example code is another method
97 to count all EDAC MCs (assuming the EDAC library handle edac has
98 already been initialized):
99
100 int i = 0;
101 edac_mc *mc;
102 while ((mc = edac_next_mc (edac)))
103 i++;
104 return (i);
105
106 To query information about an edac_mc, use the edac_mc_get_info func‐
107 tion. This function fills in the given info structure, which is of type
108 edac_mc_info:
109
110 struct edac_mc_info {
111 char id[]; /* Id of memory controller */
112 char mc_name[]; /* Name of MC */
113 unsigned int size_mb; /* Amount of RAM in MB */
114 unsigned int ce_count; /* Corrected error count */
115 unsigned int ce_noinfo_count;/* noinfo Corrected errors */
116 unsigned int ue_count; /* Uncorrected error count */
117 unsigned int ue_noinfo_count;/* noinfo Uncorrected errors*/
118 };
119
120 The function edac_next_mc_info() can be used to loop through all EDAC
121 memory controllers and obtain MC information in a single call. It is a
122 combined edac_next_mc() and edac_mc_get_info().
123
124 The function edac_handle_reset() will reset the internal memory con‐
125 troller iterator in the libedac handle. A subsequent call to
126 edac_next_mc() would thus return the first EDAC MC.
127
128 A convenience macro, edac_for_each_mc_info(), is provided which defines
129 a for loop that iterates through all memory controller
130 objects for a given EDAC handle, returning the MC information in the
131 info structure on each iteration. For example (assuming initialized
132 libedac handle edac):
133
134 edac_mc *mc;
135 struct edac_mc_info info;
136 int count = 0;
137
138 edac_for_each_mc_info (edac, mc, info) {
139 count++;
140 printf ("MC info: id=%s name=%s\n", info.id, info.mc_name);
141 }
142
144 Each EDAC memory controller may have one or more csrows associated with
145 it. Similar to the MC iterator functions described above, the
146 edac_next_csrow() function allows libedac users to loop through all
147 csrows within a given MC. Once the last csrow is reached, the function
148 will return NULL.
149
150 The edac_csrow_get_info() function returns information about edac_csrow
151 in the edac_csrow_info structure, which has the contents:
152
153 struct edac_csrow_info {
154 char id[]; /* CSROW Identity (e.g. csrow0) */
155 unsigned int size_mb; /* CSROW size in MB */
156 unsigned int ce_count; /* Total corrected errors */
157 unsigned int ue_count; /* Total uncorrected errors */
158 struct edac_channel channel[EDAC_MAX_CHANNELS];
159 };
160
161 struct edac_channel {
162 int valid; /* Is this channel valid */
163 unsigned int ce_count; /* Corrected error count */
164 int dimm_label_valid; /* Is DIMM label valid? */
165 char dimm_label[]; /* DIMM name */
166 };
167
168 The edac_next_csrow_info() function is a combined version of
169 edac_next_csrow() and edac_csrow_get_info() for convenience.
170
171 The edac_mc_reset() function is provided to reset the edac_mc internal
172 csrow iterator.
173
174 A convenience macro, edac_for_each_csrow_info(), is provided which
175 defines a for loop that iterates through all csrow objects in an EDAC
176 memory controller, returning the csrow information in the info struc‐
177 ture on each iteration.
178
179
181 Initialize libedac handle:
182
183 edac_handle *edac;
184
185 if (!(edac = edac_handle_create ())) {
186 fprintf (stderr, "edac_handle_create: Out of memory!\n");
187 exit (1);
188 }
189
190 if (edac_handle_init (edac) < 0) {
191 fprintf (stderr, "Unable to get EDAC data: %s\n",
192 edac_strerror (edac));
193 exit (1);
194 }
195
196 printf ("EDAC initialized with %u MCs\n", edac_mc_count (edac));
197
198 edac_handle_destroy (edac);
199
200 Report all DIMM labels for MC:csrow:channel combinations:
201
202 edac_mc *mc;
203 edac_csrow *csrow;
204 struct edac_mc_info mci;
205 struct edac_csrow_info csi;
206
207 edac_for_each_mc_info (edac, mc, mci) {
208 edac_for_each_csrow_info (mc, csrow, csi) {
209 char *label[2] = { "unset", "unset" };
210
211 if (csi.channel[0].dimm_label_valid)
212 label[0] = csi.channel[0].dimm_label;
213 if (csi.channel[1].dimm_label_valid)
214 label[1] = csi.channel[1].dimm_label;
215
216 printf ("%s:%s:ch0 = %s\n", mci.id, csi.id, label[0]);
217 printf ("%s:%s:ch1 = %s\n", mci.id, csi.id, label[1]);
218 }
219 }
220
222 edac-util(8), edac-ctl(8)
223
224
225
226
227@META_ALIAS 2009-01-30 EDAC(3)