contrib/package/freifunk-watchdog: only monitor ad-hoc interfaces
[project/luci.git] / contrib / package / freifunk-watchdog / src / watchdog.c
1 /*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) 2009 Jo-Philipp Wich <xm@subsignal.org>
17 */
18
19 #include "watchdog.h"
20
21 /* Global watchdog fd, required by signal handler */
22 int wdfd = -1;
23
24 /* Watchdog shutdown helper */
25 static void shutdown_watchdog(int sig)
26 {
27 static int wdelay = 3600;
28 static const char wshutdown = WATCH_SHUTDOWN;
29
30 if( wdfd > -1 )
31 {
32 syslog(LOG_INFO, "Stopping watchdog timer");
33 write(wdfd, &wshutdown, 1);
34
35 /* Older Kamikaze versions are compiled with
36 * CONFIG_WATCHDOG_NOWAYOUT=y, this can be
37 * harmful if we're in the middle of an upgrade.
38 * Increase the watchdog timeout to 3600 seconds
39 * here to avoid unplanned reboots. */
40 ioctl(wdfd, WDIOC_SETTIMEOUT, &wdelay);
41
42 close(wdfd);
43 wdfd = -1;
44 }
45
46 exit(0);
47 }
48
49 /* Get BSSID of given interface */
50 static int iw_get_bssid(int iwfd, const char *ifname, char *bssid)
51 {
52 struct iwreq iwrq;
53
54 if( iw_ioctl(iwfd, ifname, SIOCGIWAP, &iwrq) >= 0 )
55 {
56 unsigned char *addr = (unsigned char *)iwrq.u.ap_addr.sa_data;
57
58 sprintf(bssid, "%02X:%02X:%02X:%02X:%02X:%02X",
59 addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
60
61 return 0;
62 }
63
64 return -1;
65 }
66
67 /* Get channel of given interface */
68 static int iw_get_channel(int iwfd, const char *ifname, int *channel)
69 {
70 int i;
71 char buffer[sizeof(struct iw_range)];
72 double cur_freq, cmp_freq;
73 struct iwreq iwrq;
74 struct iw_range *range;
75
76 memset(buffer, 0, sizeof(buffer));
77
78 iwrq.u.data.pointer = (char *)buffer;
79 iwrq.u.data.length = sizeof(buffer);
80 iwrq.u.data.flags = 0;
81
82 if( iw_ioctl(iwfd, ifname, SIOCGIWRANGE, &iwrq) < 0)
83 {
84 *channel = -1;
85 return -1;
86 }
87
88 range = (struct iw_range *)buffer;
89
90 if( iw_ioctl(iwfd, ifname, SIOCGIWFREQ, &iwrq) >= 0 )
91 {
92 cur_freq = ((double)iwrq.u.freq.m) * pow(10, iwrq.u.freq.e);
93 if( cur_freq < 1000.00 )
94 {
95 *channel = (int)cur_freq;
96 return 0;
97 }
98
99 for(i = 0; i < range->num_frequency; i++)
100 {
101 cmp_freq = ((double)range->freq[i].m) * pow(10, range->freq[i].e);
102 if( cmp_freq == cur_freq )
103 {
104 *channel = (int)range->freq[i].i;
105 return 0;
106 }
107 }
108 }
109
110 *channel = -1;
111 return -1;
112 }
113
/*
 * Find the pid of the first process with the given name, other than the
 * calling process itself.
 *
 * name - process name; compared against the leading "Name:\t<name>\n"
 *        line of /proc/<pid>/status
 *
 * Returns the pid of the first match in /proc directory order, or -1 if
 * no other process matches or /proc cannot be opened (the latter is
 * logged with LOG_CRIT).
 */
static int find_process(const char *name)
{
	int pid = -1;
	int file;
	int n;
	ssize_t len;
	size_t cmplen;
	char path[64];
	char buffer[128];
	char cmpname[128];
	DIR *dir;
	struct dirent *entry;

	if ((dir = opendir("/proc")) == NULL) {
		syslog(LOG_CRIT, "Unable to open /proc: %s",
			strerror(errno));

		return -1;
	}

	snprintf(cmpname, sizeof(cmpname), "Name:\t%s\n", name);
	cmplen = strlen(cmpname);

	while ((entry = readdir(dir)) != NULL) {
		/* Only entries starting with a digit are process directories;
		 * this also skips "." and "..". The cast keeps isdigit()
		 * defined for bytes with the high bit set. */
		if (!isdigit((unsigned char)*entry->d_name))
			continue;

		n = snprintf(path, sizeof(path), "/proc/%s/status", entry->d_name);
		if (n < 0 || (size_t)n >= sizeof(path))
			continue;

		if ((file = open(path, O_RDONLY)) < 0)
			continue;

		/* Leave room for the terminator: read() does not add one, and
		 * the comparison below must not run past the data read. */
		len = read(file, buffer, sizeof(buffer) - 1);
		close(file);

		if (len <= 0)
			continue;

		buffer[len] = '\0';

		/* The status file has to start with the expected Name line */
		if (strncmp(buffer, cmpname, cmplen) != 0)
			continue;

		pid = atoi(entry->d_name);

		/* Skip myself ... */
		if (pid != getpid())
			break;

		pid = -1;
	}

	closedir(dir);
	return pid;
}
161
/*
 * Get the 5 minute load average.
 *
 * /proc/loadavg starts with the 1, 5 and 15 minute averages separated
 * by blanks; the second number is returned. The fields are parsed by
 * value rather than by fixed column, so the result stays correct when
 * an average has more than one digit before the decimal point.
 *
 * Returns the 5 minute average, or 0.00 if the file cannot be read or
 * parsed.
 */
static double find_loadavg(void)
{
	int fd;
	ssize_t len;
	char buffer[64];
	char *field, *end;
	double load = 0.00;

	if ((fd = open("/proc/loadavg", O_RDONLY)) < 0)
		return load;

	/* Keep one byte for the terminator; read() does not add one */
	len = read(fd, buffer, sizeof(buffer) - 1);
	close(fd);

	if (len <= 0)
		return load;

	buffer[len] = '\0';

	/* Step over the 1 minute average */
	(void)strtod(buffer, &end);
	if (end == buffer)
		return load;

	/* strtod() skips the separating blank itself */
	field = end;
	load = strtod(field, &end);
	if (end == field)
		load = 0.00;

	return load;
}
179
/*
 * Tell whether the state file of a uci config changed since the last call.
 *
 * config - config name; the file examined is /var/state/<config>
 * mtime  - in/out: modification time seen by the previous call (0 means
 *          "never seen"); updated whenever 1 is returned
 *
 * Returns  1 if *mtime was 0 or the file is newer than *mtime,
 *          0 if the file is not newer,
 *         -1 if the file could not be stat()ed (*mtime is unchanged).
 */
static int check_uci_update(const char *config, time_t *mtime)
{
	char path[128];
	struct stat st;

	snprintf(path, sizeof(path), "/var/state/%s", config);

	if (stat(path, &st) < 0)
		return -1;

	/* Seen before and not modified since: nothing to do */
	if (*mtime != 0 && st.st_mtime <= *mtime)
		return 0;

	*mtime = st.st_mtime;
	return 1;
}
200
201 /* Add tuple */
202 static void load_wifi_uci_add_iface(const char *section, struct uci_itr_ctx *itr)
203 {
204 wifi_tuple_t *t;
205 const char *ucitmp;
206 int val = 0;
207
208 ucitmp = ucix_get_option(itr->ctx, "wireless", section, "mode");
209 if( ucitmp && !strncmp(ucitmp, "adhoc", 5) )
210 {
211 if( (t = (wifi_tuple_t *)malloc(sizeof(wifi_tuple_t))) != NULL )
212 {
213 ucitmp = ucix_get_option(itr->ctx, "wireless", section, "ifname");
214 if(ucitmp)
215 {
216 strncpy(t->ifname, ucitmp, sizeof(t->ifname));
217 val++;
218 }
219
220 ucitmp = ucix_get_option(itr->ctx, "wireless", section, "bssid");
221 if(ucitmp)
222 {
223 strncpy(t->bssid, ucitmp, sizeof(t->bssid));
224 val++;
225 }
226
227 ucitmp = ucix_get_option(itr->ctx, "wireless", section, "device");
228 if(ucitmp)
229 {
230 ucitmp = ucix_get_option(itr->ctx, "wireless", ucitmp, "channel");
231 if(ucitmp)
232 {
233 t->channel = atoi(ucitmp);
234 val++;
235 }
236 }
237
238 if( val == 3 )
239 {
240 syslog(LOG_INFO, "Monitoring %s: bssid=%s channel=%d",
241 t->ifname, t->bssid, t->channel);
242
243 t->next = itr->list;
244 itr->list = t;
245 }
246 else
247 {
248 free(t);
249 }
250 }
251 }
252 }
253
254 /* Load config */
255 static wifi_tuple_t * load_wifi_uci(wifi_tuple_t *ifs, time_t *modtime)
256 {
257 struct uci_context *ctx;
258 struct uci_itr_ctx itr;
259 wifi_tuple_t *cur, *next;
260
261 if( check_uci_update("wireless", modtime) )
262 {
263 syslog(LOG_INFO, "Config changed, reloading");
264
265 if( (ctx = ucix_init("wireless")) != NULL )
266 {
267 if( ifs != NULL )
268 {
269 for(cur = ifs; cur; cur = next)
270 {
271 next = cur->next;
272 free(cur);
273 }
274 }
275
276 itr.list = NULL;
277 itr.ctx = ctx;
278
279 ucix_for_each_section_type(ctx, "wireless", "wifi-iface",
280 (void *)load_wifi_uci_add_iface, &itr);
281
282 return itr.list;
283 }
284 }
285
286 return ifs;
287 }
288
289 /* Daemon implementation */
290 static int do_daemon(void)
291 {
292 static int wdtrigger = 1;
293 static int wdtimeout = INTERVAL * 2;
294 static const char wdkeepalive = WATCH_KEEPALIVE;
295
296 int iwfd;
297 int channel;
298 char bssid[18];
299 struct sigaction sa;
300
301 wifi_tuple_t *ifs = NULL, *curif;
302 time_t modtime = 0;
303
304 int restart_wifi = 0;
305 int restart_cron = 0;
306 int restart_sshd = 0;
307 int loadavg_panic = 0;
308
309 openlog(SYSLOG_IDENT, 0, LOG_DAEMON);
310 //daemon(1, 1);
311
312 if( (iwfd = socket(AF_INET, SOCK_DGRAM, 0)) == -1 )
313 {
314 syslog(LOG_ERR, "Can not open wireless control socket: %s",
315 strerror(errno));
316
317 return 1;
318 }
319
320 if( (wdfd = open(WATCH_DEVICE, O_WRONLY)) > -1 )
321 {
322 syslog(LOG_INFO, "Opened %s - polling every %i seconds",
323 WATCH_DEVICE, INTERVAL);
324
325 /* Install signal handler to halt watchdog on shutdown */
326 sa.sa_handler = shutdown_watchdog;
327 sa.sa_flags = SA_NOCLDWAIT | SA_RESTART;
328 sigaction(SIGHUP, &sa, NULL);
329 sigaction(SIGINT, &sa, NULL);
330 sigaction(SIGPIPE, &sa, NULL);
331 sigaction(SIGTERM, &sa, NULL);
332 sigaction(SIGUSR1, &sa, NULL);
333 sigaction(SIGUSR2, &sa, NULL);
334
335 /* Set watchdog timeout to twice the interval */
336 ioctl(wdfd, WDIOC_SETTIMEOUT, &wdtimeout);
337 }
338
339 while( 1 )
340 {
341 /* Check average load */
342 if( find_loadavg() >= LOAD_TRESHOLD )
343 loadavg_panic++;
344 else
345 loadavg_panic = 0;
346
347 /* Check crond */
348 if( find_process("crond") < 0 )
349 restart_cron++;
350
351 /* Check SSHd */
352 if( find_process("dropbear") < 0 )
353 restart_sshd++;
354
355 /* Check wireless interfaces */
356 ifs = load_wifi_uci(ifs, &modtime);
357 for( curif = ifs; curif; curif = curif->next )
358 {
359 /* Get current channel and bssid */
360 if( (iw_get_bssid(iwfd, curif->ifname, bssid) == 0) &&
361 (iw_get_channel(iwfd, curif->ifname, &channel) == 0) )
362 {
363 /* Check BSSID */
364 if( strcasecmp(bssid, curif->bssid) != 0 )
365 {
366 syslog(LOG_WARNING, "BSSID mismatch on %s: current=%s wanted=%s",
367 curif->ifname, bssid, curif->bssid);
368
369 restart_wifi++;
370 }
371
372 /* Check channel */
373 else if( channel != curif->channel )
374 {
375 syslog(LOG_WARNING, "Channel mismatch on %s: current=%d wanted=%d",
376 curif->ifname, channel, curif->channel);
377
378 restart_wifi++;
379 }
380 }
381 else
382 {
383 syslog(LOG_WARNING, "Requested interface %s not present", curif->ifname);
384 }
385 }
386
387
388 /* Wifi restart required? */
389 if( restart_wifi >= HYSTERESIS )
390 {
391 restart_wifi = 0;
392 syslog(LOG_WARNING, "Channel or BSSID mismatch on wireless interface, restarting");
393 EXEC(WIFI_ACTION);
394 }
395
396 /* Cron restart required? */
397 if( restart_cron >= HYSTERESIS )
398 {
399 restart_cron = 0;
400 syslog(LOG_WARNING, "The cron process died, restarting");
401 EXEC(CRON_ACTION);
402 }
403
404 /* SSHd restart required? */
405 if( restart_sshd >= HYSTERESIS )
406 {
407 restart_sshd = 0;
408 syslog(LOG_WARNING, "The ssh process died, restarting");
409 EXEC(SSHD_ACTION);
410 }
411
412 /* Is there a load problem? */
413 if( loadavg_panic >= HYSTERESIS )
414 {
415 syslog(LOG_EMERG, "Critical system load level, triggering reset!");
416
417 /* Try watchdog, fall back to reboot */
418 if( wdfd > -1 )
419 ioctl(wdfd, WDIOC_SETTIMEOUT, &wdtrigger);
420 else
421 EXEC(LOAD_ACTION);
422 }
423
424 /* Reset watchdog timer */
425 if( wdfd > -1 )
426 write(wdfd, &wdkeepalive, 1);
427
428 sleep(INTERVAL);
429 }
430
431 shutdown_watchdog(0);
432 closelog();
433
434 return 0;
435 }
436
437
438 int main(int argc, char *argv[])
439 {
440 /* Check if watchdog is running ... */
441 if( (argc > 1) && (strcmp(argv[1], "running") == 0) )
442 {
443 return (find_process(BINARY) == -1);
444 }
445
446 /* Start daemon */
447 return do_daemon();
448 }