supervise.c (3658B) download
1#include "config.h"
2#include "service.h"
3#include "util.h"
4
5#include <errno.h>
6#include <fcntl.h>
7#include <limits.h>
8#include <setjmp.h>
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <sys/socket.h>
13#include <sys/stat.h>
14#include <sys/un.h>
15#include <sys/wait.h>
16#include <unistd.h>
17
18
19static void signal_child(int unused) {
20 (void) unused;
21
22 int status;
23 pid_t died_pid;
24 struct service* s = NULL;
25
26 if ((died_pid = wait(&status)) == -1) {
27 fprint(1, "error: cannot wait for process: %r\n");
28 return;
29 }
30
31 if (!WIFEXITED(status) && !WIFSIGNALED(status))
32 return;
33
34 for (int i = 0; i < services_size; i++) {
35 if (services[i].pid == died_pid) {
36 s = &services[i];
37 break;
38 }
39 }
40 if (s == NULL)
41 return;
42
43 service_handle_exit(s, WIFSIGNALED(status), WIFSIGNALED(status) ? WTERMSIG(status) : WEXITSTATUS(status));
44}
45
46static void update_services(void) {
47 struct service* s;
48
49 for (int i = 0; i < services_size; i++) {
50 s = &services[i];
51 if (s->state == STATE_INACTIVE || s->state == STATE_ERROR)
52 s->stop_timeout = 0;
53
54 if (s->state == STATE_ERROR)
55 continue;
56
57 if (s->stop_timeout != 0) {
58 if (time(NULL) - s->stop_timeout >= SV_STOP_TIMEOUT) {
59 print(":: service '%s' doesn't terminate, killing...\n", s->name);
60 service_kill(s, SIGKILL);
61 s->stop_timeout = 0;
62 }
63 } else if (s->state == STATE_INACTIVE && service_need_restart(s)) {
64 service_start(s);
65 }
66 }
67}
68
69static void control_sockets(void) {
70 struct service* s;
71 char cmd;
72
73 for (int i = 0; i < services_size; i++) {
74 s = &services[i];
75 while (read(s->control, &cmd, 1) == 1) {
76 print("handling '%c' from %s\n", cmd, s->name);
77 service_handle_command(s, cmd);
78 }
79 }
80}
81
82void stop_dummies(void) {
83 for (int i = 0; i < services_size; i++) {
84 if (services[i].state != STATE_ACTIVE_DUMMY || services[i].restart == S_RESTART)
85 continue;
86
87 for (int j = 0; j < services[i].children_size; j++) {
88 struct service* dep = services[i].children[j];
89 if (dep->state != STATE_INACTIVE && dep->state != STATE_ERROR)
90 goto dont_stop;
91 }
92
93 service_stop(&services[i]);
94
95 dont_stop:;
96 }
97}
98
99int service_supervise(const char* service_dir_, const char* service, bool once) {
100 struct sigaction sigact = { 0 };
101 struct service* s;
102
103 daemon_running = true;
104
105 sigact.sa_handler = signal_child;
106 sigaction(SIGCHLD, &sigact, NULL);
107 sigact.sa_handler = SIG_IGN;
108 sigaction(SIGPIPE, &sigact, NULL);
109
110 service_dir_path = service_dir_;
111 if ((service_dir = open(service_dir_, O_DIRECTORY)) == -1) {
112 print_errno("error: cannot open directory %s: %s\n", service_dir_);
113 return 1;
114 }
115
116 if ((null_fd = open("/dev/null", O_RDWR)) == -1) {
117 fprint(1, "error: cannot open /dev/null: %r\n");
118 null_fd = 1;
119 }
120
121 print(":: starting services\n");
122
123 service_refresh_directory();
124
125 if ((s = service_get(service)) == NULL) {
126 fprint(1, "error: cannot start '%s': not found\n", service);
127 goto cleanup;
128 }
129
130 s->restart = once ? S_ONCE : S_RESTART;
131 service_start(s);
132
133
134 bool cont;
135 // accept connections and handle requests
136 do {
137 if (!daemon_running) {
138 for (int i = 0; i < services_size; i++) {
139 s = &services[i];
140 service_stop(s);
141 }
142 }
143
144 service_refresh_directory();
145 stop_dummies();
146 control_sockets();
147 update_services();
148
149 sleep(SV_CHECK_INTERVAL);
150
151 cont = false;
152 for (int i = 0; i < services_size; i++) {
153 if (services[i].state != STATE_INACTIVE && services[i].state != STATE_ERROR)
154 cont = true;
155 }
156 } while (cont);
157
158 print(":: terminating\n");
159
160 print(":: all services stopped\n");
161
162cleanup:
163
164 close(service_dir);
165 close(null_fd);
166
167 signal(SIGPIPE, SIG_DFL);
168 signal(SIGCHLD, SIG_DFL);
169 return 0;
170}