unix/fiss

src/fsvs/supervise.c in master
Repositories | Summary | Log | Files | LICENSE

supervise.c (3658B) download


  1#include "config.h"
  2#include "service.h"
  3#include "util.h"
  4
  5#include <errno.h>
  6#include <fcntl.h>
  7#include <limits.h>
  8#include <setjmp.h>
  9#include <stdio.h>
 10#include <stdlib.h>
 11#include <string.h>
 12#include <sys/socket.h>
 13#include <sys/stat.h>
 14#include <sys/un.h>
 15#include <sys/wait.h>
 16#include <unistd.h>
 17
 18
 19static void signal_child(int unused) {
 20	(void) unused;
 21
 22	int             status;
 23	pid_t           died_pid;
 24	struct service* s = NULL;
 25
 26	if ((died_pid = wait(&status)) == -1) {
 27		fprint(1, "error: cannot wait for process: %r\n");
 28		return;
 29	}
 30
 31	if (!WIFEXITED(status) && !WIFSIGNALED(status))
 32		return;
 33
 34	for (int i = 0; i < services_size; i++) {
 35		if (services[i].pid == died_pid) {
 36			s = &services[i];
 37			break;
 38		}
 39	}
 40	if (s == NULL)
 41		return;
 42
 43	service_handle_exit(s, WIFSIGNALED(status), WIFSIGNALED(status) ? WTERMSIG(status) : WEXITSTATUS(status));
 44}
 45
 46static void update_services(void) {
 47	struct service* s;
 48
 49	for (int i = 0; i < services_size; i++) {
 50		s = &services[i];
 51		if (s->state == STATE_INACTIVE || s->state == STATE_ERROR)
 52			s->stop_timeout = 0;
 53
 54		if (s->state == STATE_ERROR)
 55			continue;
 56
 57		if (s->stop_timeout != 0) {
 58			if (time(NULL) - s->stop_timeout >= SV_STOP_TIMEOUT) {
 59				print(":: service '%s' doesn't terminate, killing...\n", s->name);
 60				service_kill(s, SIGKILL);
 61				s->stop_timeout = 0;
 62			}
 63		} else if (s->state == STATE_INACTIVE && service_need_restart(s)) {
 64			service_start(s);
 65		}
 66	}
 67}
 68
 69static void control_sockets(void) {
 70	struct service* s;
 71	char            cmd;
 72
 73	for (int i = 0; i < services_size; i++) {
 74		s = &services[i];
 75		while (read(s->control, &cmd, 1) == 1) {
 76			print("handling '%c' from %s\n", cmd, s->name);
 77			service_handle_command(s, cmd);
 78		}
 79	}
 80}
 81
 82void stop_dummies(void) {
 83	for (int i = 0; i < services_size; i++) {
 84		if (services[i].state != STATE_ACTIVE_DUMMY || services[i].restart == S_RESTART)
 85			continue;
 86
 87		for (int j = 0; j < services[i].children_size; j++) {
 88			struct service* dep = services[i].children[j];
 89			if (dep->state != STATE_INACTIVE && dep->state != STATE_ERROR)
 90				goto dont_stop;
 91		}
 92
 93		service_stop(&services[i]);
 94
 95	dont_stop:;
 96	}
 97}
 98
 99int service_supervise(const char* service_dir_, const char* service, bool once) {
100	struct sigaction sigact = { 0 };
101	struct service*  s;
102
103	daemon_running = true;
104
105	sigact.sa_handler = signal_child;
106	sigaction(SIGCHLD, &sigact, NULL);
107	sigact.sa_handler = SIG_IGN;
108	sigaction(SIGPIPE, &sigact, NULL);
109
110	service_dir_path = service_dir_;
111	if ((service_dir = open(service_dir_, O_DIRECTORY)) == -1) {
112		print_errno("error: cannot open directory %s: %s\n", service_dir_);
113		return 1;
114	}
115
116	if ((null_fd = open("/dev/null", O_RDWR)) == -1) {
117		fprint(1, "error: cannot open /dev/null: %r\n");
118		null_fd = 1;
119	}
120
121	print(":: starting services\n");
122
123	service_refresh_directory();
124
125	if ((s = service_get(service)) == NULL) {
126		fprint(1, "error: cannot start '%s': not found\n", service);
127		goto cleanup;
128	}
129
130	s->restart = once ? S_ONCE : S_RESTART;
131	service_start(s);
132
133
134	bool cont;
135	// accept connections and handle requests
136	do {
137		if (!daemon_running) {
138			for (int i = 0; i < services_size; i++) {
139				s = &services[i];
140				service_stop(s);
141			}
142		}
143
144		service_refresh_directory();
145		stop_dummies();
146		control_sockets();
147		update_services();
148
149		sleep(SV_CHECK_INTERVAL);
150
151		cont = false;
152		for (int i = 0; i < services_size; i++) {
153			if (services[i].state != STATE_INACTIVE && services[i].state != STATE_ERROR)
154				cont = true;
155		}
156	} while (cont);
157
158	print(":: terminating\n");
159
160	print(":: all services stopped\n");
161
162cleanup:
163
164	close(service_dir);
165	close(null_fd);
166
167	signal(SIGPIPE, SIG_DFL);
168	signal(SIGCHLD, SIG_DFL);
169	return 0;
170}