luatest icon indicating copy to clipboard operation
luatest copied to clipboard

Improve error detection during server termination

Open ochaplashkin opened this issue 1 year ago • 1 comments

We added default conditions, while working on #349 and #252:

Segmentation fault:

if self.process.output_beautifier.stderr:find('Segmentation fault') then
    error(
        ('Segmentation fault during process termination (alias: %s, workdir: %s, pid: %d)\n%s')
        ...
    )
end

Memory leak from sanitizer:

if self.process.output_beautifier.stderr:find('LeakSanitizer') then
    error(
	('Memory leak during process termination (alias: %s, workdir: %s, pid: %s)\n%s')
	...
    )
end

Obviously, if we add error handling they will extend the server:stop() method. It is necessary to think about some kind of uniform approach.

Error codes in Tarantool source code.

ochaplashkin avatar Mar 14 '24 15:03 ochaplashkin

It would be nice to catch an exit code/a signal number. I've experimented around a bit.

It seems, we can't just use waitpid(), because libev reaps childs on its own using signalfd.

OTOH, we can handle SIGCHLD using libev from tarantool sources.

diff --git a/src/lua/popen.c b/src/lua/popen.c
index 942f99d083..9fdd4c2e7a 100644
--- a/src/lua/popen.c
+++ b/src/lua/popen.c
@@ -37,6 +37,8 @@
 
 #include <small/region.h>
 
+#include "tarantool_ev.h"
+
 #include "diag.h"
 #include "core/popen.h"
 #include "core/fiber.h"
@@ -2410,6 +2412,59 @@ lbox_popen_gc(struct lua_State *L)
 	return 0;
 }
 
+struct ww {
+	pid_t pid;
+	bool done;
+	int wstatus;
+	ev_child ev_sigchld;
+};
+
+static void
+sigchld_handler(EV_P_ ev_child *w, int revents)
+{
+	(void)revents;
+
+	ev_child_stop(EV_A_ w);
+
+	struct ww *ww = container_of(w, struct ww, ev_sigchld);
+	assert(w->rpid == ww->pid);
+	ww->wstatus = w->rstatus;
+	ww->done = true;
+}
+
+static int
+lbox_popen_w(struct lua_State *L)
+{
+	struct ww ww;
+
+	ww.pid = lua_tointeger(L, 1);
+	ww.done = false;
+	ww.wstatus = 0;
+
+	ev_child_init(&ww.ev_sigchld, sigchld_handler, ww.pid, 0);
+	ev_child_start(EV_DEFAULT_ &ww.ev_sigchld);
+
+	while (!ww.done) {
+		fiber_sleep(0.1);
+	}
+
+	lua_createtable(L, 0, 2);
+
+	if (WIFEXITED(ww.wstatus)) {
+		lua_pushliteral(L, "exited");
+		lua_setfield(L, -2, "state");
+		lua_pushinteger(L, WEXITSTATUS(ww.wstatus));
+		lua_setfield(L, -2, "exit_code");
+	} else {
+		lua_pushliteral(L, "signaled");
+		lua_setfield(L, -2, "state");
+		lua_pushinteger(L, WTERMSIG(ww.wstatus));
+		lua_setfield(L, -2, "signo");
+	}
+
+	return 1;
+}
+
 /* }}} */
 
 /* {{{ Module initialization */
@@ -2453,6 +2508,7 @@ tarantool_lua_popen_init(struct lua_State *L)
 	static const struct luaL_Reg popen_methods[] = {
 		{"new",		lbox_popen_new,		},
 		{"shell",	lbox_popen_shell,	},
+		{"w",		lbox_popen_w,		},
 		{NULL, NULL},
 	};
 	luaT_newmodule(L, "popen", popen_methods);

And use this function from luatest.

diff --git a/luatest/process.lua b/luatest/process.lua
index 9d447b3..2084dec 100644
--- a/luatest/process.lua
+++ b/luatest/process.lua
@@ -4,6 +4,7 @@ local fun = require('fun')
 local ffi = require('ffi')
 local fio = require('fio')
 local log = require('log')
+local fiber = require('fiber')
 
 local Class = require('luatest.class')
 local OutputBeautifier = require('luatest.output_beautifier')
@@ -55,7 +56,15 @@ function Process:start(path, args, env, options)
         if output_beautifier then
             output_beautifier:enable({track_pid = pid})
         end
-        return self:from({pid = pid, ignore_gc = options.ignore_gc, output_beautifier = output_beautifier})
+        local x = self:from({pid = pid, ignore_gc = options.ignore_gc, output_beautifier = output_beautifier})
+        x._wait_cond = fiber.cond()
+        x._wait_cond_signaled = false
+        fiber.create(function()
+            x.wstatus = require('popen').w(pid)
+            x._wait_cond_signaled = true
+            x._wait_cond:broadcast()
+        end)
+        return x
     end
     -- luacov: disable
     if options.chdir then
@@ -91,6 +100,13 @@ function Process.mt:is_alive()
     return self.pid ~= nil and self.class.is_pid_alive(self.pid)
 end
 
+function Process.mt:wait()
+    if not self._wait_cond_signaled then
+        self._wait_cond:wait()
+    end
+    return self.wstatus
+end
+
 function Process.kill_pid(pid, signal, options)
     checks('number|string', '?number|string', {quiet = '?boolean'})
     -- Signal values are platform-dependent so we can not use ffi here
diff --git a/luatest/server.lua b/luatest/server.lua
index 5506871..d5abcb5 100644
--- a/luatest/server.lua
+++ b/luatest/server.lua
@@ -440,15 +440,15 @@ function Server:stop()
         self.net_box = nil
     end
 
-    if self.process and self.process:is_alive() then
-        self.process:kill()
-        local ok, err = pcall(wait_for_condition, 'process is terminated', self, function()
-            return not self.process:is_alive()
-        end)
-        if not ok and not err:find('Process is terminated when waiting for') then
-            error(err)
+    if self.process then
+        if self.process:is_alive() then
+            self.process:kill()
         end
-        if self.process.output_beautifier.stderr:find('Segmentation fault') then
+
+        local wstatus = self.process:wait()
+        local ok = wstatus.state == 'exited' and wstatus.exit_code == 0
+
+        if not ok and self.process.output_beautifier.stderr:find('Segmentation fault') then
             error(
                 ('Segmentation fault during process termination (alias: %s, workdir: %s, pid: %d)\n%s')
                 :format(
@@ -459,6 +459,9 @@ function Server:stop()
                 )
             )
         end
+        if not ok then
+            error(('wstatus: %s'):format(json.encode(wstatus)))
+        end
         log.debug('Killed server process PID ' .. self.process.pid)
         self.process = nil
     end

(BTW, the built-in popen module performs all these things for processes started using popen.new() or popen.shell().)

It is interesting that I see the exit code 0, when tarantool is terminated by SIGTERM. It is a bit surprising, but I guess that it is implemented deliberately.

Totktonada avatar Mar 15 '24 09:03 Totktonada