Replace e2e tests PID namespace with PR_SET_CHILD_SUBREAPER

Cherry-pick the Apptainer approach (apptainer/apptainer@9c0b50a) of dropping the PID namespace entirely in the e2e tests and handling the reaping of child processes instead. Too many tests are now becoming partly dependent on systemd cgroups management, which doesn't work inside the PID namespace, and it is not practical to continue splitting them into different groups. Original PR: Replace e2e tests PID namespace with PR_SET_CHILD_SUBREAPER. Signed-off-by: Cédric Clerget <[email protected]>
genomics-dev · Dec 13, 2022 · da2ff83 · da2ff83
1 parent 01bfa7c
commit da2ff83
Show file tree

Hide file tree

Showing 6 changed files with 137 additions and 184 deletions.
diff --git a/e2e/e2e_test.go b/e2e/e2e_test.go
@@ -1,4 +1,6 @@
 // Copyright (c) 2019-2022, Sylabs Inc. All rights reserved.
+// Copyright (c) Contributors to the Apptainer project, established as
+//   Apptainer a Series of LF Projects LLC.
 // This software is licensed under a 3-clause BSD license. Please consult the
 // LICENSE.md file distributed with the sources of this project regarding your
 // rights to use or distribute this software.
@@ -8,19 +10,122 @@
 package e2e
 
 import (
+	"bytes"
+	"fmt"
+	"log"
+	"os"
+	"os/exec"
+	"os/signal"
+	"path/filepath"
+	"syscall"
 	"testing"
 
 	// This import will execute a CGO section with the help of a C constructor
-	// section "init". As we always require to run e2e tests as root, the C part
-	// is responsible of finding the original user who executes tests; it will
-	// also create a dedicated mount namespace for the e2e tests, and a PID
-	// namespace if "SINGULARITY_E2E_NO_PID_NS" is not set. Finally, it will
-	// restore identity to the original user but will retain privileges for
-	// Privileged method enabling the execution of a function with root
-	// privileges when required
+	// section "init". In the re-executed test process, which TestMain starts in a
+	// dedicated mount namespace, it sets private mount propagation and restores
+	// identity to the original user, but retains privileges for the Privileged
+	// method enabling the execution of a function with root privileges when required
 	_ "github.com/sylabs/singularity/e2e/internal/e2e/init"
+
+	"golang.org/x/sys/unix"
 )
 
 // TestE2E is the `go test` entry point for the e2e suite; it simply hands
 // the testing handle over to RunE2ETests.
 func TestE2E(t *testing.T) {
 	RunE2ETests(t)
 }
+
+// TestMain runs the e2e test binary under a child subreaper.
+//
+// When E2E_NO_REAPER is set, the process is the re-executed test command and
+// simply runs the tests. Otherwise the process marks itself as a child
+// subreaper, re-executes the test binary in a new mount namespace with
+// E2E_NO_REAPER=1, and then loops on incoming signals: SIGCHLD triggers
+// reaping of every terminated child, SIGURG is ignored, and any other signal
+// is forwarded to the re-executed command. Once that command terminates, the
+// remaining direct children are killed and the process exits with the
+// command's exit status, or 128+signal if the command was killed by a signal.
+func TestMain(m *testing.M) {
+	if os.Getenv("E2E_NO_REAPER") != "" {
+		os.Exit(m.Run())
+	}
+
+	// start reaper process
+	if err := unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(1), 0, 0, 0); err != nil {
+		log.Fatalf("failed to create reaper process: %s", err)
+	}
+
+	// signal.Notify never blocks when delivering: a signal arriving while the
+	// channel is full is dropped. Losing the SIGCHLD of the test command would
+	// leave the loop below waiting forever, so keep a generous buffer.
+	sigCh := make(chan os.Signal, 64)
+	signal.Notify(sigCh)
+
+	executable, err := os.Executable()
+	if err != nil {
+		log.Fatalf("unable to determine current executable path: %s", err)
+	}
+
+	// tell the re-executed binary to run the tests instead of becoming
+	// a reaper itself
+	if err := os.Setenv("E2E_NO_REAPER", "1"); err != nil {
+		log.Fatalf("failed to set E2E_NO_REAPER environment variable: %s", err)
+	}
+
+	cmd := exec.Command(executable, os.Args[1:]...)
+	cmd.Stderr = os.Stderr
+	cmd.Stdout = os.Stdout
+	cmd.Stdin = os.Stdin
+	// create a mount namespace
+	cmd.SysProcAttr = &syscall.SysProcAttr{
+		Cloneflags: syscall.CLONE_NEWNS,
+	}
+
+	if err := cmd.Start(); err != nil {
+		log.Fatalf("e2e test re-execution failed: %s", err)
+	}
+	cmdPid := cmd.Process.Pid
+
+	for s := range sigCh {
+		switch s {
+		case syscall.SIGCHLD:
+			// reap all terminated children; SIGCHLD is not queued, so a
+			// single notification may stand for several of them
+			for {
+				var status syscall.WaitStatus
+
+				childPid, err := syscall.Wait4(-1, &status, syscall.WNOHANG, nil)
+				if childPid <= 0 || err != nil {
+					break
+				}
+				if childPid != cmdPid {
+					continue
+				}
+				killAllChilds()
+				if status.Signaled() {
+					// ExitStatus() is -1 for a signaled process, report
+					// the conventional 128+signal exit code instead
+					os.Exit(128 + int(status.Signal()))
+				}
+				os.Exit(status.ExitStatus())
+			}
+		case syscall.SIGURG:
+			// ignore goroutine preemption
+		default:
+			// forward signals to e2e test command, best effort: the
+			// command may already be gone
+			if sig, ok := s.(syscall.Signal); ok {
+				_ = syscall.Kill(cmdPid, sig)
+			}
+		}
+	}
+}
+
+// killAllChilds sends SIGKILL to every process listed under /proc whose
+// parent PID is the current process, and waits for each of them.
+// Processes that cannot be read or parsed are skipped (best effort).
+func killAllChilds() {
+	currentPid := os.Getpid()
+
+	matches, err := filepath.Glob("/proc/*/stat")
+	if err != nil {
+		log.Fatal(err)
+	}
+	for _, match := range matches {
+		switch match {
+		case "/proc/net/stat", "/proc/self/stat", "/proc/thread-self/stat":
+			// not per-process stat entries, or aliases of this process
+			continue
+		}
+		d, err := os.ReadFile(match)
+		if err != nil {
+			// the process may have exited since the glob
+			continue
+		}
+		pid, ppid, ok := parseProcStat(d)
+		if !ok || ppid != currentPid {
+			continue
+		}
+		// best effort to kill and wait child
+		_ = syscall.Kill(pid, syscall.SIGKILL)
+		_, _ = syscall.Wait4(pid, nil, 0, nil)
+	}
+}
+
+// parseProcStat extracts the PID and parent PID from the content of a
+// /proc/<pid>/stat file. The command name (second field) is enclosed in
+// parentheses and may itself contain spaces or parentheses, so the fields
+// following it are located relative to the last ')' rather than by splitting
+// on whitespace. ok is false when the content does not have that layout.
+func parseProcStat(data []byte) (pid, ppid int, ok bool) {
+	data = bytes.TrimSpace(data)
+
+	open := bytes.IndexByte(data, '(')
+	closing := bytes.LastIndexByte(data, ')')
+	if open < 0 || closing < open {
+		return 0, 0, false
+	}
+	if n, err := fmt.Sscanf(string(data[:open]), "%d", &pid); err != nil || n != 1 {
+		return 0, 0, false
+	}
+	// remaining fields start with the process state character followed by
+	// the parent PID; %c does not skip leading blanks, hence the trim
+	rest := bytes.TrimSpace(data[closing+1:])
+	if n, err := fmt.Sscanf(string(rest), "%c %d", new(byte), &ppid); err != nil || n != 2 {
+		return 0, 0, false
+	}
+	return pid, ppid, true
+}
diff --git a/e2e/internal/e2e/init/init_linux.go b/e2e/internal/e2e/init/init_linux.go
@@ -1,4 +1,6 @@
 // Copyright (c) 2019, 2022 Sylabs Inc. All rights reserved.
+// Copyright (c) Contributors to the Apptainer project, established as
+//   Apptainer a Series of LF Projects LLC.
 // This software is licensed under a 3-clause BSD license. Please consult the
 // LICENSE.md file distributed with the sources of this project regarding your
 // rights to use or distribute this software.
@@ -17,120 +19,6 @@ package init
 #include <sys/types.h>
 #include <sys/wait.h>
 
-#define SIZE    128
-
-// getProcInfo returns the parent PID, UID, and GID associated with the
-// supplied PID.
-static pid_t getProcInfo(pid_t pid, uid_t *uid, gid_t *gid) {
-	FILE *status;
-	char procPath[SIZE];
-	char *line = NULL;
-	size_t len = 0;
-	pid_t ppid = 1;
-
-	memset(procPath, 0, SIZE);
-	if ( snprintf(procPath, SIZE-1, "/proc/%d/status", pid) > SIZE-1 ) {
-		// set returned PID to 1 to trigger error from getUnprivIDs call
-		return 1;
-	}
-
-	status = fopen(procPath, "r");
-	if ( status == NULL ) {
-		// set returned PID to 1 to trigger error from getUnprivIDs call
-		return 1;
-	}
-
-	while ( getline(&line, &len, status) != -1 ) {
-		if ( ppid == 1 ) {
-			sscanf(line, "PPid:\t%d", &ppid);
-		}
-		if ( *uid == 0 ) {
-			sscanf(line, "Uid:\t%d", uid);
-		}
-		if ( *gid == 0 ) {
-			sscanf(line, "Gid:\t%d", gid);
-		}
-	}
-
-	free(line);
-	fclose(status);
-
-	return ppid;
-}
-
-// getUnprivIDs searches recursively up the process parent chain to find a
-// process with a non-root UID, then returns the UID and GID of that process.
-static int getUnprivIDs(pid_t pid, uid_t *uid, gid_t *gid) {
-	// PID 1 here means we didn't find a process containing
-	// identity of the original user or an error occurred in
-	// getProcInfo
-	if ( pid == 1 ) {
-		return -1;
-	}
-	pid_t ppid = getProcInfo(pid, uid, gid);
-	if ( *uid == 0 || *gid == 0 ) {
-		return getUnprivIDs(ppid, uid, gid);
-	}
-	return 0;
-}
-
-// create and use a PID namespace if possible to avoid leaving some processes
-// once tests are done. Child process won't catch orphaned child processes like
-// instances, and we can't really catch them correctly to avoid conflicts during
-// `cmd.Wait()` calls. But this is not a big deal compared to detached processes
-// that could keep running on host machine after the tests execution.
-static void create_pid_namespace(void) {
-	if ( unshare(CLONE_NEWPID) == 0 ) {
-		pid_t forked = fork();
-		if ( forked > 0 ) {
-			// parent process will wait that tests execution finished
-			int status, exit_status = 0;
-			pid_t child;
-
-			child = waitpid(forked, &status, 0);
-			if ( child < 0 ) {
-				fprintf(stderr, "unexpected error while waiting children: %s\n", strerror(errno));
-				exit(1);
-			}
-
-			if ( WIFEXITED(status) ) {
-				if ( WEXITSTATUS(status) != 0 ) {
-					exit_status = WEXITSTATUS(status);
-				}
-			} else if ( WIFSIGNALED(status) ) {
-				kill(getpid(), WTERMSIG(status));
-				exit_status = 128 + WTERMSIG(status);
-			}
-			exit(exit_status);
-		}
-
-		// mount a new proc filesystem for the new PID namespace
-		if ( mount(NULL, "/proc", "proc", MS_NOSUID|MS_NODEV, NULL) < 0 ) {
-			fprintf(stderr, "failed to set private mount propagation: %s\n", strerror(errno));
-			exit(1);
-		}
-		// return to the child process
-	}
-}
-
-// create and use a mount namespace in order to bind a temporary
-// filesystem on top of home directories and not screw them up by
-// accident during tests execution.
-static void create_mount_namespace(void) {
-	if ( unshare(CLONE_FS) < 0 ) {
-		fprintf(stderr, "failed to unshare filesystem: %s\n", strerror(errno));
-		exit(1);
-	}
-	if ( unshare(CLONE_NEWNS) < 0 ) {
-		fprintf(stderr, "failed to create mount namespace: %s\n", strerror(errno));
-		exit(1);
-	}
-	if ( mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0 ) {
-		fprintf(stderr, "failed to set private mount propagation: %s\n", strerror(errno));
-		exit(1);
-	}
-}
-
 // This is the CGO init constructor called before executing any Go code
 // in e2e/e2e_test.go.
 __attribute__((constructor)) static void init(void) {
@@ -139,25 +27,25 @@ __attribute__((constructor)) static void init(void) {
 
 	if ( getuid() != 0 ) {
 		fprintf(stderr, "tests must be executed as root user\n");
-		fprintf(stderr, "%d %d", uid, gid);
-		exit(1);
-	}
-	if ( getUnprivIDs(getppid(), &uid, &gid) < 0 ) {
-		fprintf(stderr, "failed to retrieve user information\n");
+		fprintf(stderr, "%d %d", getuid(), getgid());
 		exit(1);
+	} else if ( getenv("E2E_NO_REAPER") == NULL ) {
+		return;
 	}
-	if ( uid == 0 || gid == 0 ) {
-		fprintf(stderr, "failed to retrieve user information\n");
-		exit(1);
+
+	if ( getenv("E2E_ORIG_GID") == NULL ) {
+		fprintf(stderr, "E2E_ORIG_GID environment variable not set\n");
 	}
+	gid = atoi(getenv("E2E_ORIG_GID"));
 
-	fprintf(stderr, "Creating E2E mount namespace\n");
-	create_mount_namespace();
+	if ( getenv("E2E_ORIG_UID") == NULL ) {
+		fprintf(stderr, "E2E_ORIG_UID environment variable not set\n");
+	}
+	uid = atoi(getenv("E2E_ORIG_UID"));
 
-	char *s = getenv("SINGULARITY_E2E_NO_PID_NS");
-	if ( s == NULL || s[0] == '\0' ) {
-		fprintf(stderr, "Creating E2E PID namespace\n");
-		create_pid_namespace();
+	if ( mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0 ) {
+		fprintf(stderr, "failed to set private mount propagation: %s\n", strerror(errno));
+		exit(1);
 	}
 
 	// set original user identity and retain privileges for

diff --git a/e2e/oci/oci.go b/e2e/oci/oci.go
@@ -19,16 +19,6 @@ import (
 	"github.com/sylabs/singularity/pkg/ociruntime"
 )
 
-//  NOTE
-//  ----
-//  Tests in this package/topic are run in a a mount namespace only. There is
-//  no PID namespace, in order that the systemd cgroups manager functionality
-//  can be exercised.
-//
-//  You must take extra care not to leave detached process etc. that will
-//  pollute the host PID namespace.
-//
-
 func randomContainerID(t *testing.T) string {
 	t.Helper()
 

diff --git a/e2e/suite.go b/e2e/suite.go
@@ -64,19 +64,12 @@ var (
 	runTests    = flag.String("e2e_tests", "", "specify a regex matching e2e tests to run")
 )
 
-// Groups run without a PID NS (systemd cgroups related)
-var e2eGroupsNoPIDNS = map[string]testhelper.Group{
-	"CGROUPS":  cgroups.E2ETests,
-	"INSTANCE": instance.E2ETests,
-	"OCI":      oci.E2ETests,
-}
-
-// Groups run inside a PID NS
 var e2eGroups = map[string]testhelper.Group{
 	"ACTIONS":    actions.E2ETests,
 	"BUILDCFG":   e2ebuildcfg.E2ETests,
 	"BUILD":      imgbuild.E2ETests,
 	"CACHE":      cache.E2ETests,
+	"CGROUPS":    cgroups.E2ETests,
 	"CMDENVVARS": cmdenvvars.E2ETests,
 	"CONFIG":     config.E2ETests,
 	"DELETE":     delete.E2ETests,
@@ -86,7 +79,9 @@ var e2eGroups = map[string]testhelper.Group{
 	"GPU":        gpu.E2ETests,
 	"HELP":       help.E2ETests,
 	"INSPECT":    inspect.E2ETests,
+	"INSTANCE":   instance.E2ETests,
 	"KEY":        key.E2ETests,
+	"OCI":        oci.E2ETests,
 	"OVERLAY":    overlay.E2ETests,
 	"PLUGIN":     plugin.E2ETests,
 	"PULL":       pull.E2ETests,
@@ -212,27 +207,6 @@ func Run(t *testing.T) {
 	// If you need the test image, add the call at the top of your
 	// own test.
 
-	if os.Getenv("SINGULARITY_E2E_NO_PID_NS") != "" {
-		// e2e tests that will run in a mount namespace only
-		// They do not currently require the OCI registry instance.
-		suite := testhelper.NewSuite(t, testenv)
-
-		groups := []string{}
-		if runGroups != nil && *runGroups != "" {
-			groups = strings.Split(*runGroups, ",")
-		}
-
-		for key, val := range e2eGroupsNoPIDNS {
-			if len(groups) == 0 || slice.ContainsString(groups, key) {
-				suite.AddGroup(key, val)
-			}
-		}
-		suite.Run(runTests)
-		return
-	}
-
-	// e2e tests that will run in a mount and PID namespace are below
-
 	// Provision local registry
 	testenv.TestRegistry = e2e.StartRegistry(t, testenv)