Skip to content

Commit

Permalink
Replace e2e tests PID namespace with PR_SET_CHILD_SUBREAPER
Browse files Browse the repository at this point in the history
Cherry pick apptainer approach to drop PID namespace entirely in e2e
tests, and handle reaping children.

apptainer/apptainer@9c0b50a

Too many tests are now becoming partly dependent on systemd cgroups
management, which doesn't work in the PID namespace. It's not
practical to continue splitting them into different groups.

Original PR:

Replace e2e tests PID namespace with PR_SET_CHILD_SUBREAPER
Signed-off-by: Cédric Clerget <[email protected]>
  • Loading branch information
cclerget authored and dtrudg committed Dec 13, 2022
1 parent 01bfa7c commit da2ff83
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 184 deletions.
119 changes: 112 additions & 7 deletions e2e/e2e_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
// Copyright (c) 2019-2022, Sylabs Inc. All rights reserved.
// Copyright (c) Contributors to the Apptainer project, established as
// Apptainer a Series of LF Projects LLC.
// This software is licensed under a 3-clause BSD license. Please consult the
// LICENSE.md file distributed with the sources of this project regarding your
// rights to use or distribute this software.
Expand All @@ -8,19 +10,122 @@
package e2e

import (
"bytes"
"fmt"
"log"
"os"
"os/exec"
"os/signal"
"path/filepath"
"syscall"
"testing"

// This import will execute a CGO section with the help of a C constructor
// section "init". As we always require e2e tests to be run as root, the C part
// is responsible for finding the original user who executes tests; it will
// also create a dedicated mount namespace for the e2e tests, and a PID
// namespace if "SINGULARITY_E2E_NO_PID_NS" is not set. Finally, it will
// restore identity to the original user but will retain privileges for
// Privileged method enabling the execution of a function with root
// privileges when required
// section "init". It will create a dedicated mount namespace for the e2e tests
// and will restore identity to the original user but will retain privileges for
// Privileged method enabling the execution of a function with root privileges
// when required
_ "github.com/sylabs/singularity/e2e/internal/e2e/init"

"golang.org/x/sys/unix"
)

// TestE2E is the go test entry point for the end-to-end suite: it passes the
// testing.T straight to RunE2ETests and does nothing else itself.
func TestE2E(t *testing.T) {
RunE2ETests(t)
}

// TestMain turns the test binary into a child subreaper and then re-executes
// itself to run the actual tests.
//
// The first invocation (E2E_NO_REAPER unset) marks itself with
// PR_SET_CHILD_SUBREAPER so that orphaned descendants of the tests are
// re-parented to it, starts a copy of itself in a new mount namespace with
// E2E_NO_REAPER=1, forwards incoming signals to that copy, and reaps every
// child that terminates. When the re-executed test command exits, all
// remaining direct children are killed and the reaper exits with the command's
// exit status (128 + signal number if the command was killed by a signal).
//
// The second invocation (E2E_NO_REAPER set) simply runs the tests.
func TestMain(m *testing.M) {
	if os.Getenv("E2E_NO_REAPER") != "" {
		os.Exit(m.Run())
	}

	// start reaper process
	if err := unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(1), 0, 0, 0); err != nil {
		log.Fatalf("failed to create reaper process: %s", err)
	}

	// Every signal is delivered to this channel and the signal package drops
	// signals when the channel is full, so leave headroom: a dropped SIGCHLD
	// for the test command would leave the loop below waiting forever.
	sigCh := make(chan os.Signal, 64)
	signal.Notify(sigCh)

	executable, err := os.Executable()
	if err != nil {
		log.Fatalf("unable to determine current executable path: %s", err)
	}

	// The child inherits the environment; this is what makes it take the
	// "just run the tests" branch above.
	if err := os.Setenv("E2E_NO_REAPER", "1"); err != nil {
		log.Fatalf("unable to set E2E_NO_REAPER: %s", err)
	}

	cmd := exec.Command(executable, os.Args[1:]...)
	cmd.Stderr = os.Stderr
	cmd.Stdout = os.Stdout
	cmd.Stdin = os.Stdin
	// create a mount namespace
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Cloneflags: syscall.CLONE_NEWNS,
	}

	if err := cmd.Start(); err != nil {
		log.Fatalf("e2e test re-execution failed: %s", err)
	}
	cmdPid := cmd.Process.Pid

	for s := range sigCh {
		switch s {
		case syscall.SIGCHLD:
			// reap all children that have terminated so far
			for {
				var status syscall.WaitStatus

				childPid, err := syscall.Wait4(-1, &status, syscall.WNOHANG, nil)
				if err == syscall.EINTR {
					// interrupted before anything was reaped, try again
					continue
				}
				if childPid <= 0 || err != nil {
					break
				}
				if childPid == cmdPid {
					killAllChilds()
					if status.Signaled() {
						// ExitStatus() is -1 for a signal death; report
						// it the way a shell would instead.
						os.Exit(128 + int(status.Signal()))
					}
					os.Exit(status.ExitStatus())
				}
			}
		case syscall.SIGURG:
			// ignore goroutine preemption
		default:
			// forward signals to e2e test command; best effort, the
			// command may already be gone
			if sig, ok := s.(syscall.Signal); ok {
				_ = syscall.Kill(cmdPid, sig)
			}
		}
	}
}

// killAllChilds sends SIGKILL to every direct child of the current process and
// waits for each of them, so that nothing started by the tests is left behind.
//
// Children are discovered by scanning /proc/<pid>/stat for entries whose
// parent PID is the current PID. Entries that cannot be read or parsed are
// skipped: processes may exit while the scan is in progress.
func killAllChilds() {
	currentPid := os.Getpid()

	// Only numeric entries describe processes; this leaves out /proc/net,
	// /proc/self and /proc/thread-self.
	matches, err := filepath.Glob("/proc/[0-9]*/stat")
	if err != nil {
		log.Fatal(err)
	}
	for _, match := range matches {
		data, err := os.ReadFile(match)
		if err != nil {
			continue
		}
		pid, ppid, ok := parseProcStat(data)
		if !ok || ppid != currentPid {
			continue
		}
		// best effort to kill and wait child
		_ = syscall.Kill(pid, syscall.SIGKILL)
		_, _ = syscall.Wait4(pid, nil, 0, nil)
	}
}

// parseProcStat extracts the PID and parent PID from the content of a
// /proc/<pid>/stat file. The second field (the command name in parentheses)
// may itself contain spaces and parentheses, so the remaining fields are
// located relative to the last ')' rather than by splitting on spaces.
func parseProcStat(data []byte) (pid, ppid int, ok bool) {
	open := bytes.IndexByte(data, '(')
	end := bytes.LastIndexByte(data, ')')
	if open < 0 || end < open {
		return 0, 0, false
	}
	// After the command name come the state and then the parent PID.
	rest := bytes.Fields(data[end+1:])
	if len(rest) < 2 {
		return 0, 0, false
	}
	if _, err := fmt.Sscanf(string(data[:open]), "%d", &pid); err != nil {
		return 0, 0, false
	}
	if _, err := fmt.Sscanf(string(rest[1]), "%d", &ppid); err != nil {
		return 0, 0, false
	}
	return pid, ppid, true
}
144 changes: 16 additions & 128 deletions e2e/internal/e2e/init/init_linux.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
// Copyright (c) 2019, 2022 Sylabs Inc. All rights reserved.
// Copyright (c) Contributors to the Apptainer project, established as
// Apptainer a Series of LF Projects LLC.
// This software is licensed under a 3-clause BSD license. Please consult the
// LICENSE.md file distributed with the sources of this project regarding your
// rights to use or distribute this software.
Expand All @@ -17,120 +19,6 @@ package init
#include <sys/types.h>
#include <sys/wait.h>
#define SIZE 128
// getProcInfo reads /proc/<pid>/status and returns the parent PID of the
// supplied PID.
//
// *uid and *gid are only overwritten while they still hold 0, so a caller
// walking up the process tree keeps the first non-zero identity found.
// Returns 1 when the status path does not fit the buffer or the file cannot
// be opened; getUnprivIDs treats a PID of 1 as an error.
static pid_t getProcInfo(pid_t pid, uid_t *uid, gid_t *gid) {
    FILE *status;
    char procPath[SIZE];
    char *line = NULL;
    size_t len = 0;
    pid_t ppid = 1;
    int n;

    // snprintf reports the length it wanted to write: anything >= SIZE
    // means the path was truncated
    n = snprintf(procPath, SIZE, "/proc/%d/status", pid);
    if ( n < 0 || n >= SIZE ) {
        // set returned PID to 1 to trigger error from getUnprivIDs call
        return 1;
    }

    status = fopen(procPath, "r");
    if ( status == NULL ) {
        // set returned PID to 1 to trigger error from getUnprivIDs call
        return 1;
    }

    while ( getline(&line, &len, status) != -1 ) {
        if ( ppid == 1 ) {
            sscanf(line, "PPid:\t%d", &ppid);
        }
        // uid_t and gid_t are unsigned, so they are scanned with %u
        if ( *uid == 0 ) {
            sscanf(line, "Uid:\t%u", uid);
        }
        if ( *gid == 0 ) {
            sscanf(line, "Gid:\t%u", gid);
        }
    }

    free(line);
    fclose(status);

    return ppid;
}
// getUnprivIDs walks up the process parent chain starting at pid until
// getProcInfo has filled in both a non-zero UID and a non-zero GID, and
// returns 0 in that case. It returns -1 once the walk reaches PID 1, which
// means either that no process carrying the original user identity was found
// or that getProcInfo reported an error.
static int getUnprivIDs(pid_t pid, uid_t *uid, gid_t *gid) {
    pid_t current = pid;

    while ( current != 1 ) {
        // getProcInfo hands back the parent, which is the next candidate
        current = getProcInfo(current, uid, gid);
        if ( *uid != 0 && *gid != 0 ) {
            return 0;
        }
    }
    return -1;
}
// create and use a PID namespace if possible to avoid leaving some processes
// once tests are done. Child process won't catch orphaned child processes like
// instances, and we can't really catch them correctly to avoid conflicts during
// `cmd.Wait()` calls. But this is not a big deal compared to detached processes
// that could keep running on host machine after the tests execution.
//
// On success the function returns only in the forked child, after a fresh proc
// filesystem has been mounted on /proc. The parent never returns: it waits for
// the child and exits with the child's exit status, or 128 + signal number if
// the child was killed by a signal. If unshare(CLONE_NEWPID) fails the function
// returns without doing anything.
static void create_pid_namespace(void) {
    pid_t forked;

    if ( unshare(CLONE_NEWPID) != 0 ) {
        // best effort: carry on without a PID namespace
        return;
    }

    forked = fork();
    if ( forked < 0 ) {
        // without this check a failed fork would fall through to the
        // child-only code below
        fprintf(stderr, "failed to fork in PID namespace: %s\n", strerror(errno));
        exit(1);
    }
    if ( forked > 0 ) {
        // parent process will wait that tests execution finished
        int status, exit_status = 0;
        pid_t child;

        child = waitpid(forked, &status, 0);
        if ( child < 0 ) {
            fprintf(stderr, "unexpected error while waiting children: %s\n", strerror(errno));
            exit(1);
        }

        if ( WIFEXITED(status) ) {
            exit_status = WEXITSTATUS(status);
        } else if ( WIFSIGNALED(status) ) {
            kill(getpid(), WTERMSIG(status));
            exit_status = 128 + WTERMSIG(status);
        }
        exit(exit_status);
    }

    // mount a new proc filesystem for the new PID namespace
    if ( mount(NULL, "/proc", "proc", MS_NOSUID|MS_NODEV, NULL) < 0 ) {
        fprintf(stderr, "failed to mount proc filesystem: %s\n", strerror(errno));
        exit(1);
    }
    // return to the child process
}
// Put the process into its own mount namespace so that a temporary filesystem
// can be bound on top of home directories without any risk of damaging them
// during tests execution. Three steps are performed in order: unshare the
// filesystem attributes, unshare the mount namespace, then mark the whole
// mount tree as private. Any failure is reported on stderr and the process
// exits with status 1.
static void create_mount_namespace(void) {
    int rc;

    rc = unshare(CLONE_FS);
    if ( rc < 0 ) {
        fprintf(stderr, "failed to unshare filesystem: %s\n", strerror(errno));
        exit(1);
    }

    rc = unshare(CLONE_NEWNS);
    if ( rc < 0 ) {
        fprintf(stderr, "failed to create mount namespace: %s\n", strerror(errno));
        exit(1);
    }

    rc = mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL);
    if ( rc < 0 ) {
        fprintf(stderr, "failed to set private mount propagation: %s\n", strerror(errno));
        exit(1);
    }
}
// This is the CGO init constructor called before executing any Go code
// in e2e/e2e_test.go.
__attribute__((constructor)) static void init(void) {
Expand All @@ -139,25 +27,25 @@ __attribute__((constructor)) static void init(void) {
if ( getuid() != 0 ) {
fprintf(stderr, "tests must be executed as root user\n");
fprintf(stderr, "%d %d", uid, gid);
exit(1);
}
if ( getUnprivIDs(getppid(), &uid, &gid) < 0 ) {
fprintf(stderr, "failed to retrieve user information\n");
fprintf(stderr, "%d %d", getuid(), getgid());
exit(1);
} else if ( getenv("E2E_NO_REAPER") == NULL ) {
return;
}
if ( uid == 0 || gid == 0 ) {
fprintf(stderr, "failed to retrieve user information\n");
exit(1);
if ( getenv("E2E_ORIG_GID") == NULL ) {
fprintf(stderr, "E2E_ORIG_GID environment variable not set\n");
}
gid = atoi(getenv("E2E_ORIG_GID"));
fprintf(stderr, "Creating E2E mount namespace\n");
create_mount_namespace();
if ( getenv("E2E_ORIG_UID") == NULL ) {
fprintf(stderr, "E2E_ORIG_UID environment variable not set\n");
}
uid = atoi(getenv("E2E_ORIG_UID"));
char *s = getenv("SINGULARITY_E2E_NO_PID_NS");
if ( s == NULL || s[0] == '\0' ) {
fprintf(stderr, "Creating E2E PID namespace\n");
create_pid_namespace();
if ( mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0 ) {
fprintf(stderr, "failed to set private mount propagation: %s\n", strerror(errno));
exit(1);
}
// set original user identity and retain privileges for
Expand Down
10 changes: 0 additions & 10 deletions e2e/oci/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,6 @@ import (
"github.com/sylabs/singularity/pkg/ociruntime"
)

// NOTE
// ----
// Tests in this package/topic are run in a mount namespace only. There is
// no PID namespace, in order that the systemd cgroups manager functionality
// can be exercised.
//
// You must take extra care not to leave detached processes etc. that will
// pollute the host PID namespace.
//

func randomContainerID(t *testing.T) string {
t.Helper()

Expand Down
32 changes: 3 additions & 29 deletions e2e/suite.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,12 @@ var (
runTests = flag.String("e2e_tests", "", "specify a regex matching e2e tests to run")
)

// e2eGroupsNoPIDNS maps a group name to the test group registered under it,
// for the groups that run without a PID NS (systemd cgroups related).
var e2eGroupsNoPIDNS = map[string]testhelper.Group{
"CGROUPS": cgroups.E2ETests,
"INSTANCE": instance.E2ETests,
"OCI": oci.E2ETests,
}

// Groups run inside a PID NS
var e2eGroups = map[string]testhelper.Group{
"ACTIONS": actions.E2ETests,
"BUILDCFG": e2ebuildcfg.E2ETests,
"BUILD": imgbuild.E2ETests,
"CACHE": cache.E2ETests,
"CGROUPS": cgroups.E2ETests,
"CMDENVVARS": cmdenvvars.E2ETests,
"CONFIG": config.E2ETests,
"DELETE": delete.E2ETests,
Expand All @@ -86,7 +79,9 @@ var e2eGroups = map[string]testhelper.Group{
"GPU": gpu.E2ETests,
"HELP": help.E2ETests,
"INSPECT": inspect.E2ETests,
"INSTANCE": instance.E2ETests,
"KEY": key.E2ETests,
"OCI": oci.E2ETests,
"OVERLAY": overlay.E2ETests,
"PLUGIN": plugin.E2ETests,
"PULL": pull.E2ETests,
Expand Down Expand Up @@ -212,27 +207,6 @@ func Run(t *testing.T) {
// If you need the test image, add the call at the top of your
// own test.

if os.Getenv("SINGULARITY_E2E_NO_PID_NS") != "" {
// e2e tests that will run in a mount namespace only
// They do not currently require the OCI registry instance.
suite := testhelper.NewSuite(t, testenv)

groups := []string{}
if runGroups != nil && *runGroups != "" {
groups = strings.Split(*runGroups, ",")
}

for key, val := range e2eGroupsNoPIDNS {
if len(groups) == 0 || slice.ContainsString(groups, key) {
suite.AddGroup(key, val)
}
}
suite.Run(runTests)
return
}

// e2e tests that will run in a mount and PID namespace are below

// Provision local registry
testenv.TestRegistry = e2e.StartRegistry(t, testenv)

Expand Down
Loading

0 comments on commit da2ff83

Please sign in to comment.