From 64844dc02f44628a2b4e8c5512f5b3db807cac15 Mon Sep 17 00:00:00 2001 From: preminger Date: Thu, 11 May 2023 09:59:40 -0400 Subject: [PATCH] oci: support --overlay (#1659) * oci: support --overlay * support for multiple overlays, other revisions * fixup: deduplicate RunWrapped logic * lots of refactoring and cleanup * remove leftover debug-related panic call * cleanup comments, fix small issues w/erroring --------- Co-authored-by: David Trudgian --- CHANGELOG.md | 4 + cmd/internal/cli/oci_linux.go | 12 + docs/content.go | 2 +- e2e/actions/actions.go | 1 + e2e/actions/oci.go | 99 ++++++- e2e/docker/docker.go | 12 +- e2e/docker/regressions.go | 2 +- e2e/imgbuild/imgbuild.go | 2 +- e2e/suite.go | 6 +- internal/app/singularity/oci_linux.go | 4 +- .../runtime/launcher/oci/launcher_linux.go | 7 +- .../pkg/runtime/launcher/oci/oci_overlay.go | 108 ++++++++ .../runtime/launcher/oci/oci_runc_linux.go | 57 ++-- internal/pkg/runtime/launcher/options.go | 2 +- pkg/ocibundle/tools/overlay_linux.go | 246 +++++++++++++++--- 15 files changed, 463 insertions(+), 101 deletions(-) create mode 100644 internal/pkg/runtime/launcher/oci/oci_overlay.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b58e953c2..9809c4a99c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,10 @@ `--fakeroot`, for example). - The `remote status` command will now print the username, realname, and email of the logged-in user, if available. +- OCI-mode now supports `--overlay <dir>` flag, allowing writes to the + filesystem to persist across runs of the OCI container. If specified dir does + not exist, Singularity will attempt to create it. Multiple overlays can be + specified, but all but one must be read-only (`--overlay <dir>:ro`). 
## 3.11.3 \[2023-05-04\] diff --git a/cmd/internal/cli/oci_linux.go b/cmd/internal/cli/oci_linux.go index 8e3a58866d..1506926590 100644 --- a/cmd/internal/cli/oci_linux.go +++ b/cmd/internal/cli/oci_linux.go @@ -33,6 +33,17 @@ var ociBundleFlag = cmdline.Flag{ EnvKeys: []string{"BUNDLE"}, } +// -o|--overlay +var ociOverlayFlag = cmdline.Flag{ + ID: "ociOverlayFlag", + Value: &ociArgs.OverlayPaths, + DefaultValue: []string{}, + Name: "overlay", + ShortHand: "o", + Usage: "specify an overlay dir to use in lieu of a writable tmpfs", + Tag: "", +} + // -l|--log-path var ociLogPathFlag = cmdline.Flag{ ID: "ociLogPathFlag", @@ -126,6 +137,7 @@ func init() { cmdManager.RegisterFlagForCmd(&ociLogPathFlag, createRunCmd...) cmdManager.RegisterFlagForCmd(&ociLogFormatFlag, createRunCmd...) cmdManager.RegisterFlagForCmd(&ociPidFileFlag, createRunCmd...) + cmdManager.RegisterFlagForCmd(&ociOverlayFlag, OciRunWrappedCmd) cmdManager.RegisterFlagForCmd(&ociKillForceFlag, OciKillCmd) cmdManager.RegisterFlagForCmd(&ociKillSignalFlag, OciKillCmd) cmdManager.RegisterFlagForCmd(&ociUpdateFromFileFlag, OciUpdateCmd) diff --git a/docs/content.go b/docs/content.go index 11fba9305e..e335c22522 100644 --- a/docs/content.go +++ b/docs/content.go @@ -977,7 +977,7 @@ Enterprise Performance Computing (EPC)` $ singularity oci delete mycontainer` // Internal oci launcher use only - no user-facing docs - OciRunWrappedUse string = `run-wrapped -b [run options...] ` + OciRunWrappedUse string = `run-wrapped -b [-o ] [run options...] ` OciUpdateUse string = `update [update options...] 
` OciUpdateShort string = `Update container cgroups resources (root user only)` diff --git a/e2e/actions/actions.go b/e2e/actions/actions.go index 8cb112ab76..2249f86ee3 100644 --- a/e2e/actions/actions.go +++ b/e2e/actions/actions.go @@ -2592,5 +2592,6 @@ func E2ETests(env e2e.TestEnv) testhelper.Tests { "ociCdi": c.actionOciCdi, // singularity exec --oci --cdi "ociIDMaps": c.actionOciIDMaps, // check uid/gid mapping on host for --oci as user / --fakeroot "ociCompat": np(c.actionOciCompat), // --oci equivalence to native mode --compat + "ociOverlay": (c.actionOciOverlay), // --overlay in OCI mode } } diff --git a/e2e/actions/oci.go b/e2e/actions/oci.go index 3721379e59..39d118033e 100644 --- a/e2e/actions/oci.go +++ b/e2e/actions/oci.go @@ -241,7 +241,7 @@ func (c actionTests) actionOciExec(t *testing.T) { argv: []string{"--home", "/tmp", imageRef, "cat", "/etc/passwd"}, exit: 0, wantOutputs: []e2e.SingularityCmdResultOp{ - e2e.ExpectOutput(e2e.RegexMatch, `^root:x:0:0:root:[^:]*:/bin/sh\n`), + e2e.ExpectOutput(e2e.RegexMatch, `^root:x:0:0:root:[^:]*:/bin/ash\n`), }, }, } @@ -811,7 +811,7 @@ func (c actionTests) actionOciCdi(t *testing.T) { // Generate the command to be executed in the container // Start by printing all environment variables, to test using e2e.ContainMatch conditions later - execCmd := "/bin/env" + execCmd := "/usr/bin/env" // Add commands to test the presence of mapped devices. for _, d := range tt.DeviceNodes { @@ -974,3 +974,98 @@ func (c actionTests) actionOciCompat(t *testing.T) { ) } } + +// actionOciOverlay checks that --overlay functions correctly in OCI mode. 
+func (c actionTests) actionOciOverlay(t *testing.T) { + e2e.EnsureOCIArchive(t, c.env) + imageRef := "oci-archive:" + c.env.OCIArchivePath + + for _, profile := range []e2e.Profile{e2e.OCIRootProfile, e2e.OCIFakerootProfile} { + testDir, err := fs.MakeTmpDir(c.env.TestDir, "overlaytestdir", 0o755) + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { + if !t.Failed() { + os.RemoveAll(testDir) + } + }) + + // Create a few read-only overlay subdirs under testDir + for i := 0; i < 3; i++ { + dirName := fmt.Sprintf("my_ro_ol_dir%d", i) + fullPath := filepath.Join(testDir, dirName) + if err = os.Mkdir(fullPath, 0o755); err != nil { + t.Fatal(err) + } + t.Cleanup(func() { + if !t.Failed() { + os.RemoveAll(fullPath) + } + }) + if err = os.WriteFile( + filepath.Join(fullPath, fmt.Sprintf("testfile.%d", i)), + []byte(fmt.Sprintf("test_string_%d\n", i)), + 0o644); err != nil { + t.Fatal(err) + } + if err = os.WriteFile( + filepath.Join(fullPath, "maskable_testfile"), + []byte(fmt.Sprintf("maskable_string_%d\n", i)), + 0o644); err != nil { + t.Fatal(err) + } + } + + tests := []struct { + name string + args []string + exitCode int + wantOutputs []e2e.SingularityCmdResultOp + }{ + { + name: "NewWritable", + args: []string{"--overlay", filepath.Join(testDir, "my_rw_ol_dir"), imageRef, "sh", "-c", "echo my_test_string > /my_test_file"}, + exitCode: 0, + }, + { + name: "ExistWritable", + args: []string{"--overlay", filepath.Join(testDir, "my_rw_ol_dir"), imageRef, "cat", "/my_test_file"}, + exitCode: 0, + wantOutputs: []e2e.SingularityCmdResultOp{ + e2e.ExpectOutput(e2e.ExactMatch, "my_test_string"), + }, + }, + { + name: "NonExistReadonly", + args: []string{"--overlay", filepath.Join(testDir, "my_ro_ol_dir_nonexistent:ro"), imageRef, "echo", "hi"}, + exitCode: 255, + }, + { + name: "SeveralReadonly", + args: []string{"--overlay", filepath.Join(testDir, "my_ro_ol_dir2:ro"), "--overlay", filepath.Join(testDir, "my_ro_ol_dir1:ro"), imageRef, "cat", "/testfile.1", 
"/maskable_testfile"}, + exitCode: 0, + wantOutputs: []e2e.SingularityCmdResultOp{ + e2e.ExpectOutput(e2e.ContainMatch, "test_string_1"), + e2e.ExpectOutput(e2e.ContainMatch, "maskable_string_2"), + }, + }, + } + + t.Run(profile.String(), func(t *testing.T) { + for _, tt := range tests { + c.env.RunSingularity( + t, + e2e.AsSubtest(tt.name), + e2e.WithProfile(profile), + e2e.WithCommand("exec"), + e2e.WithArgs(tt.args...), + e2e.ExpectExit( + tt.exitCode, + tt.wantOutputs..., + ), + ) + } + }) + } +} diff --git a/e2e/docker/docker.go b/e2e/docker/docker.go index 01cb291b24..a399874dae 100644 --- a/e2e/docker/docker.go +++ b/e2e/docker/docker.go @@ -488,28 +488,28 @@ func (c ctx) testDockerRegistry(t *testing.T) { dfd e2e.DefFileDetails }{ { - name: "BusyBox", + name: "Alpine", exit: 0, dfd: e2e.DefFileDetails{ Bootstrap: "docker", - From: c.env.TestRegistry + "/my-busybox", + From: c.env.TestRegistry + "/my-alpine", }, }, { - name: "BusyBoxRegistry", + name: "AlpineRegistry", exit: 0, dfd: e2e.DefFileDetails{ Bootstrap: "docker", - From: "my-busybox", + From: "my-alpine", Registry: c.env.TestRegistry, }, }, { - name: "BusyBoxNamespace", + name: "AlpineNamespace", exit: 255, dfd: e2e.DefFileDetails{ Bootstrap: "docker", - From: "my-busybox", + From: "my-alpine", Registry: c.env.TestRegistry, Namespace: "not-a-namespace", }, diff --git a/e2e/docker/regressions.go b/e2e/docker/regressions.go index 435075a944..776ca60f2d 100644 --- a/e2e/docker/regressions.go +++ b/e2e/docker/regressions.go @@ -82,7 +82,7 @@ func (c ctx) issue5172(t *testing.T) { u := e2e.UserProfile.HostUser(t) // create $HOME/.config/containers/registries.conf - regImage := "docker://" + c.env.TestRegistry + "/my-busybox" + regImage := "docker://" + c.env.TestRegistry + "/my-alpine" regDir := filepath.Join(u.Dir, ".config", "containers") regFile := filepath.Join(regDir, "registries.conf") imagePath := filepath.Join(c.env.TestDir, "issue-5172") diff --git a/e2e/imgbuild/imgbuild.go 
b/e2e/imgbuild/imgbuild.go index 47de1ba351..aafc3364fb 100644 --- a/e2e/imgbuild/imgbuild.go +++ b/e2e/imgbuild/imgbuild.go @@ -1550,7 +1550,7 @@ func (c imgBuildTests) buildBindMount(t *testing.T) { } } -// testWritableTmpfs checks that we can run the build using a writeable tmpfs in the %test step +// testWritableTmpfs checks that we can run the build using a writable tmpfs in the %test step func (c imgBuildTests) testWritableTmpfs(t *testing.T) { e2e.EnsureImage(t, c.env) diff --git a/e2e/suite.go b/e2e/suite.go index 313b08d902..8ea5ca21cf 100644 --- a/e2e/suite.go +++ b/e2e/suite.go @@ -194,9 +194,9 @@ func Run(t *testing.T) { // Provision local registry testenv.TestRegistry = e2e.StartRegistry(t, testenv) - testenv.TestRegistryImage = fmt.Sprintf("docker://%s/my-busybox:latest", testenv.TestRegistry) + testenv.TestRegistryImage = fmt.Sprintf("docker://%s/my-alpine:latest", testenv.TestRegistry) - // Copy small test image (busybox:latest) into local registry from DockerHub + // Copy small test image (alpine:latest) into local registry from DockerHub insecureSource := false insecureValue := os.Getenv("E2E_DOCKER_MIRROR_INSECURE") if insecureValue != "" { @@ -205,7 +205,7 @@ func Run(t *testing.T) { t.Fatalf("could not convert E2E_DOCKER_MIRROR_INSECURE=%s: %s", insecureValue, err) } } - e2e.CopyOCIImage(t, "docker://busybox:latest", testenv.TestRegistryImage, insecureSource, true) + e2e.CopyOCIImage(t, "docker://alpine:latest", testenv.TestRegistryImage, insecureSource, true) // SIF base test path, built on demand by e2e.EnsureImage imagePath := path.Join(name, "test.sif") diff --git a/internal/app/singularity/oci_linux.go b/internal/app/singularity/oci_linux.go index e59c427914..0f53e466f2 100644 --- a/internal/app/singularity/oci_linux.go +++ b/internal/app/singularity/oci_linux.go @@ -23,6 +23,7 @@ import ( // OciArgs contains CLI arguments type OciArgs struct { BundlePath string + OverlayPaths []string LogPath string LogFormat string PidFile string @@ 
-48,7 +49,8 @@ func OciRunWrapped(ctx context.Context, containerID string, args *OciArgs) error if err != nil { return err } - return oci.RunWrapped(ctx, containerID, args.BundlePath, args.PidFile, systemdCgroups) + + return oci.RunWrapped(ctx, containerID, args.BundlePath, args.PidFile, args.OverlayPaths, systemdCgroups) } // OciCreate creates a container from an OCI bundle diff --git a/internal/pkg/runtime/launcher/oci/launcher_linux.go b/internal/pkg/runtime/launcher/oci/launcher_linux.go index 8e4657b12f..776a82aa7c 100644 --- a/internal/pkg/runtime/launcher/oci/launcher_linux.go +++ b/internal/pkg/runtime/launcher/oci/launcher_linux.go @@ -81,9 +81,6 @@ func checkOpts(lo launcher.Options) error { if lo.WritableTmpfs { sylog.Infof("--oci mode uses --writable-tmpfs by default") } - if len(lo.OverlayPaths) > 0 { - badOpt = append(badOpt, "OverlayPaths") - } if lo.WorkDir != "" { badOpt = append(badOpt, "WorkDir") } @@ -473,11 +470,11 @@ func (l *Launcher) Exec(ctx context.Context, image string, process string, args if os.Getuid() == 0 { // Execution of runc/crun run, wrapped with prep / cleanup. - err = RunWrapped(ctx, id.String(), b.Path(), "", l.singularityConf.SystemdCgroups) + err = RunWrapped(ctx, id.String(), b.Path(), "", l.cfg.OverlayPaths, l.singularityConf.SystemdCgroups) } else { // Reexec singularity oci run in a userns with mappings. // Note - the oci run command will pull out the SystemdCgroups setting from config. - err = RunWrappedNS(ctx, id.String(), b.Path(), "") + err = RunWrappedNS(ctx, id.String(), b.Path(), l.cfg.OverlayPaths) } var exitErr *exec.ExitError if errors.As(err, &exitErr) { diff --git a/internal/pkg/runtime/launcher/oci/oci_overlay.go b/internal/pkg/runtime/launcher/oci/oci_overlay.go new file mode 100644 index 0000000000..5d37a8e7bf --- /dev/null +++ b/internal/pkg/runtime/launcher/oci/oci_overlay.go @@ -0,0 +1,108 @@ +// Copyright (c) 2018-2023, Sylabs Inc. All rights reserved. 
+// This software is licensed under a 3-clause BSD license. Please consult the +// LICENSE.md file distributed with the sources of this project regarding your +// rights to use or distribute this software. + +package oci + +import ( + "fmt" + "path/filepath" + "strings" + + "github.com/sylabs/singularity/pkg/ocibundle/tools" + "github.com/sylabs/singularity/pkg/sylog" + "github.com/sylabs/singularity/pkg/util/singularityconf" +) + +// WrapWithWritableTmpFs runs a function wrapped with prep / cleanup steps for a writable tmpfs. +func WrapWithWritableTmpFs(f func() error, bundleDir string) error { + // TODO: --oci mode always emulating --compat, which uses --writable-tmpfs. + // Provide a way of disabling this, for a read only rootfs. + overlayDir, err := prepareWritableTmpfs(bundleDir) + if err != nil { + return err + } + + err = f() + + // Cleanup actions log errors, but don't return - so we get as much cleanup done as possible. + if cleanupErr := cleanupWritableTmpfs(bundleDir, overlayDir); cleanupErr != nil { + sylog.Errorf("While cleaning up writable tmpfs: %v", cleanupErr) + } + + // Return any error from the actual container payload - preserve exit code. + return err +} + +// WrapWithOverlays runs a function wrapped with prep / cleanup steps for overlays. 
+func WrapWithOverlays(f func() error, bundleDir string, overlayPaths []string) error { + writableOverlayFound := false + ovs := tools.OverlaySet{} + for _, p := range overlayPaths { + writable := true + splitted := strings.SplitN(p, ":", 2) + barePath := splitted[0] + if len(splitted) > 1 { + if splitted[1] == "ro" { + writable = false + } + } + + if writable && writableOverlayFound { + return fmt.Errorf("you can't specify more than one writable overlay; %#v has already been specified as a writable overlay; use '--overlay %s:ro' instead", ovs.WritableLoc, barePath) + } + if writable { + writableOverlayFound = true + ovs.WritableLoc = barePath + } else { + ovs.ReadonlyLocs = append(ovs.ReadonlyLocs, barePath) + } + } + + rootFsDir := tools.RootFs(bundleDir).Path() + err := tools.ApplyOverlay(rootFsDir, ovs) + if err != nil { + return err + } + + err = f() + + // Cleanup actions log errors, but don't return - so we get as much cleanup done as possible. + if cleanupErr := tools.UnmountOverlay(rootFsDir); cleanupErr != nil { + sylog.Errorf("While unmounting rootfs overlay: %v", cleanupErr) + } + + // Return any error from the actual container payload - preserve exit code. + return err +} + +func prepareWritableTmpfs(bundleDir string) (string, error) { + sylog.Debugf("Configuring writable tmpfs overlay for %s", bundleDir) + c := singularityconf.GetCurrentConfig() + if c == nil { + return "", fmt.Errorf("singularity configuration is not initialized") + } + return tools.CreateOverlayTmpfs(bundleDir, int(c.SessiondirMaxSize)) +} + +func cleanupWritableTmpfs(bundleDir, overlayDir string) error { + sylog.Debugf("Cleaning up writable tmpfs overlay for %s", bundleDir) + return tools.DeleteOverlayTmpfs(bundleDir, overlayDir) +} + +// absOverlay takes an overlay description string (a path, optionally followed by a colon with an option string, like ":ro" or ":rw"), and replaces any relative path in the description string with an absolute one. 
+func absOverlay(desc string) (string, error) { + splitted := strings.SplitN(desc, ":", 2) + barePath := splitted[0] + absBarePath, err := filepath.Abs(barePath) + if err != nil { + return "", err + } + absDesc := absBarePath + if len(splitted) > 1 { + absDesc += ":" + splitted[1] + } + + return absDesc, nil +} diff --git a/internal/pkg/runtime/launcher/oci/oci_runc_linux.go b/internal/pkg/runtime/launcher/oci/oci_runc_linux.go index 65934ce011..80749488c6 100644 --- a/internal/pkg/runtime/launcher/oci/oci_runc_linux.go +++ b/internal/pkg/runtime/launcher/oci/oci_runc_linux.go @@ -1,4 +1,4 @@ -// Copyright (c) 2018-2022, Sylabs Inc. All rights reserved. +// Copyright (c) 2018-2023, Sylabs Inc. All rights reserved. // This software is licensed under a 3-clause BSD license. Please consult the // LICENSE.md file distributed with the sources of this project regarding your // rights to use or distribute this software. @@ -19,10 +19,8 @@ import ( "github.com/sylabs/singularity/internal/pkg/buildcfg" fakerootConfig "github.com/sylabs/singularity/internal/pkg/runtime/engine/fakeroot/config" "github.com/sylabs/singularity/internal/pkg/util/starter" - "github.com/sylabs/singularity/pkg/ocibundle/tools" "github.com/sylabs/singularity/pkg/runtime/engine/config" "github.com/sylabs/singularity/pkg/sylog" - "github.com/sylabs/singularity/pkg/util/singularityconf" ) // Delete deletes container resources @@ -222,44 +220,43 @@ func Run(ctx context.Context, containerID, bundlePath, pidFile string, systemdCg } // RunWrapped runs a container via the OCI runtime, wrapped with prep / cleanup steps. -func RunWrapped(ctx context.Context, containerID, bundlePath, pidFile string, systemdCgroups bool) error { - // TODO: --oci mode always emulating --compat, which uses --writable-tmpfs. - // Provide a way of disabling this, for a read only rootfs. 
- if err := prepareWriteableTmpfs(bundlePath); err != nil { - return err +func RunWrapped(ctx context.Context, containerID, bundlePath, pidFile string, overlayPaths []string, systemdCgroups bool) error { + runFunc := func() error { + return Run(ctx, containerID, bundlePath, "", systemdCgroups) } - err := Run(ctx, containerID, bundlePath, pidFile, systemdCgroups) - - // Cleanup actions log errors, but don't return - so we get as much cleanup done as possible. - if err := cleanupWritableTmpfs(bundlePath); err != nil { - sylog.Errorf("While cleaning up writable tmpfs: %v", err) + if len(overlayPaths) > 0 { + return WrapWithOverlays(runFunc, bundlePath, overlayPaths) } - // Return any error from the actual container payload - preserve exit code. - return err + return WrapWithWritableTmpFs(runFunc, bundlePath) } // RunWrappedNS reexecs singularity in a user namespace, with supplied uid/gid mapping, calling oci run. -func RunWrappedNS(ctx context.Context, containerID, bundlePath, pidFile string) error { +func RunWrappedNS(ctx context.Context, containerID, bundlePath string, overlayPaths []string) error { absBundle, err := filepath.Abs(bundlePath) if err != nil { return fmt.Errorf("failed to determine bundle absolute path: %s", err) } - if err := os.Chdir(absBundle); err != nil { - return fmt.Errorf("failed to change directory to %s: %s", absBundle, err) - } - args := []string{ filepath.Join(buildcfg.BINDIR, "singularity"), "oci", "run-wrapped", "-b", absBundle, - containerID, } - if pidFile != "" { - args = append(args, "--pid-file="+pidFile) + for _, p := range overlayPaths { + absPath, err := absOverlay(p) + if err != nil { + return fmt.Errorf("could not convert %q to absolute path: %w", p, err) + } + + args = append(args, "--overlay", absPath) + } + args = append(args, containerID) + + if err := os.Chdir(absBundle); err != nil { + return fmt.Errorf("failed to change directory to %s: %s", absBundle, err) } sylog.Debugf("Calling fakeroot engine to execute %q", 
strings.Join(args, " ")) @@ -363,17 +360,3 @@ func Update(containerID, cgFile string, systemdCgroups bool) error { sylog.Debugf("Calling %s with args %v", runtimeBin, runtimeArgs) return cmd.Run() } - -func prepareWriteableTmpfs(bundleDir string) error { - sylog.Debugf("Configuring writable tmpfs overlay for %s", bundleDir) - c := singularityconf.GetCurrentConfig() - if c == nil { - return fmt.Errorf("singularity configuration is not initialized") - } - return tools.CreateOverlayTmpfs(bundleDir, int(c.SessiondirMaxSize)) -} - -func cleanupWritableTmpfs(bundleDir string) error { - sylog.Debugf("Cleaning up writable tmpfs overlay for %s", bundleDir) - return tools.DeleteOverlay(bundleDir) -} diff --git a/internal/pkg/runtime/launcher/options.go b/internal/pkg/runtime/launcher/options.go index 6cac2c5266..12e74aeb18 100644 --- a/internal/pkg/runtime/launcher/options.go +++ b/internal/pkg/runtime/launcher/options.go @@ -25,7 +25,7 @@ type Namespaces struct { type Options struct { // Writable marks the container image itself as writable. Writable bool - // WriteableTmpfs applies an ephemeral writable overlay to the container. + // WritableTmpfs applies an ephemeral writable overlay to the container. WritableTmpfs bool // OverlayPaths holds paths to image or directory overlays to be applied. OverlayPaths []string diff --git a/pkg/ocibundle/tools/overlay_linux.go b/pkg/ocibundle/tools/overlay_linux.go index e8bf08812c..ea7ca467e3 100644 --- a/pkg/ocibundle/tools/overlay_linux.go +++ b/pkg/ocibundle/tools/overlay_linux.go @@ -9,18 +9,41 @@ import ( "fmt" "os" "path/filepath" + "strings" "syscall" ) -// CreateOverlay creates a writable overlay based on a directory. -func CreateOverlay(bundlePath string) error { - var err error +// OverlaySet represents a set of overlay directories which will be overlain on +// top of some filesystem mount point. 
The actual mount point atop which these +// directories will be overlain is not specified in the OverlaySet; it is left +// implicit, to be chosen by whichever function consumes an OverlaySet. An +// OverlaySet contains two types of directories: zero or more directories which +// will be mounted as read-only overlays atop the (implicit) mount point, and +// one directory which will be mounted as a writable overlay atop all the rest. +// An empty WritableLoc field indicates that no writable overlay is to be +// mounted. +type OverlaySet struct { + // ReadonlyLocs is a list of directories to be mounted as read-only + // overlays. The mount point atop which these will be mounted is left + // implicit, to be chosen by whichever function consumes the OverlaySet. + ReadonlyLocs []string + + // WritableLoc is the directory to be mounted as a writable overlay. The + // mount point atop which this will be mounted is left implicit, to be + // chosen by whichever function consumes the OverlaySet. Empty value + // indicates no writable overlay is to be mounted. + WritableLoc string +} +// CreateOverlay creates a writable overlay using a directory inside the OCI +// bundle. 
+func CreateOverlay(bundlePath string) error { oldumask := syscall.Umask(0) defer syscall.Umask(oldumask) overlayDir := filepath.Join(bundlePath, "overlay") - if err = os.Mkdir(overlayDir, 0o700); err != nil { + var err error + if err = ensureOverlayDir(overlayDir, true, 0o700); err != nil { return fmt.Errorf("failed to create %s: %s", overlayDir, err) } // delete overlay directory in case of error @@ -30,35 +53,31 @@ func CreateOverlay(bundlePath string) error { } }() - err = syscall.Mount(overlayDir, overlayDir, "", syscall.MS_BIND, "") - if err != nil { - return fmt.Errorf("failed to bind %s: %s", overlayDir, err) - } - // best effort to cleanup mount - defer func() { - if err != nil { - syscall.Unmount(overlayDir, syscall.MNT_DETACH) - } - }() - - if err = syscall.Mount("", overlayDir, "", syscall.MS_REMOUNT|syscall.MS_BIND, ""); err != nil { - return fmt.Errorf("failed to remount %s: %s", overlayDir, err) - } + return ApplyOverlay( + RootFs(bundlePath).Path(), + OverlaySet{WritableLoc: overlayDir}, + ) +} - err = prepareOverlay(bundlePath, overlayDir) - return err +// DeleteOverlay deletes an overlay previously created using a directory inside +// the OCI bundle. +func DeleteOverlay(bundlePath string) error { + overlayDir := filepath.Join(bundlePath, "overlay") + rootFsDir := RootFs(bundlePath).Path() + return unmountAndDeleteOverlay(rootFsDir, overlayDir) } -// CreateOverlay creates a writable overlay based on a tmpfs. -func CreateOverlayTmpfs(bundlePath string, sizeMiB int) error { +// CreateOverlayTmpfs creates a writable overlay using tmpfs. 
+func CreateOverlayTmpfs(bundlePath string, sizeMiB int) (string, error) { var err error oldumask := syscall.Umask(0) defer syscall.Umask(oldumask) overlayDir := filepath.Join(bundlePath, "overlay") - if err = os.Mkdir(overlayDir, 0o700); err != nil { - return fmt.Errorf("failed to create %s: %s", overlayDir, err) + err = ensureOverlayDir(overlayDir, true, 0o700) + if err != nil { + return "", fmt.Errorf("failed to create %s: %s", overlayDir, err) } // delete overlay directory in case of error defer func() { @@ -70,7 +89,7 @@ func CreateOverlayTmpfs(bundlePath string, sizeMiB int) error { options := fmt.Sprintf("mode=1777,size=%dm", sizeMiB) err = syscall.Mount("tmpfs", overlayDir, "tmpfs", syscall.MS_NODEV, options) if err != nil { - return fmt.Errorf("failed to bind %s: %s", overlayDir, err) + return "", fmt.Errorf("failed to bind %s: %s", overlayDir, err) } // best effort to cleanup mount defer func() { @@ -79,41 +98,182 @@ func CreateOverlayTmpfs(bundlePath string, sizeMiB int) error { } }() - err = prepareOverlay(bundlePath, overlayDir) - return err + err = ApplyOverlay( + RootFs(bundlePath).Path(), + OverlaySet{WritableLoc: overlayDir}, + ) + if err != nil { + return "", err + } + + return overlayDir, nil +} + +// DeleteOverlayTmpfs deletes an overlay previously created using tmpfs. 
+func DeleteOverlayTmpfs(bundlePath, overlayDir string) error { + rootFsDir := RootFs(bundlePath).Path() + return unmountAndDeleteOverlay(rootFsDir, overlayDir) } -func prepareOverlay(bundlePath, overlayDir string) error { - upperDir := filepath.Join(overlayDir, "upper") - if err := os.Mkdir(upperDir, 0o755); err != nil { - return fmt.Errorf("failed to create %s: %s", upperDir, err) +// ApplyOverlay prepares and mounts the specified overlay +func ApplyOverlay(rootFsDir string, ovs OverlaySet) error { + // Prepare internal structure of writable overlay dir, if necessary + if len(ovs.WritableLoc) > 0 { + if err := ensureOverlayDir(ovs.WritableLoc, true, 0o755); err != nil { + return err + } + if err := prepareWritableOverlay(ovs.WritableLoc); err != nil { + return err + } } - workDir := filepath.Join(overlayDir, "work") - if err := os.Mkdir(workDir, 0o700); err != nil { - return fmt.Errorf("failed to create %s: %s", workDir, err) + + // Perform identity mounts for this OverlaySet + if err := performIdentityMounts(ovs); err != nil { + return err } - rootFsDir := RootFs(bundlePath).Path() - options := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", rootFsDir, upperDir, workDir) + // Perform actual overlay mount + return performOverlayMount(rootFsDir, overlayOptions(rootFsDir, ovs)) +} + +// UnmountOverlay umounts an overlay +func UnmountOverlay(rootFsDir string) error { + if err := syscall.Unmount(rootFsDir, syscall.MNT_DETACH); err != nil { + return fmt.Errorf("failed to unmount %s: %s", rootFsDir, err) + } + + return nil +} + +// prepareWritableOverlay ensures that the upper and work subdirs of a writable +// overlay dir exist, and if not, creates them. 
+func prepareWritableOverlay(dir string) error { + if err := ensureOverlayDir(upperSubdirOf(dir), true, 0o755); err != nil { + return fmt.Errorf("err encountered while preparing upper subdir of overlay dir %q: %w", upperSubdirOf(dir), err) + } + if err := ensureOverlayDir(workSubdirOf(dir), true, 0o700); err != nil { + return fmt.Errorf("err encountered while preparing work subdir of overlay dir %q: %w", workSubdirOf(dir), err) + } + + return nil +} + +// performIdentityMounts creates the writable OverlaySet directory if it does +// not exist, and performs a bind mount & remount of every OverlaySet dir onto +// itself. The pattern of bind mount followed by remount allows application of +// more restrictive mount flags than are in force on the underlying filesystem. +func performIdentityMounts(ovs OverlaySet) error { + var err error + + locsToBind := ovs.ReadonlyLocs + if len(ovs.WritableLoc) > 0 { + // Check if writable overlay dir already exists; if it doesn't, try to + // create it. + if err = ensureOverlayDir(ovs.WritableLoc, true, 0o755); err != nil { + return err + } + + locsToBind = append(locsToBind, ovs.WritableLoc) + } + + // Try to do initial bind-mounts + for _, d := range locsToBind { + if err = ensureOverlayDir(d, false, 0); err != nil { + return fmt.Errorf("error accessing directory %s: %s", d, err) + } + + if err = syscall.Mount(d, d, "", syscall.MS_BIND, ""); err != nil { + return fmt.Errorf("failed to bind %s: %s", d, err) + } + + // best effort to cleanup mount + defer func() { + if err != nil { + syscall.Unmount(d, syscall.MNT_DETACH) + } + }() + + // Try to perform remount + if err = syscall.Mount("", d, "", syscall.MS_REMOUNT|syscall.MS_BIND, ""); err != nil { + return fmt.Errorf("failed to remount %s: %s", d, err) + } + } + + return err +} + +// overlayOptions creates the options string to be used in an overlay mount +func overlayOptions(rootFsDir string, ovs OverlaySet) string { + // Create lowerdir argument of options string + lowerDirJoined 
:= strings.Join(append(ovs.ReadonlyLocs, rootFsDir), ":") + + if len(ovs.WritableLoc) > 0 { + return fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lowerDirJoined, upperSubdirOf(ovs.WritableLoc), workSubdirOf(ovs.WritableLoc)) + } + + return fmt.Sprintf("lowerdir=%s", lowerDirJoined) +} + +// performOverlayMount mounts an overlay atop a given rootfs directory +func performOverlayMount(rootFsDir, options string) error { + // Try to perform actual mount if err := syscall.Mount("overlay", rootFsDir, "overlay", 0, options); err != nil { - return fmt.Errorf("failed to mount %s: %s", overlayDir, err) + return fmt.Errorf("failed to mount %s: %s", rootFsDir, err) + } + + return nil +} + +// ensureOverlayDir checks if a directory already exists; if it doesn't, and +// createIfMissing is true, it attempts to create it with the specified +// permissions. +func ensureOverlayDir(dir string, createIfMissing bool, createPerm os.FileMode) error { + if len(dir) == 0 { + return fmt.Errorf("internal error: ensureOverlayDir() called with empty dir name") + } + + _, err := os.Stat(dir) + if err == nil { + return nil + } + + if !os.IsNotExist(err) { + return err + } + + if !createIfMissing { + return fmt.Errorf("missing overlay dir %q", dir) + } + + // Create the requested dir + if err := os.Mkdir(dir, createPerm); err != nil { + return fmt.Errorf("failed to create %q: %s", dir, err) } + return nil } -// DeleteOverlay deletes overlay -func DeleteOverlay(bundlePath string) error { - overlayDir := filepath.Join(bundlePath, "overlay") - rootFsDir := RootFs(bundlePath).Path() +func upperSubdirOf(overlayDir string) string { + return filepath.Join(overlayDir, "upper") +} - if err := syscall.Unmount(rootFsDir, syscall.MNT_DETACH); err != nil { - return fmt.Errorf("failed to unmount %s: %s", rootFsDir, err) +func workSubdirOf(overlayDir string) string { + return filepath.Join(overlayDir, "work") +} + +// unmountAndDeleteOverlay unmounts and deletes a previously-created overlay. 
+func unmountAndDeleteOverlay(rootFsDir, overlayDir string) error { + if err := UnmountOverlay(rootFsDir); err != nil { + return err } + if err := syscall.Unmount(overlayDir, syscall.MNT_DETACH); err != nil { return fmt.Errorf("failed to unmount %s: %s", overlayDir, err) } + if err := os.RemoveAll(overlayDir); err != nil { return fmt.Errorf("failed to remove %s: %s", overlayDir, err) } + return nil }