From 186e0057f2a90010470b7ad24c02902528bd2222 Mon Sep 17 00:00:00 2001 From: Nir Soffer Date: Sun, 13 Mar 2022 04:32:16 +0200 Subject: [PATCH] qemu_nbd: Select cache and aio automatically Previously we use --cache=writeback and --aio=threads so we can access images on file system that does not support direct I/O. This is sometimes faster than --cache=none and --aio=native, but typically give less consistent results, and pollute the page cache with image data, which is mostly likely will never be used. When uploading and downloading images on a hypervisor, using the page cache can be harmful and cause sanlock timeouts when the kernel try to flush huge images to the underlying storage. Future qemu-img and qemu-nbd are expected to implement "auto" cache and aio modes, selecting --cache=none and --aio=native if possible. This change implements this in our qemu_nbd wrapper so we can use this now with current qemu version. When starting qemu_nbd.Server(), if cache was not specified, we select cache="none" if the image can be opened with direct I/O. If aio was not specified, we select it based on the cache value. Signed-off-by: Nir Soffer --- ovirt_imageio/_internal/qemu_nbd.py | 31 ++++++++-- ovirt_imageio/client/_api.py | 9 --- test/qemu_nbd_test.py | 94 +++++++++++++++++++++++++++++ 3 files changed, 121 insertions(+), 13 deletions(-) diff --git a/ovirt_imageio/_internal/qemu_nbd.py b/ovirt_imageio/_internal/qemu_nbd.py index 513318ca..286b1830 100644 --- a/ovirt_imageio/_internal/qemu_nbd.py +++ b/ovirt_imageio/_internal/qemu_nbd.py @@ -28,7 +28,7 @@ class Server: def __init__( self, image, fmt, sock, export_name="", read_only=False, shared=8, - cache="none", aio="native", discard="unmap", detect_zeroes="unmap", + cache=None, aio=None, discard="unmap", detect_zeroes="unmap", bitmap=None, backing_chain=True, offset=None, size=None, timeout=10.0): """ @@ -41,8 +41,11 @@ def __init__( export_name (str): expose export by name read_only (bool): export is read-only shared (int): export can be shared by specified number of clients - cache (str): cache mode (none, writeback, ...) - aio (str): AIO mode (native or threads) + cache (str): cache mode (none, writeback, ...). If not specified, + probe image and use "none" if direct I/O is supported, and + "writeback" if not. + aio (str): AIO mode (native or threads). If not specified, use + "native" if cache is "none" and "threads" otherwise. discard (str): discard mode (ignore, unmap) detect_zeroes (str): Control the automatic conversion of plain zero writes by the OS to driver-specific optimized zero write @@ -86,6 +89,15 @@ def url(self): return urllib.parse.urlparse(url) def start(self): + # Implement "auto" cache mode, planned for qemu 7. + if self.cache is None: + self.cache = "none" if self._can_use_direct_io() else "writeback" + log.debug("Using cache=%r", self.cache) + + if self.aio is None: + self.aio = "native" if self.cache == "none" else "threads" + log.debug("Using aio=%r", self.aio) + cmd = [ QEMU_NBD, "--export-name={}".format(self.export_name), @@ -189,10 +201,21 @@ def stop(self): self.proc = None + def _can_use_direct_io(self): + flags = os.O_RDONLY if self.read_only else os.O_RDWR + flags |= os.O_DIRECT + try: + fd = os.open(self.image, flags) + except OSError: + return False + else: + os.close(fd) + return True + @contextmanager def run(image, fmt, sock, export_name="", read_only=False, shared=8, - cache="none", aio="native", discard="unmap", detect_zeroes="unmap", + cache=None, aio=None, discard="unmap", detect_zeroes="unmap", bitmap=None, backing_chain=True, offset=None, size=None, timeout=10.0): server = Server( image, fmt, sock, diff --git a/ovirt_imageio/client/_api.py b/ovirt_imageio/client/_api.py index 7924ba88..74b450aa 100644 --- a/ovirt_imageio/client/_api.py +++ b/ovirt_imageio/client/_api.py @@ -480,13 +480,6 @@ def _open_nbd(filename, fmt, read_only=False, shared=1, bitmap=None, offset=None, size=None, backing_chain=True): """ Open nbd backend. - - Caching modes - ------------- - qemu-nbd uses "writethrough" cache mode by default. This is - painfully slow and pointless, waiting until every write it flushed - to underlying storage. We use "writeback", waiting until data is - copied to the host page cache. """ with _tmp_dir("imageio-") as base: sock = UnixAddress(os.path.join(base, "sock")) @@ -495,8 +488,6 @@ def _open_nbd(filename, fmt, read_only=False, shared=1, bitmap=None, fmt, sock, read_only=read_only, - cache="writeback", - aio=None, shared=shared, bitmap=bitmap, offset=offset, diff --git a/test/qemu_nbd_test.py b/test/qemu_nbd_test.py index a190508b..a417d3b4 100644 --- a/test/qemu_nbd_test.py +++ b/test/qemu_nbd_test.py @@ -8,8 +8,10 @@ import io import os +import shutil import struct import tarfile +import tempfile import urllib.parse import pytest @@ -24,6 +26,13 @@ from . marks import flaky_in_ovirt_ci +@pytest.fixture +def tmpfs_dir(): + path = tempfile.mkdtemp(dir="/dev/shm") + yield path + shutil.rmtree(path) + + @pytest.mark.parametrize("addr,export,url", [ (nbd.UnixAddress("/sock"), None, "nbd:unix:/sock"), (nbd.UnixAddress("/sock"), "", "nbd:unix:/sock"), @@ -53,6 +62,91 @@ def test_open(tmpdir, fmt): assert d.read(offset, len(data)) == data +@pytest.mark.parametrize("fmt", ["raw", "qcow2"]) +def test_server_can_use_direct_io(tmpdir, nbd_server, fmt): + disk = str(tmpdir.join("disk." + fmt)) + qemu_img.create(disk, fmt, size=1024**2) + + nbd_server.image = disk + nbd_server.fmt = fmt + nbd_server.cache = None + nbd_server.aio = None + + # Starting probes the image and select cache="none" and aio="native" + # if direct I/O can be used. + nbd_server.start() + assert nbd_server.cache == "none" + assert nbd_server.aio == "native" + + +@pytest.mark.parametrize("fmt", ["raw", "qcow2"]) +def test_server_cannot_use_direct_io(tmpfs_dir, nbd_server, fmt): + disk = os.path.join(tmpfs_dir, "disk." + fmt) + qemu_img.create(disk, fmt, size=1024**2) + + nbd_server.image = disk + nbd_server.fmt = fmt + nbd_server.cache = None + nbd_server.aio = None + + # Starting probes the image and selects cache="writeback" and + # aio="threads" since direct I/O cannot be used with tmpfs file + # system. + nbd_server.start() + assert nbd_server.cache == "writeback" + assert nbd_server.aio == "threads" + + +@pytest.mark.parametrize("fmt", ["raw", "qcow2"]) +def test_server_keep_cache(tmpdir, nbd_server, fmt): + disk = str(tmpdir.join("disk." + fmt)) + qemu_img.create(disk, fmt, size=1024**2) + + nbd_server.image = disk + nbd_server.fmt = fmt + nbd_server.cache = "writeback" + nbd_server.aio = None + + # Staring the server selects aio="threads" since cache is not + # "none". + nbd_server.start() + assert nbd_server.cache == "writeback" + assert nbd_server.aio == "threads" + + +@pytest.mark.parametrize("fmt", ["raw", "qcow2"]) +def test_server_keep_aio(tmpdir, nbd_server, fmt): + disk = str(tmpdir.join("disk." + fmt)) + qemu_img.create(disk, fmt, size=1024**2) + + nbd_server.image = disk + nbd_server.fmt = fmt + nbd_server.cache = None + nbd_server.aio = "threads" + + # Staring the server probes the image and selects cache="none", but + # keeps the requested aio option. + nbd_server.start() + assert nbd_server.cache == "none" + assert nbd_server.aio == "threads" + + +@pytest.mark.parametrize("fmt", ["raw", "qcow2"]) +def test_server_keep_cache_and_aio(tmpdir, nbd_server, fmt): + disk = str(tmpdir.join("disk." + fmt)) + qemu_img.create(disk, fmt, size=1024**2) + + nbd_server.image = disk + nbd_server.fmt = fmt + nbd_server.cache = "writeback" + nbd_server.aio = "threads" + + # Staring the server keeps the requested options. + nbd_server.start() + assert nbd_server.cache == "writeback" + assert nbd_server.aio == "threads" + + @pytest.mark.parametrize("fmt", ["raw", "qcow2"]) def test_ova(tmpdir, fmt): size = 1024**2