From f193985656f18938753f5e4bf125197f7ef94445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jelmer=20Vernoo=C4=B3?= Date: Mon, 16 Jan 2023 02:26:07 +0000 Subject: [PATCH] Inline get_object_at. --- dulwich/contrib/swift.py | 12 ++-------- dulwich/pack.py | 47 ++++++++------------------------------ dulwich/tests/test_pack.py | 2 +- 3 files changed, 12 insertions(+), 49 deletions(-) diff --git a/dulwich/contrib/swift.py b/dulwich/contrib/swift.py index e2f9625a2..909a1418c 100644 --- a/dulwich/contrib/swift.py +++ b/dulwich/contrib/swift.py @@ -42,7 +42,6 @@ GreenThreadsMissingObjectFinder, ) -from dulwich.lru_cache import LRUSizeCache from dulwich.objects import ( Blob, Commit, @@ -66,7 +65,6 @@ write_pack_index_v2, load_pack_index_file, read_pack_header, - _compute_object_size, unpack_object, write_pack_object, ) @@ -585,20 +583,14 @@ def __init__(self, scon, filename): self.pack_length = int(headers["content-length"]) pack_reader = SwiftPackReader(self.scon, self._filename, self.pack_length) (version, self._num_objects) = read_pack_header(pack_reader.read) - self._offset_cache = LRUSizeCache( - 1024 * 1024 * self.scon.cache_length, - compute_size=_compute_object_size, - ) self.pack = None - def get_object_at(self, offset): - if offset in self._offset_cache: - return self._offset_cache[offset] + def get_unpacked_object_at(self, offset): assert offset >= self._header_size pack_reader = SwiftPackReader(self.scon, self._filename, self.pack_length) pack_reader.seek(offset) unpacked, _ = unpack_object(pack_reader.read) - return (unpacked.pack_type_num, unpacked._obj()) + return unpacked def get_stored_checksum(self): pack_reader = SwiftPackReader(self.scon, self._filename, self.pack_length) diff --git a/dulwich/pack.py b/dulwich/pack.py index dc4e176fc..0ceec8a59 100644 --- a/dulwich/pack.py +++ b/dulwich/pack.py @@ -83,9 +83,6 @@ ChecksumMismatch, ) from dulwich.file import GitFile -from dulwich.lru_cache import ( - LRUSizeCache, -) from dulwich.objects import ( ShaFile, ObjectID, @@ -894,14 +891,6 @@ def unpack_object( return unpacked, unused -def _compute_object_size(value): - """Compute the size of a unresolved object for use with LRUSizeCache.""" - (num, obj) = value - if num in DELTA_TYPES: - return chunks_length(obj[1]) - return chunks_length(obj) - - class PackStreamReader: """Class to read a pack stream. @@ -1169,9 +1158,6 @@ def __init__(self, filename, file=None, size=None): else: self._file = file (version, self._num_objects) = read_pack_header(self._file.read) - self._offset_cache = LRUSizeCache( - 1024 * 1024 * 20, compute_size=_compute_object_size - ) @property def filename(self): @@ -1331,20 +1317,6 @@ def get_unpacked_object_at(self, offset: int, *, include_comp: bool = False) -> unpacked.offset = offset return unpacked - def get_object_at(self, offset: int) -> Tuple[int, OldUnpackedObject]: - """Given an offset in to the packfile return the object that is there. - - Using the associated index the location of an object can be looked up, - and then the packfile can be asked directly for that object using this - function. - """ - try: - return self._offset_cache[offset] - except KeyError: - pass - unpacked = self.get_unpacked_object_at(offset, include_comp=False) - return (unpacked.pack_type_num, unpacked._obj()) - T = TypeVar('T') @@ -2391,7 +2363,9 @@ def __contains__(self, sha1: bytes) -> bool: def get_raw(self, sha1: bytes) -> Tuple[int, bytes]: offset = self.index.object_offset(sha1) - obj_type, obj = self.data.get_object_at(offset) + unpacked = self.data.get_unpacked_object_at(offset, include_comp=False) + obj = unpacked._obj() + obj_type = unpacked.pack_type_num type_num, chunks = self.resolve_object(offset, obj_type, obj) return type_num, b"".join(chunks) @@ -2475,7 +2449,9 @@ def get_ref(self, sha: bytes) -> Tuple[Optional[int], int, OldUnpackedObject]: except KeyError: offset = None if offset: - type, obj = self.data.get_object_at(offset) + unpacked = self.data.get_unpacked_object_at(offset, include_comp=False) + type = unpacked.pack_type_num + obj = unpacked._obj() elif self.resolve_ext_ref: type, obj = self.resolve_ext_ref(sha) else: @@ -2501,7 +2477,9 @@ def resolve_object(self, offset: int, type: int, obj, get_ref=None) -> Tuple[int (delta_offset, delta) = base_obj # TODO: clean up asserts and replace with nicer error messages base_offset = base_offset - delta_offset - base_type, base_obj = self.data.get_object_at(base_offset) + unpacked = self.data.get_unpacked_object_at(base_offset, include_comp=False) + base_type = unpacked.pack_type_num + base_obj = unpacked._obj() assert isinstance(base_type, int) elif base_type == REF_DELTA: (basename, delta) = base_obj @@ -2515,13 +2493,6 @@ def resolve_object(self, offset: int, type: int, obj, get_ref=None) -> Tuple[int chunks = base_obj for prev_offset, delta_type, delta in reversed(delta_stack): chunks = apply_delta(chunks, delta) - # TODO(dborowitz): This can result in poor performance if - # large base objects are separated from deltas in the pack. - # We should reorganize so that we apply deltas to all - # objects in a chain one after the other to optimize cache - # performance. - if prev_offset is not None: - self.data._offset_cache[prev_offset] = base_type, chunks return base_type, chunks def entries(self, progress: Optional[ProgressFn] = None) -> Iterator[PackIndexEntry]: diff --git a/dulwich/tests/test_pack.py b/dulwich/tests/test_pack.py index be3579e94..d873e4b9a 100644 --- a/dulwich/tests/test_pack.py +++ b/dulwich/tests/test_pack.py @@ -410,7 +410,7 @@ def test_pack_tuples(self): self.assertEqual(expected, set(list(tuples))) self.assertEqual(3, len(tuples)) - def test_get_object_at(self): + def test_get_object(self): """Tests random access for non-delta objects""" with self.get_pack(pack1_sha) as p: obj = p[a_sha]