Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Inline get_object_at. #1122

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 2 additions & 10 deletions dulwich/contrib/swift.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
GreenThreadsMissingObjectFinder,
)

from dulwich.lru_cache import LRUSizeCache
from dulwich.objects import (
Blob,
Commit,
Expand All @@ -66,7 +65,6 @@
write_pack_index_v2,
load_pack_index_file,
read_pack_header,
_compute_object_size,
unpack_object,
write_pack_object,
)
Expand Down Expand Up @@ -585,20 +583,14 @@ def __init__(self, scon, filename):
self.pack_length = int(headers["content-length"])
pack_reader = SwiftPackReader(self.scon, self._filename, self.pack_length)
(version, self._num_objects) = read_pack_header(pack_reader.read)
self._offset_cache = LRUSizeCache(
1024 * 1024 * self.scon.cache_length,
compute_size=_compute_object_size,
)
self.pack = None

def get_object_at(self, offset):
if offset in self._offset_cache:
return self._offset_cache[offset]
def get_unpacked_object_at(self, offset):
assert offset >= self._header_size
pack_reader = SwiftPackReader(self.scon, self._filename, self.pack_length)
pack_reader.seek(offset)
unpacked, _ = unpack_object(pack_reader.read)
return (unpacked.pack_type_num, unpacked._obj())
return unpacked

def get_stored_checksum(self):
pack_reader = SwiftPackReader(self.scon, self._filename, self.pack_length)
Expand Down
47 changes: 9 additions & 38 deletions dulwich/pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,6 @@
ChecksumMismatch,
)
from dulwich.file import GitFile
from dulwich.lru_cache import (
LRUSizeCache,
)
from dulwich.objects import (
ShaFile,
ObjectID,
Expand Down Expand Up @@ -894,14 +891,6 @@ def unpack_object(
return unpacked, unused


def _compute_object_size(value):
"""Compute the size of a unresolved object for use with LRUSizeCache."""
(num, obj) = value
if num in DELTA_TYPES:
return chunks_length(obj[1])
return chunks_length(obj)


class PackStreamReader:
"""Class to read a pack stream.

Expand Down Expand Up @@ -1169,9 +1158,6 @@ def __init__(self, filename, file=None, size=None):
else:
self._file = file
(version, self._num_objects) = read_pack_header(self._file.read)
self._offset_cache = LRUSizeCache(
1024 * 1024 * 20, compute_size=_compute_object_size
)

@property
def filename(self):
Expand Down Expand Up @@ -1331,20 +1317,6 @@ def get_unpacked_object_at(self, offset: int, *, include_comp: bool = False) ->
unpacked.offset = offset
return unpacked

def get_object_at(self, offset: int) -> Tuple[int, OldUnpackedObject]:
"""Given an offset in to the packfile return the object that is there.

Using the associated index the location of an object can be looked up,
and then the packfile can be asked directly for that object using this
function.
"""
try:
return self._offset_cache[offset]
except KeyError:
pass
unpacked = self.get_unpacked_object_at(offset, include_comp=False)
return (unpacked.pack_type_num, unpacked._obj())


T = TypeVar('T')

Expand Down Expand Up @@ -2391,7 +2363,9 @@ def __contains__(self, sha1: bytes) -> bool:

def get_raw(self, sha1: bytes) -> Tuple[int, bytes]:
offset = self.index.object_offset(sha1)
obj_type, obj = self.data.get_object_at(offset)
unpacked = self.data.get_unpacked_object_at(offset, include_comp=False)
obj = unpacked._obj()
obj_type = unpacked.pack_type_num
type_num, chunks = self.resolve_object(offset, obj_type, obj)
return type_num, b"".join(chunks)

Expand Down Expand Up @@ -2475,7 +2449,9 @@ def get_ref(self, sha: bytes) -> Tuple[Optional[int], int, OldUnpackedObject]:
except KeyError:
offset = None
if offset:
type, obj = self.data.get_object_at(offset)
unpacked = self.data.get_unpacked_object_at(offset, include_comp=False)
type = unpacked.pack_type_num
obj = unpacked._obj()
elif self.resolve_ext_ref:
type, obj = self.resolve_ext_ref(sha)
else:
Expand All @@ -2501,7 +2477,9 @@ def resolve_object(self, offset: int, type: int, obj, get_ref=None) -> Tuple[int
(delta_offset, delta) = base_obj
# TODO: clean up asserts and replace with nicer error messages
base_offset = base_offset - delta_offset
base_type, base_obj = self.data.get_object_at(base_offset)
unpacked = self.data.get_unpacked_object_at(base_offset, include_comp=False)
base_type = unpacked.pack_type_num
base_obj = unpacked._obj()
assert isinstance(base_type, int)
elif base_type == REF_DELTA:
(basename, delta) = base_obj
Expand All @@ -2515,13 +2493,6 @@ def resolve_object(self, offset: int, type: int, obj, get_ref=None) -> Tuple[int
chunks = base_obj
for prev_offset, delta_type, delta in reversed(delta_stack):
chunks = apply_delta(chunks, delta)
# TODO(dborowitz): This can result in poor performance if
# large base objects are separated from deltas in the pack.
# We should reorganize so that we apply deltas to all
# objects in a chain one after the other to optimize cache
# performance.
if prev_offset is not None:
self.data._offset_cache[prev_offset] = base_type, chunks
return base_type, chunks

def entries(self, progress: Optional[ProgressFn] = None) -> Iterator[PackIndexEntry]:
Expand Down
2 changes: 1 addition & 1 deletion dulwich/tests/test_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ def test_pack_tuples(self):
self.assertEqual(expected, set(list(tuples)))
self.assertEqual(3, len(tuples))

def test_get_object_at(self):
def test_get_object(self):
"""Tests random access for non-delta objects"""
with self.get_pack(pack1_sha) as p:
obj = p[a_sha]
Expand Down