-
Notifications
You must be signed in to change notification settings - Fork 2.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Draft] [Storage] Decompression #39740
base: main
Are you sure you want to change the base?
Changes from 4 commits
0bb3eaa
1d242f0
37efb36
f57f65c
3bcd401
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -64,7 +64,7 @@ | |
class TestStorageCommonBlobAsync(AsyncStorageRecordedTestCase): | ||
# --Helpers----------------------------------------------------------------- | ||
async def _setup(self, storage_account_name, key): | ||
self.bsc = BlobServiceClient(self.account_url(storage_account_name, "blob"), credential=key) | ||
self.bsc = BlobServiceClient(self.account_url(storage_account_name, "blob"), credential=key, retry_total=0) | ||
self.container_name = self.get_resource_name('utcontainer') | ||
self.source_container_name = self.get_resource_name('utcontainersource') | ||
self.byte_data = self.get_random_bytes(1024) | ||
|
@@ -3510,4 +3510,49 @@ async def test_mock_transport_with_content_validation(self, **kwargs): | |
|
||
blob_data = await (await blob_client.download_blob(validate_content=True)).read() | ||
assert blob_data == b"Hello Async World!" # data is fixed by mock transport | ||
# ------------------------------------------------------------------------------ | ||
|
||
@pytest.mark.live_test_only
@BlobPreparer()
async def test_download_blob_decompress(self, **kwargs):
    """Upload a gzip-encoded blob and verify both download modes.

    The blob is uploaded with ``content_encoding='gzip'``. Downloading
    with ``decompress=True`` is expected to return the plain payload,
    while ``decompress=False`` is expected to return the uploaded bytes
    unchanged.
    """
    account_name = kwargs.pop("storage_account_name")
    account_key = kwargs.pop("storage_account_key")

    # Arrange
    await self._setup(account_name, account_key)
    name = self._get_blob_reference()
    blob_client = self.bsc.get_blob_client(self.container_name, name)
    gzipped = b'\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xcaH\xcd\xc9\xc9WH+\xca\xcfUH\xaf\xca,\x00\x00\x00\x00\xff\xff\x03\x00d\xaa\x8e\xb5\x0f\x00\x00\x00'
    plain = b"hello from gzip"
    settings = ContentSettings(content_encoding='gzip')

    # Act / Assert
    await blob_client.upload_blob(data=gzipped, content_settings=settings, overwrite=True)

    # Decompression requested: the plain payload comes back.
    stream = await blob_client.download_blob(decompress=True)
    assert await stream.readall() == plain

    # Decompression disabled: the stored bytes come back as uploaded.
    stream = await blob_client.download_blob(decompress=False)
    assert await stream.readall() == gzipped
|
||
@pytest.mark.live_test_only | ||
@BlobPreparer() | ||
async def test_download_blob_decompress_md5(self, **kwargs): | ||
vincenttran-msft marked this conversation as resolved.
Show resolved
Hide resolved
|
||
storage_account_name = kwargs.pop("storage_account_name") | ||
storage_account_key = kwargs.pop("storage_account_key") | ||
|
||
# Arrange | ||
await self._setup(storage_account_name, storage_account_key) | ||
blob_name = self._get_blob_reference() | ||
blob = self.bsc.get_blob_client(self.container_name, blob_name) | ||
compressed_data = b'\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xcaH\xcd\xc9\xc9WH+\xca\xcfUH\xaf\xca,\x00\x00\x00\x00\xff\xff\x03\x00d\xaa\x8e\xb5\x0f\x00\x00\x00' | ||
decompressed_data = b"hello from gzip" | ||
content_settings = ContentSettings(content_encoding='gzip') | ||
|
||
# Act / Assert | ||
await blob.upload_blob(data=compressed_data, content_settings=content_settings, overwrite=True) | ||
downloaded = await blob.download_blob(validate_content=True, decompress=False) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This scenario ( # response.http_response type is RestAioHttpTransportResponse
await response.http_response.load_body() # Load the body in memory and close the socket
except (StreamClosedError, StreamConsumedError):
pass
computed_md5 = response.http_request.headers.get('content-md5', None) or \
encode_base64(StorageContentValidation.get_content_md5(response.http_response.body())) This is because where Therefore, we need to be able to request the data back from Storage without auto-decompression, so that we can properly perform the MD5 check. |
||
result = await downloaded.readall() | ||
assert result == decompressed_data | ||
# ------------------------------------------------------------------------------ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This scenario (no content validation, upload compressed data, but download specifying `decompress=False`) will fail here:
This is the download code for the non-content-validation path:
The issue is that `RestAioHttpTransportResponse` did not respect `decompress=False`, so it decompressed the compressed data.