Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test_mcg_namespace_mpu_crd[RGW-OC-Single] - consistent failures of the test case, errors of 2 types, low pass rate. #11412

Open
ypersky1980 opened this issue Feb 17, 2025 · 0 comments

Comments

@ypersky1980
Copy link
Contributor

test_mcg_namespace_mpu_crd[RGW-OC-Single] - consistent failures of the test case, errors of 2 types, low pass rate.

The test almost always fails.

The failures can be divided into 2 categories:

  1. botocore.exceptions.ClientError: An error occurred (InternalError) when calling the PutObject operation (reached max retries: 4): We encountered an internal error. Please try again.

Relevant links:
https://reportportal-ocs4.apps.ocp-c1.prod.psi.redhat.com/ui/#ocs/launches/795/27570/1359553/1359660/log
https://reportportal-ocs4.apps.ocp-c1.prod.psi.redhat.com/ui/#ocs/launches/795/28819/1413877/1413987/log

  2. AssertionError: rgw-ns-store-b400d689414147a9bc8d4f78bc7 did not reach a healthy state within 180 seconds.

Relevant links:
https://reportportal-ocs4.apps.ocp-c1.prod.psi.redhat.com/ui/#ocs/launches/795/27647/1363270/1363352/log
https://reportportal-ocs4.apps.ocp-c1.prod.psi.redhat.com/ui/#ocs/launches/795/28022/1376497/1376579/log
https://reportportal-ocs4.apps.ocp-c1.prod.psi.redhat.com/ui/#ocs/launches/795/28535/1401296/1401378/log

Backtraces:

  1. self = <test_mcg_namespace_s3_ops_crd.TestMcgNamespaceS3OperationsCrd object at 0x7efed77b47f0>
    mcg_obj = <ocs_ci.ocs.resources.mcg.MCG object at 0x7efed38561c0>
    awscli_pod = <ocs_ci.ocs.resources.pod.Pod object at 0x7efeface1460>
    bucket_factory = <function bucket_factory_fixture.._create_buckets at 0x7efef47be280>
    bucketclass_dict = {'interface': 'OC', 'namespace_policy_dict': {'namespacestore_dict': {'rgw': [(1, None)]}, 'type': 'Single'}}
    test_directory_setup = SetupDirs(origin_dir='test_mcg_namespace_mpu_crd[RGW-OC-Single]/origin', result_dir='test_mcg_namespace_mpu_crd[RGW-OC-Single]/result')

@pytest.mark.parametrize(
argnames=["bucketclass_dict"],
argvalues=[
pytest.param(
{
"interface": "OC",
"namespace_policy_dict": {
"type": "Single",
"namespacestore_dict": {"aws": [(1, None)]},
},
},
),
pytest.param(
{
"interface": "OC",
"namespace_policy_dict": {
"type": "Single",
"namespacestore_dict": {"azure": [(1, None)]},
},
},
),
pytest.param(
{
"interface": "OC",
"namespace_policy_dict": {
"type": "Single",
"namespacestore_dict": {"rgw": [(1, None)]},
},
},
marks=on_prem_platform_required,
),
pytest.param(
{
"interface": "OC",
"namespace_policy_dict": {
"type": "Cache",
"ttl": 600000,
"namespacestore_dict": {
"aws": [(1, "eu-central-1")],
},
},
"placement_policy": {
"tiers": [
{"backingStores": [constants.DEFAULT_NOOBAA_BACKINGSTORE]}
]
},
}
),
],
ids=["AWS-OC-Single", "Azure-OC-Single", "RGW-OC-Single", "AWS-OC-Cache"],
)
def test_mcg_namespace_mpu_crd(
self,
mcg_obj,
awscli_pod,
bucket_factory,
bucketclass_dict,
test_directory_setup,
):
"""
Test multipart upload S3 operations on namespace buckets(created by CRDs)
Validates create, upload, upload copy and list parts operations

"""
ns_buc = bucket_factory(
    amount=1,
    interface=bucketclass_dict["interface"],
    bucketclass=bucketclass_dict,
)[0]

ns_bucket = ns_buc.name

object_path = f"s3://{ns_bucket}"

logger.info(
    f"Setting up test files for mpu and aborting any mpu on bucket: {ns_bucket}"
)
mpu_key, origin_dir, res_dir, parts = multipart_setup(
    awscli_pod, test_directory_setup.origin_dir, test_directory_setup.result_dir
)
bucket_utils.abort_all_multipart_upload(mcg_obj, ns_bucket, COPY_OBJ)

# Initiate mpu, Upload part copy, List and Abort operations
logger.info(f"Put object on bucket: {ns_bucket} to create a copy source")

assert bucket_utils.s3_put_object(
s3_obj=mcg_obj, bucketname=ns_bucket, object_key=ROOT_OBJ, data=OBJ_DATA
), "Failed: PutObject"

/home/jenkins/workspace/qe-deploy-ocs-cluster-prod/ocs-ci/tests/functional/object/mcg/lifecycle/test_mcg_namespace_s3_ops_crd.py:623:

/home/jenkins/workspace/qe-deploy-ocs-cluster-prod/ocs-ci/ocs_ci/ocs/bucket_utils.py:1316: in s3_put_object
return s3_obj.s3_client.put_object(
/home/jenkins/workspace/qe-deploy-ocs-cluster-prod/ocs-ci/venv/lib64/python3.9/site-packages/botocore/client.py:514: in _api_call
return self._make_api_call(operation_name, kwargs)

self = <botocore.client.S3 object at 0x7efefa7a0cd0>
operation_name = 'PutObject'
api_params = {'Body': <_io.BytesIO object at 0x7efed3747270>, 'Bucket': 'oc-bucket-770d0cffa399429da0bf56f69f214b', 'ContentEncoding': '', 'ContentType': '', ...}

def _make_api_call(self, operation_name, api_params):
operation_model = self._service_model.operation_model(operation_name)
service_name = self._service_model.service_name
history_recorder.record(
'API_CALL',
{
'service': service_name,
'operation': operation_name,
'params': api_params,
},
)
if operation_model.deprecated:
logger.debug(
'Warning: %s.%s() is deprecated', service_name, operation_name
)
request_context = {
'client_region': self.meta.region_name,
'client_config': self.meta.config,
'has_streaming_input': operation_model.has_streaming_input,
'auth_type': operation_model.auth_type,
}
request_dict = self._convert_to_request_dict(
api_params, operation_model, context=request_context
)
resolve_checksum_context(request_dict, operation_model, api_params)

service_id = self._service_model.service_id.hyphenize()
handler, event_response = self.meta.events.emit_until_response(
    'before-call.{service_id}.{operation_name}'.format(
        service_id=service_id, operation_name=operation_name
    ),
    model=operation_model,
    params=request_dict,
    request_signer=self._request_signer,
    context=request_context,
)

if event_response is not None:
    http, parsed_response = event_response
else:
    apply_request_checksum(request_dict)
    http, parsed_response = self._make_request(
        operation_model, request_dict, request_context
    )

self.meta.events.emit(
    'after-call.{service_id}.{operation_name}'.format(
        service_id=service_id, operation_name=operation_name
    ),
    http_response=http,
    parsed=parsed_response,
    model=operation_model,
    context=request_context,
)

if http.status_code >= 300:
    error_code = parsed_response.get("Error", {}).get("Code")
    error_class = self.exceptions.from_code(error_code)

  raise error_class(parsed_response, operation_name)

E botocore.exceptions.ClientError: An error occurred (InternalError) when calling the PutObject operation (reached max retries: 4): We encountered an internal error. Please try again.

/home/jenkins/workspace/qe-deploy-ocs-cluster-prod/ocs-ci/venv/lib64/python3.9/site-packages/botocore/client.py:938: ClientError

  2. self = <ocs_ci.ocs.resources.namespacestore.NamespaceStore object at 0x7f08a3cfb700>
    timeout = 180, interval = 5

def verify_health(self, timeout=180, interval=5):
"""
Health verification function that tries to verify
a namespacestores's health until a given time limit is reached

Args:
    timeout (int): Timeout for the check, in seconds
    interval (int): Interval to wait between checks, in seconds

Returns:
    (bool): True if the bucket is healthy, False otherwise

"""
log.info(f"Waiting for {self.name} to be healthy")
try:

  for health_check in TimeoutSampler(
        timeout, interval, getattr(self, f"{self.method}_verify_health")
    ):

/home/jenkins/workspace/qe-deploy-ocs-cluster-prod/ocs-ci/ocs_ci/ocs/resources/namespacestore.py:174:

self = <ocs_ci.utility.utils.TimeoutSampler object at 0x7f08cacf6400>

def iter(self):
if self.start_time is None:
self.start_time = time.time()
while True:
self.last_sample_time = time.time()
if self.timeout <= (self.last_sample_time - self.start_time):

      raise self.timeout_exc_cls(*self.timeout_exc_args)

E ocs_ci.ocs.exceptions.TimeoutExpiredError: Timed out after 180s running oc_verify_health()

/home/jenkins/workspace/qe-deploy-ocs-cluster-prod/ocs-ci/ocs_ci/utility/utils.py:1481: TimeoutExpiredError

During handling of the above exception, another exception occurred:

self = <test_mcg_namespace_s3_ops_crd.TestMcgNamespaceS3OperationsCrd object at 0x7f08a3fce160>
mcg_obj = <ocs_ci.ocs.resources.mcg.MCG object at 0x7f08a3353100>
awscli_pod = <ocs_ci.ocs.resources.pod.Pod object at 0x7f08caab48b0>
bucket_factory = <function bucket_factory_fixture.._create_buckets at 0x7f08acfe3310>
bucketclass_dict = {'interface': 'OC', 'namespace_policy_dict': {'namespacestore_dict': {'rgw': [(1, None)]}, 'type': 'Single'}}
test_directory_setup = SetupDirs(origin_dir='test_mcg_namespace_mpu_crd[RGW-OC-Single]/origin', result_dir='test_mcg_namespace_mpu_crd[RGW-OC-Single]/result')

@pytest.mark.parametrize(
argnames=["bucketclass_dict"],
argvalues=[
pytest.param(
{
"interface": "OC",
"namespace_policy_dict": {
"type": "Single",
"namespacestore_dict": {"aws": [(1, None)]},
},
},
),
pytest.param(
{
"interface": "OC",
"namespace_policy_dict": {
"type": "Single",
"namespacestore_dict": {"azure": [(1, None)]},
},
},
),
pytest.param(
{
"interface": "OC",
"namespace_policy_dict": {
"type": "Single",
"namespacestore_dict": {"rgw": [(1, None)]},
},
},
marks=on_prem_platform_required,
),
pytest.param(
{
"interface": "OC",
"namespace_policy_dict": {
"type": "Cache",
"ttl": 600000,
"namespacestore_dict": {
"aws": [(1, "eu-central-1")],
},
},
"placement_policy": {
"tiers": [
{"backingStores": [constants.DEFAULT_NOOBAA_BACKINGSTORE]}
]
},
}
),
],
ids=["AWS-OC-Single", "Azure-OC-Single", "RGW-OC-Single", "AWS-OC-Cache"],
)
def test_mcg_namespace_mpu_crd(
self,
mcg_obj,
awscli_pod,
bucket_factory,
bucketclass_dict,
test_directory_setup,
):
"""
Test multipart upload S3 operations on namespace buckets(created by CRDs)
Validates create, upload, upload copy and list parts operations

"""

ns_buc = bucket_factory(
amount=1,
interface=bucketclass_dict["interface"],
bucketclass=bucketclass_dict,
)[0]

/home/jenkins/workspace/qe-deploy-ocs-cluster-prod/ocs-ci/tests/functional/object/mcg/lifecycle/test_mcg_namespace_s3_ops_crd.py:603:

/home/jenkins/workspace/qe-deploy-ocs-cluster-prod/ocs-ci/tests/conftest.py:3107: in _create_buckets
bucketclass if bucketclass is None else bucket_class_factory(bucketclass)
/home/jenkins/workspace/qe-deploy-ocs-cluster-prod/ocs-ci/ocs_ci/ocs/resources/bucketclass.py:134: in _create_bucket_class
namespacestores = namespace_store_factory(interface, nss_dict)
/home/jenkins/workspace/qe-deploy-ocs-cluster-prod/ocs-ci/ocs_ci/ocs/resources/namespacestore.py:466: in _create_nss
nss_obj.verify_health()

self = <ocs_ci.ocs.resources.namespacestore.NamespaceStore object at 0x7f08a3cfb700>
timeout = 180, interval = 5

def verify_health(self, timeout=180, interval=5):
"""
Health verification function that tries to verify
a namespacestores's health until a given time limit is reached

Args:
    timeout (int): Timeout for the check, in seconds
    interval (int): Interval to wait between checks, in seconds

Returns:
    (bool): True if the bucket is healthy, False otherwise

"""
log.info(f"Waiting for {self.name} to be healthy")
try:
    for health_check in TimeoutSampler(
        timeout, interval, getattr(self, f"{self.method}_verify_health")
    ):
        if health_check:
            log.info(f"{self.name} is healthy")
            return True
        else:
            log.info(f"{self.name} is unhealthy. Rechecking.")
except TimeoutExpiredError:
    log.error(
        f"{self.name} did not reach a healthy state within {timeout} seconds."
    )

  assert (
        False
    ), f"{self.name} did not reach a healthy state within {timeout} seconds."

E AssertionError: rgw-ns-store-022544566ecb4cb4a1cf342e9b0 did not reach a healthy state within 180 seconds.

/home/jenkins/workspace/qe-deploy-ocs-cluster-prod/ocs-ci/ocs_ci/ocs/resources/namespacestore.py:186: AssertionError

2025-01-16 11:00:25

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

1 participant