From 873ad863490276ac3a06d5700a080e3f952ff4b7 Mon Sep 17 00:00:00 2001 From: Matthew Zeiler Date: Tue, 11 Feb 2025 13:25:41 -0500 Subject: [PATCH] [EAGLE-5467,EAGLE-5502]: use new sha image and fixed clarifai versions (#513) * use new torch runtime image * less logs by default * clarifai from requirements file * fixed logs and tested new dockerfile * update hash * bump version * some helpers to add latest clarifai version --- CHANGELOG.md | 6 + clarifai/__init__.py | 2 +- .../dockerfile_template/Dockerfile.template | 16 +-- clarifai/runners/models/model_builder.py | 129 +++++++++++------- clarifai/runners/utils/const.py | 10 +- clarifai/utils/logging.py | 6 +- requirements.txt | 6 +- .../dummy_runner_models/requirements.txt | 1 + tests/runners/hf_mbart_model/requirements.txt | 1 + 9 files changed, 106 insertions(+), 71 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 267c30e7..0739f842 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## [[11.1.2]](https://github.com/Clarifai/clarifai-python/releases/tag/11.1.2) - [PyPI](https://pypi.org/project/clarifai/11.1.2/) - 2025-02-10 + +### Changed + + - Use new base images and fix clarifai version [(#513)](https://github.com/Clarifai/clarifai-python/pull/513) + ## [[11.1.1]](https://github.com/Clarifai/clarifai-python/releases/tag/11.1.1) - [PyPI](https://pypi.org/project/clarifai/11.1.1/) - 2025-02-06 ### Changed diff --git a/clarifai/__init__.py b/clarifai/__init__.py index 33bc8e9e..07e0cf9d 100644 --- a/clarifai/__init__.py +++ b/clarifai/__init__.py @@ -1 +1 @@ -__version__ = "11.1.1" +__version__ = "11.1.2" diff --git a/clarifai/runners/dockerfile_template/Dockerfile.template b/clarifai/runners/dockerfile_template/Dockerfile.template index 535d3486..f819d45d 100644 --- a/clarifai/runners/dockerfile_template/Dockerfile.template +++ b/clarifai/runners/dockerfile_template/Dockerfile.template @@ -1,14 +1,14 @@ -# syntax=docker/dockerfile:1 +# syntax=docker/dockerfile:1.13-labs 
############################# # User specific requirements installed in the pip_packages ############################# -FROM --platform=$TARGETPLATFORM ${BUILDER_IMAGE} as pip_packages +FROM --platform=$TARGETPLATFORM ${FINAL_IMAGE} as pip_packages COPY --link requirements.txt /home/nonroot/requirements.txt # Update clarifai package so we always have latest protocol to the API. Everything should land in /venv -RUN pip install --no-cache-dir -r /home/nonroot/requirements.txt && \ - (pip install --upgrade --upgrade-strategy only-if-needed --no-deps --no-cache-dir clarifai clarifai-grpc clarifai-protocol || true) +RUN ["pip", "install", "--no-cache-dir", "-r", "/home/nonroot/requirements.txt"] +RUN ["pip", "show", "clarifai"] ############################# ############################# @@ -16,15 +16,15 @@ RUN pip install --no-cache-dir -r /home/nonroot/requirements.txt && \ ############################# FROM --platform=$TARGETPLATFORM ${DOWNLOADER_IMAGE} as downloader -# make sure we have the latest clarifai package. -RUN (pip install --upgrade --upgrade-strategy only-if-needed --no-cache-dir clarifai clarifai-grpc clarifai-protocol || true) +# make sure we have the latest clarifai package. This version is filled in by SDK. +RUN ["pip", "install", "clarifai==${CLARIFAI_VERSION}"] ##### ############################# # Final runtime image ############################# -FROM --platform=$TARGETPLATFORM ${RUNTIME_IMAGE} as final +FROM --platform=$TARGETPLATFORM ${FINAL_IMAGE} as final # Set the NUMBA cache dir to /tmp # Set the TORCHINDUCTOR cache dir to /tmp @@ -54,7 +54,7 @@ RUN ["python", "-m", "clarifai.cli", "model", "download-checkpoints", "--model_p ##### -# Copy the python packages from the previous stage. +# Copy the python packages from the builder stage. 
COPY --link=true --from=pip_packages /venv /venv ##### diff --git a/clarifai/runners/models/model_builder.py b/clarifai/runners/models/model_builder.py index 73310140..82597c34 100644 --- a/clarifai/runners/models/model_builder.py +++ b/clarifai/runners/models/model_builder.py @@ -16,12 +16,33 @@ from clarifai.client import BaseClient from clarifai.runners.models.model_class import ModelClass -from clarifai.runners.utils.const import ( - AVAILABLE_PYTHON_IMAGES, AVAILABLE_TORCH_IMAGES, CONCEPTS_REQUIRED_MODEL_TYPE, - DEFAULT_PYTHON_VERSION, PYTHON_BUILDER_IMAGE, PYTHON_RUNTIME_IMAGE, TORCH_BASE_IMAGE) +from clarifai.runners.utils.const import (AVAILABLE_PYTHON_IMAGES, AVAILABLE_TORCH_IMAGES, + CONCEPTS_REQUIRED_MODEL_TYPE, DEFAULT_PYTHON_VERSION, + PYTHON_BASE_IMAGE, TORCH_BASE_IMAGE) from clarifai.runners.utils.loader import HuggingFaceLoader from clarifai.urls.helper import ClarifaiUrlHelper from clarifai.utils.logging import logger +from clarifai.versions import CLIENT_VERSION + +# parse the user's requirements.txt to determine the proper base image to build on top of, based on the torch and other large dependencies and it's versions +# List of dependencies to look for +dependencies = [ + 'torch', + 'clarifai', +] +# Escape dependency names for regex +dep_pattern = '|'.join(map(re.escape, dependencies)) +# All possible version specifiers +version_specifiers = '==|>=|<=|!=|~=|>|<' +# Compile a regex pattern with verbose mode for readability +pattern = re.compile(r""" + ^\s* # Start of line, optional whitespace + (?P""" + dep_pattern + r""") # Dependency name + \s* # Optional whitespace + (?P""" + version_specifiers + r""")? # Optional version specifier + \s* # Optional whitespace + (?P[^\s;]+)? 
# Optional version (up to space or semicolon) + """, re.VERBOSE) def _clear_line(n: int = 1) -> None: @@ -290,32 +311,15 @@ def maybe_create_model(self): return self.client.STUB.PostModels(request) def _parse_requirements(self): - # parse the user's requirements.txt to determine the proper base image to build on top of, based on the torch and other large dependencies and it's versions - # List of dependencies to look for - dependencies = [ - 'torch', - ] - # Escape dependency names for regex - dep_pattern = '|'.join(map(re.escape, dependencies)) - # All possible version specifiers - version_specifiers = '==|>=|<=|!=|~=|>|<' - # Compile a regex pattern with verbose mode for readability - pattern = re.compile(r""" - ^\s* # Start of line, optional whitespace - (?P""" + dep_pattern + r""") # Dependency name - \s* # Optional whitespace - (?P""" + version_specifiers + r""")? # Optional version specifier - \s* # Optional whitespace - (?P[^\s;]+)? # Optional version (up to space or semicolon) - """, re.VERBOSE) - - deendencies_version = {} + dependencies_version = {} with open(os.path.join(self.folder, 'requirements.txt'), 'r') as file: for line in file: # Skip empty lines and comments line = line.strip() if not line or line.startswith('#'): continue + # split on whitespace followed by # + line = re.split(r'\s+#', line)[0] match = pattern.match(line) if match: dependency = match.group('dependency') @@ -324,8 +328,8 @@ def _parse_requirements(self): 'whl/cpu') > 0: # Ignore torch-cpu whl files, use base mage. continue - deendencies_version[dependency] = version if version else None - return deendencies_version + dependencies_version[dependency] = version if version else None + return dependencies_version def create_dockerfile(self): dockerfile_template = os.path.join( @@ -357,9 +361,8 @@ def create_dockerfile(self): python_version = DEFAULT_PYTHON_VERSION # This is always the final image used for runtime. 
- runtime_image = PYTHON_RUNTIME_IMAGE.format(python_version=python_version) - builder_image = PYTHON_BUILDER_IMAGE.format(python_version=python_version) - downloader_image = PYTHON_BUILDER_IMAGE.format(python_version=python_version) + final_image = PYTHON_BASE_IMAGE.format(python_version=python_version) + downloader_image = PYTHON_BASE_IMAGE.format(python_version=python_version) # Parse the requirements.txt file to determine the base image dependencies = self._parse_requirements() @@ -370,23 +373,45 @@ def create_dockerfile(self): for image in sorted(AVAILABLE_TORCH_IMAGES, reverse=True): if torch_version in image and f'py{python_version}' in image: cuda_version = image.split('-')[-1].replace('cuda', '') - builder_image = TORCH_BASE_IMAGE.format( + final_image = TORCH_BASE_IMAGE.format( torch_version=torch_version, python_version=python_version, cuda_version=cuda_version, ) - # download_image = base_image logger.info(f"Using Torch version {torch_version} base image to build the Docker image") break - # else: # if not torch then use the download image for the base image too - # # base_image = download_image - # requirements_image = base_image + + if 'clarifai' not in dependencies: + raise Exception( + f"clarifai not found in requirements.txt, please add clarifai to the requirements.txt file with a fixed version. 
Current version is clarifai=={CLIENT_VERSION}" + ) + clarifai_version = dependencies['clarifai'] + if not clarifai_version: + logger.warn( + f"clarifai version not found in requirements.txt, using the latest version {CLIENT_VERSION}" + ) + clarifai_version = CLIENT_VERSION + lines = [] + with open(os.path.join(self.folder, 'requirements.txt'), 'r') as file: + for line in file: + # if the line without whitespace is "clarifai" + # split on whitespace followed by # + matchline = re.split(r'\s+#', line)[0] + match = pattern.match(matchline) + if match and match.group('dependency') == "clarifai": + lines.append(line.replace("clarifai", f"clarifai=={CLIENT_VERSION}")) + else: + lines.append(line) + with open(os.path.join(self.folder, 'requirements.txt'), 'w') as file: + file.writelines(lines) + logger.warn(f"Updated requirements.txt to have clarifai=={CLIENT_VERSION}") + # Replace placeholders with actual values dockerfile_content = dockerfile_template.safe_substitute( name='main', - BUILDER_IMAGE=builder_image, # for pip requirements - RUNTIME_IMAGE=runtime_image, # for runtime + FINAL_IMAGE=final_image, # for pip requirements DOWNLOADER_IMAGE=downloader_image, # for downloading checkpoints + CLARIFAI_VERSION=clarifai_version, # for clarifai ) # Write Dockerfile @@ -496,7 +521,7 @@ def get_model_version_proto(self): def upload_model_version(self, download_checkpoints): file_path = f"{self.folder}.tar.gz" - logger.info(f"Will tar it into file: {file_path}") + logger.debug(f"Will tar it into file: {file_path}") model_type_id = self.config.get('model').get('model_type_id') @@ -537,10 +562,10 @@ def filter_func(tarinfo): with tarfile.open(self.tar_file, "w:gz") as tar: tar.add(self.folder, arcname=".", filter=filter_func) - logger.info("Tarring complete, about to start upload.") + logger.debug("Tarring complete, about to start upload.") file_size = os.path.getsize(self.tar_file) - logger.info(f"Size of the tar is: {file_size} bytes") + logger.debug(f"Size of the tar is: 
{file_size} bytes") self.storage_request_size = self._get_tar_file_content_size(file_path) if not download_checkpoints and self.config.get("checkpoints"): @@ -573,7 +598,6 @@ def filter_func(tarinfo): f"request_id: {response.status.req_id}", end='\r', flush=True) - logger.info("") if response.status.code != status_code_pb2.MODEL_BUILDING: logger.error(f"Failed to upload model version: {response}") return @@ -584,7 +608,7 @@ def filter_func(tarinfo): self.monitor_model_build() finally: if os.path.exists(self.tar_file): - logger.info(f"Cleaning up upload file: {self.tar_file}") + logger.debug(f"Cleaning up upload file: {self.tar_file}") os.remove(self.tar_file) def model_version_stream_upload_iterator(self, model_version_proto, file_path): @@ -594,9 +618,9 @@ def model_version_stream_upload_iterator(self, model_version_proto, file_path): chunk_size = int(127 * 1024 * 1024) # 127MB chunk size num_chunks = (file_size // chunk_size) + 1 logger.info("Uploading file...") - logger.info(f"File size: {file_size}") - logger.info(f"Chunk size: {chunk_size}") - logger.info(f"Number of chunks: {num_chunks}") + logger.debug(f"File size: {file_size}") + logger.debug(f"Chunk size: {chunk_size}") + logger.debug(f"Number of chunks: {num_chunks}") read_so_far = 0 for part_id in range(num_chunks): try: @@ -616,12 +640,12 @@ def model_version_stream_upload_iterator(self, model_version_proto, file_path): break if read_so_far == file_size: - logger.info("\nUpload complete!, waiting for model build...") + logger.info("Upload complete!") def init_upload_model_version(self, model_version_proto, file_path): file_size = os.path.getsize(file_path) - logger.info(f"Uploading model version of model {self.model_proto.id}") - logger.info(f"Using file '{os.path.basename(file_path)}' of size: {file_size} bytes") + logger.debug(f"Uploading model version of model {self.model_proto.id}") + logger.debug(f"Using file '{os.path.basename(file_path)}' of size: {file_size} bytes") result = 
service_pb2.PostModelVersionsUploadRequest( upload_config=service_pb2.PostModelVersionsUploadConfig( user_app_id=self.client.user_app_id, @@ -656,18 +680,19 @@ def monitor_model_build(self): version_id=self.model_version_id, )) status_code = resp.model_version.status.code + logs = self.get_model_build_logs() + for log_entry in logs.log_entries: + if log_entry.url not in seen_logs: + seen_logs.add(log_entry.url) + logger.info(f"{escape(log_entry.message.strip())}") if status_code == status_code_pb2.MODEL_BUILDING: print(f"Model is building... (elapsed {time.time() - st:.1f}s)", end='\r', flush=True) # Fetch and display the logs - logs = self.get_model_build_logs() - for log_entry in logs.log_entries: - if log_entry.url not in seen_logs: - seen_logs.add(log_entry.url) - logger.info(f"{escape(log_entry.message.strip())}") time.sleep(1) elif status_code == status_code_pb2.MODEL_TRAINED: - logger.info(f"\nModel build complete! (elapsed {time.time() - st:.1f}s)") + logger.info("Model build complete!") + logger.info(f"Build time elapsed {time.time() - st:.1f}s)") logger.info(f"Check out the model at {self.model_url} version: {self.model_version_id}") return True else: diff --git a/clarifai/runners/utils/const.py b/clarifai/runners/utils/const.py index de0b368b..3f6d114f 100644 --- a/clarifai/runners/utils/const.py +++ b/clarifai/runners/utils/const.py @@ -2,9 +2,10 @@ registry = os.environ.get('CLARIFAI_BASE_IMAGE_REGISTRY', 'public.ecr.aws/clarifai-models') -PYTHON_BUILDER_IMAGE = registry + '/python-base:builder-{python_version}' -PYTHON_RUNTIME_IMAGE = registry + '/python-base:runtime-{python_version}' -TORCH_BASE_IMAGE = registry + '/torch:builder-{torch_version}-py{python_version}-cuda{cuda_version}' +GIT_SHA = "df565436eea93efb3e8d1eb558a0a46df29523ec" + +PYTHON_BASE_IMAGE = registry + '/python-base:{python_version}-' + GIT_SHA +TORCH_BASE_IMAGE = registry + '/torch:{torch_version}-py{python_version}-cuda{cuda_version}-' + GIT_SHA # List of available python base 
images AVAILABLE_PYTHON_IMAGES = ['3.11', '3.12'] @@ -14,13 +15,10 @@ # List of available torch images # Keep sorted by most recent cuda version. AVAILABLE_TORCH_IMAGES = [ - '2.4.0-py3.11-cuda124', '2.4.1-py3.11-cuda124', '2.5.1-py3.11-cuda124', - '2.4.0-py3.12-cuda124', '2.4.1-py3.12-cuda124', '2.5.1-py3.12-cuda124', - # '2.4.0-py3.13-cuda124', # '2.4.1-py3.13-cuda124', # '2.5.1-py3.13-cuda124', ] diff --git a/clarifai/utils/logging.py b/clarifai/utils/logging.py index 031d10b5..ad03ea2b 100644 --- a/clarifai/utils/logging.py +++ b/clarifai/utils/logging.py @@ -142,8 +142,12 @@ def _configure_logger(name: str, logger_level: Union[int, str] = logging.NOTSET) logger.addHandler(handler) else: # Add the new rich handler and formatter + try: + width, _ = os.get_terminal_size() + except OSError: + width = 255 handler = RichHandler( - rich_tracebacks=True, log_time_format="%Y-%m-%d %H:%M:%S.%f", console=Console(width=255)) + rich_tracebacks=True, log_time_format="%Y-%m-%d %H:%M:%S.%f", console=Console(width=width)) formatter = logging.Formatter('%(message)s') handler.setFormatter(formatter) logger.addHandler(handler) diff --git a/requirements.txt b/requirements.txt index 9a95634e..17532719 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,6 @@ PyYAML>=6.0.1 schema==0.7.5 Pillow>=9.5.0 tabulate>=0.9.0 -fsspec==2024.6.1 -click==8.1.7 -requests==2.32.3 +fsspec>=2024.6.1 +click>=8.1.7 +requests>=2.32.3 diff --git a/tests/runners/dummy_runner_models/requirements.txt b/tests/runners/dummy_runner_models/requirements.txt index 93d76e98..a8ca8559 100644 --- a/tests/runners/dummy_runner_models/requirements.txt +++ b/tests/runners/dummy_runner_models/requirements.txt @@ -1,2 +1,3 @@ aiohttp requests +clarifai diff --git a/tests/runners/hf_mbart_model/requirements.txt b/tests/runners/hf_mbart_model/requirements.txt index 63d7193f..8bbd44c3 100644 --- a/tests/runners/hf_mbart_model/requirements.txt +++ b/tests/runners/hf_mbart_model/requirements.txt @@ -5,3 +5,4 @@ 
accelerate>=1.0.1 optimum>=1.20.0 sentencepiece==0.2.0 requests +clarifai