{
  "llm": {
    "api_key": "${API_KEY}",
    "type": "azure_openai_chat",
    "deployment_name": "gpt-4o-mini",
    "api_base": "https://siasaigc.openai.azure.com/",
    "api_version": "2024-06-01",
    "model": "gpt-4o-mini-2024-07-18"
  },
  "embeddings": {
    "api_key": "${API_KEY}",
    "type": "azure_openai_embedding",
    "deployment_name": "text-embedding-3-large",
    "api_base": "https://siasaigc.openai.azure.com/",
    "api_version": "2024-06-01",
    "model": "text-embedding-3-large"
  }
}
storage_account_blob_url=reader.str(Fragment.storage_account_blob_url), container_name=reader.str(Fragment.container_name), base_dir=reader.str(Fragment.base_dir) or defs.REPORTING_BASE_DIR, + bucket_name=reader.str(Fragment.bucket_name), + access_key=reader.str(Fragment.access_key), + secret_key=reader.str(Fragment.secret_key), + endpoint=reader.str(Fragment.endpoint), ) with reader.envvar_prefix(Section.storage), reader.use(values.get("storage")): s_type = reader.str(Fragment.type) @@ -380,6 +392,10 @@ def hydrate_parallelization_params( storage_account_blob_url=reader.str(Fragment.storage_account_blob_url), container_name=reader.str(Fragment.container_name), base_dir=reader.str(Fragment.base_dir) or defs.STORAGE_BASE_DIR, + bucket_name=reader.str(Fragment.bucket_name), + access_key=reader.str(Fragment.access_key), + secret_key=reader.str(Fragment.secret_key), + endpoint=reader.str(Fragment.endpoint), ) with reader.envvar_prefix(Section.chunk), reader.use(values.get("chunks")): group_by_columns = reader.list("group_by_columns", "BY_COLUMNS") @@ -608,7 +624,10 @@ class Fragment(str, Enum): thread_stagger = "THREAD_STAGGER" tpm = "TOKENS_PER_MINUTE" type = "TYPE" - + bucket_name="BUCKET_NAME" + access_key = "ACCESS_KEY" + secret_key = "SECRET_KEY" # noqa: S105 + endpoint = "ENDPOINT" class Section(str, Enum): """Configuration Sections.""" diff --git a/graphrag/config/enums.py b/graphrag/config/enums.py index 8741cf74ae..1b647b69c5 100644 --- a/graphrag/config/enums.py +++ b/graphrag/config/enums.py @@ -19,7 +19,8 @@ class CacheType(str, Enum): """The none cache configuration type.""" blob = "blob" """The blob cache configuration type.""" - + minio = "minio" + """The minio cache configuration type.""" def __repr__(self): """Get a string representation.""" return f'"{self.value}"' @@ -45,7 +46,8 @@ class InputType(str, Enum): """The file storage type.""" blob = "blob" """The blob storage type.""" - + minio = "minio" + """The minio storage type.""" def __repr__(self): 
"""Get a string representation.""" return f'"{self.value}"' @@ -60,6 +62,8 @@ class StorageType(str, Enum): """The memory storage type.""" blob = "blob" """The blob storage type.""" + minio = "minio" + """The minio storage type.""" def __repr__(self): """Get a string representation.""" @@ -75,7 +79,8 @@ class ReportingType(str, Enum): """The console reporting configuration type.""" blob = "blob" """The blob reporting configuration type.""" - + minio = "minio" + """The minio reporting configuration type.""" def __repr__(self): """Get a string representation.""" return f'"{self.value}"' diff --git a/graphrag/config/input_models/input_config_input.py b/graphrag/config/input_models/input_config_input.py index 4ff89d2c9a..35419b90d2 100644 --- a/graphrag/config/input_models/input_config_input.py +++ b/graphrag/config/input_models/input_config_input.py @@ -25,3 +25,6 @@ class InputConfigInput(TypedDict): title_column: NotRequired[str | None] document_attribute_columns: NotRequired[list[str] | str | None] storage_account_blob_url: NotRequired[str | None] + bucket_name: NotRequired[str | None] + access_key: NotRequired[str | None] + endpoint: NotRequired[str | None] \ No newline at end of file diff --git a/graphrag/config/input_models/reporting_config_input.py b/graphrag/config/input_models/reporting_config_input.py index a224f0b440..c0a73378c7 100644 --- a/graphrag/config/input_models/reporting_config_input.py +++ b/graphrag/config/input_models/reporting_config_input.py @@ -16,3 +16,6 @@ class ReportingConfigInput(TypedDict): connection_string: NotRequired[str | None] container_name: NotRequired[str | None] storage_account_blob_url: NotRequired[str | None] + bucket_name: NotRequired[str | None] + access_key: NotRequired[str | None] + endpoint: NotRequired[str | None] \ No newline at end of file diff --git a/graphrag/config/input_models/storage_config_input.py b/graphrag/config/input_models/storage_config_input.py index cc5caf7952..5250ac36fa 100644 --- 
a/graphrag/config/input_models/storage_config_input.py +++ b/graphrag/config/input_models/storage_config_input.py @@ -16,3 +16,6 @@ class StorageConfigInput(TypedDict): connection_string: NotRequired[str | None] container_name: NotRequired[str | None] storage_account_blob_url: NotRequired[str | None] + bucket_name: NotRequired[str | None] + access_key: NotRequired[str | None] + endpoint: NotRequired[str | None] \ No newline at end of file diff --git a/graphrag/config/models/cache_config.py b/graphrag/config/models/cache_config.py index 4589edce0b..bbb6024fe2 100644 --- a/graphrag/config/models/cache_config.py +++ b/graphrag/config/models/cache_config.py @@ -27,3 +27,19 @@ class CacheConfig(BaseModel): storage_account_blob_url: str | None = Field( description="The storage account blob url to use.", default=None ) + bucket_name: str| None = Field( + description="The bucket name for the input files.", default=None + ) + """The bucket name for the input files.""" + access_key: str| None = Field( + description="The access key for the input files.", default=None + ) + """The access key for the input files.""" + secret_key: str| None = Field( + description="The secret key for the input files.", default=None + ) + """The secret key for the input files.""" + endpoint: str | None = Field( + description="The endpoint for the input files.", default=None + ) + """The endpoint for the input files.""" \ No newline at end of file diff --git a/graphrag/config/models/input_config.py b/graphrag/config/models/input_config.py index f9e5847af6..f099856a1e 100644 --- a/graphrag/config/models/input_config.py +++ b/graphrag/config/models/input_config.py @@ -58,3 +58,19 @@ class InputConfig(BaseModel): document_attribute_columns: list[str] = Field( description="The document attribute columns to use.", default=[] ) + bucket_name: str| None = Field( + description="The bucket name for the input files.", default=None + ) + """The bucket name for the input files.""" + access_key: str| None = 
Field( + description="The access key for the input files.", default=None + ) + """The access key for the input files.""" + secret_key: str| None = Field( + description="The secret key for the input files.", default=None + ) + """The secret key for the input files.""" + endpoint: str | None = Field( + description="The endpoint for the input files.", default=None + ) + """The endpoint for the input files.""" \ No newline at end of file diff --git a/graphrag/config/models/reporting_config.py b/graphrag/config/models/reporting_config.py index 35e86cf5da..7682c8f6d6 100644 --- a/graphrag/config/models/reporting_config.py +++ b/graphrag/config/models/reporting_config.py @@ -28,3 +28,15 @@ class ReportingConfig(BaseModel): storage_account_blob_url: str | None = Field( description="The storage account blob url to use.", default=None ) + bucket_name: str | None = Field( + description="The reporting bucket name to use.", default=None + ) + access_key: str | None = Field( + description="The reporting access key to use.", default=None + ) + secret_key: str | None = Field( + description="The reporting secret key to use.", default=None + ) + endpoint: str | None = Field( + description="The reporting endpoint to use.", default=None + ) \ No newline at end of file diff --git a/graphrag/config/models/storage_config.py b/graphrag/config/models/storage_config.py index dcf41b9222..5006b70cd3 100644 --- a/graphrag/config/models/storage_config.py +++ b/graphrag/config/models/storage_config.py @@ -28,3 +28,19 @@ class StorageConfig(BaseModel): storage_account_blob_url: str | None = Field( description="The storage account blob url to use.", default=None ) + bucket_name: str| None = Field( + description="The bucket name for the input files.", default=None + ) + """The bucket name for the input files.""" + access_key: str| None = Field( + description="The access key for the input files.", default=None + ) + """The access key for the input files.""" + secret_key: str| None = Field( + 
description="The secret key for the input files.", default=None + ) + """The secret key for the input files.""" + endpoint: str | None = Field( + description="The endpoint for the input files.", default=None + ) + """The endpoint for the input files.""" \ No newline at end of file diff --git a/graphrag/index/cache/load_cache.py b/graphrag/index/cache/load_cache.py index 4e0e6324fb..527e9ea271 100644 --- a/graphrag/index/cache/load_cache.py +++ b/graphrag/index/cache/load_cache.py @@ -11,8 +11,13 @@ from graphrag.index.config.cache import ( PipelineBlobCacheConfig, PipelineFileCacheConfig, + PipelineMinioCacheConfig, +) +from graphrag.index.storage import ( + BlobPipelineStorage, + FilePipelineStorage, + MinioPipelineStorage, ) -from graphrag.index.storage import BlobPipelineStorage, FilePipelineStorage if TYPE_CHECKING: from graphrag.index.config import ( @@ -46,6 +51,16 @@ def load_cache(config: PipelineCacheConfig | None, root_dir: str | None): storage_account_blob_url=config.storage_account_blob_url, ).child(config.base_dir) return JsonPipelineCache(storage) + case CacheType.minio: + config = cast(PipelineMinioCacheConfig, config) + storage = MinioPipelineStorage( + config.endpoint if config.endpoint is not None else "", + config.access_key if config.access_key is not None else "", + config.secret_key if config.secret_key is not None else "", + config.bucket_name if config.bucket_name is not None else "", + path_prefix=config.base_dir + ).child(config.base_dir) + return JsonPipelineCache(storage) case _: msg = f"Unknown cache type: {config.type}" raise ValueError(msg) diff --git a/graphrag/index/config/__init__.py b/graphrag/index/config/__init__.py index 3c40762a84..ec50ecbf0c 100644 --- a/graphrag/index/config/__init__.py +++ b/graphrag/index/config/__init__.py @@ -9,6 +9,7 @@ PipelineCacheConfigTypes, PipelineFileCacheConfig, PipelineMemoryCacheConfig, + PipelineMinioCacheConfig, PipelineNoneCacheConfig, ) from .input import ( @@ -22,6 +23,7 @@ 
class PipelineMinioCacheConfig(PipelineCacheConfig[Literal[CacheType.minio]]):
    """Represents the MinIO cache configuration for the pipeline.

    BUG FIX: the base-class generic parameter was ``Literal[CacheType.blob]``
    while the ``type`` field was ``CacheType.minio`` — the two must agree for
    the discriminated-union dispatch on ``type`` to work.
    """

    type: Literal[CacheType.minio] = CacheType.minio
    """The type of cache."""

    base_dir: str | None = pydantic_Field(
        description="The base directory for the cache.", default=None
    )
    """The base directory for the cache."""

    bucket_name: str | None = pydantic_Field(
        description="The MinIO bucket name for the cache.", default=None
    )
    """The MinIO bucket name for the cache."""

    access_key: str | None = pydantic_Field(
        description="The MinIO access key.", default=None
    )
    """The MinIO access key."""

    secret_key: str | None = pydantic_Field(
        description="The MinIO secret key.", default=None
    )
    """The MinIO secret key."""

    endpoint: str | None = pydantic_Field(
        description="The MinIO server endpoint.", default=None
    )
    """The MinIO server endpoint."""
PipelineCacheConfigTypes = ( PipelineFileCacheConfig | PipelineMemoryCacheConfig | PipelineBlobCacheConfig | PipelineNoneCacheConfig + | PipelineMinioCacheConfig ) diff --git a/graphrag/index/config/input.py b/graphrag/index/config/input.py index 35db357599..9cc32ad0e0 100644 --- a/graphrag/index/config/input.py +++ b/graphrag/index/config/input.py @@ -69,7 +69,22 @@ class PipelineInputConfig(BaseModel, Generic[T]): description="The encoding for the input files.", default=None ) """The encoding for the input files.""" - + bucket_name: str| None = pydantic_Field( + description="The bucket name for the input files.", default=None + ) + """The bucket name for the input files.""" + access_key: str| None = pydantic_Field( + description="The access key for the input files.", default=None + ) + """The access key for the input files.""" + secret_key: str| None = pydantic_Field( + description="The secret key for the input files.", default=None + ) + """The secret key for the input files.""" + endpoint: str | None = pydantic_Field( + description="The endpoint for the input files.", default=None + ) + """The endpoint for the input files.""" class PipelineCSVInputConfig(PipelineInputConfig[Literal[InputFileType.csv]]): """Represent the configuration for a CSV input.""" diff --git a/graphrag/index/config/reporting.py b/graphrag/index/config/reporting.py index 921e24ae4e..cc9c373836 100644 --- a/graphrag/index/config/reporting.py +++ b/graphrag/index/config/reporting.py @@ -69,9 +69,70 @@ class PipelineBlobReportingConfig(PipelineReportingConfig[Literal[ReportingType. 
class PipelineMinioReportingConfig(
    PipelineReportingConfig[Literal[ReportingType.minio]]
):
    """Represents the MinIO reporting configuration for the pipeline."""

    type: Literal[ReportingType.minio] = ReportingType.minio
    """The type of reporting."""

    # NOTE(review): connection_string / container_name / storage_account_blob_url
    # were copied from the blob config and are unused by the MinIO reporter;
    # retained (typed as optional — the original annotated container_name as a
    # bare ``str`` with a ``None`` default) for constructor compatibility.
    connection_string: str | None = pydantic_Field(
        description="Unused for MinIO; retained for blob-config parity.",
        default=None,
    )
    container_name: str | None = pydantic_Field(
        description="Unused for MinIO; retained for blob-config parity.",
        default=None,
    )
    storage_account_blob_url: str | None = pydantic_Field(
        description="Unused for MinIO; retained for blob-config parity.",
        default=None,
    )

    base_dir: str | None = pydantic_Field(
        description="The base directory for the reporting.", default=None
    )
    """The base directory for the reporting."""

    bucket_name: str | None = pydantic_Field(
        description="The MinIO bucket name for reporting.", default=None
    )
    """The MinIO bucket name for reporting."""

    access_key: str | None = pydantic_Field(
        description="The MinIO access key.", default=None
    )
    """The MinIO access key."""

    secret_key: str | None = pydantic_Field(
        description="The MinIO secret key.", default=None
    )
    """The MinIO secret key."""

    endpoint: str | None = pydantic_Field(
        description="The MinIO server endpoint.", default=None
    )
    """The MinIO server endpoint."""


PipelineReportingConfigTypes = (
    PipelineFileReportingConfig
    | PipelineConsoleReportingConfig
    | PipelineBlobReportingConfig
    | PipelineMinioReportingConfig
)


class PipelineMinioStorageConfig(PipelineStorageConfig[Literal[StorageType.minio]]):
    """Represents the MinIO storage configuration for the pipeline."""

    type: Literal[StorageType.minio] = StorageType.minio
    """The type of storage."""

    # NOTE(review): blob-specific fields retained for parity; unused by MinIO.
    connection_string: str | None = pydantic_Field(
        description="Unused for MinIO; retained for blob-config parity.",
        default=None,
    )
    container_name: str | None = pydantic_Field(
        description="Unused for MinIO; retained for blob-config parity.",
        default=None,
    )
    storage_account_blob_url: str | None = pydantic_Field(
        description="Unused for MinIO; retained for blob-config parity.",
        default=None,
    )

    base_dir: str | None = pydantic_Field(
        description="The base directory for the storage.", default=None
    )
    """The base directory for the storage."""

    bucket_name: str | None = pydantic_Field(
        description="The MinIO bucket name for storage.", default=None
    )
    """The MinIO bucket name for storage."""

    access_key: str | None = pydantic_Field(
        description="The MinIO access key.", default=None
    )
    """The MinIO access key."""

    secret_key: str | None = pydantic_Field(
        description="The MinIO secret key.", default=None
    )
    """The MinIO secret key."""

    endpoint: str | None = pydantic_Field(
        description="The MinIO server endpoint.", default=None
    )
    """The MinIO server endpoint."""


PipelineStorageConfigTypes = (
    PipelineFileStorageConfig
    | PipelineMemoryStorageConfig
    | PipelineBlobStorageConfig
    | PipelineMinioStorageConfig
)
storage_account_blob_url=settings.input.storage_account_blob_url, container_name=settings.input.container_name, + bucket_name = settings.input.bucket_name, + access_key= settings.input.access_key, + secret_key=settings.input.secret_key, + endpoint=settings.input.endpoint ) case _: msg = f"Unknown input type: {file_type}" @@ -511,6 +519,24 @@ def _get_reporting_config( ) case ReportingType.console: return PipelineConsoleReportingConfig() + case ReportingType.minio: + bucket_name= settings.reporting.bucket_name + access_key = settings.reporting.access_key + secret_key = settings.reporting.secret_key + endpoint = settings.reporting.endpoint + if bucket_name is None: + msg = "bucket_name must be provided for blob reporting." + raise ValueError(settings.reporting) + if access_key is None or secret_key is None or endpoint is None : + msg = "Connection string or storage account blob url must be provided for blob reporting." + raise ValueError(msg) + return PipelineMinioReportingConfig( + bucket_name=bucket_name, + access_key= access_key, + secret_key=secret_key, + endpoint=settings.reporting.endpoint, + base_dir=settings.reporting.base_dir, + ) case _: # relative to the root_dir return PipelineFileReportingConfig(base_dir=settings.reporting.base_dir) @@ -547,6 +573,27 @@ def _get_storage_config( base_dir=settings.storage.base_dir, storage_account_blob_url=storage_account_blob_url, ) + case StorageType.minio: + access_key = settings.cache.access_key + bucket_name = settings.cache.bucket_name + endpoint = settings.cache.endpoint + secret_key = settings.cache.secret_key + if access_key is None: + msg = "access_key must be provided for blob cache." + raise ValueError(msg) + if bucket_name is None: + msg = "bucket_name string must be provided for blob cache." + raise ValueError(msg) + if endpoint is None: + msg = "endpoint must be provided for blob cache." 
+ raise ValueError(msg) + return PipelineMinioStorageConfig( + access_key=access_key, + bucket_name=bucket_name, + base_dir=settings.storage.base_dir, + endpoint=endpoint, + secret_key = secret_key + ) case _: # relative to the root_dir base_dir = settings.storage.base_dir @@ -584,6 +631,27 @@ def _get_cache_config( base_dir=settings.cache.base_dir, storage_account_blob_url=storage_account_blob_url, ) + case CacheType.minio: + access_key = settings.cache.access_key + bucket_name = settings.cache.bucket_name + endpoint = settings.cache.endpoint + secret_key = settings.cache.secret_key + if access_key is None: + msg = "access_key must be provided for blob cache." + raise ValueError(msg) + if bucket_name is None: + msg = "bucket_name string must be provided for blob cache." + raise ValueError(msg) + if endpoint is None: + msg = "endpoint must be provided for blob cache." + raise ValueError(msg) + return PipelineMinioCacheConfig( + access_key=access_key, + bucket_name=bucket_name, + base_dir=settings.cache.base_dir, + endpoint=endpoint, + secret_key = secret_key + ) case _: # relative to root dir return PipelineFileCacheConfig(base_dir="./cache") diff --git a/graphrag/index/input/load_input.py b/graphrag/index/input/load_input.py index 6d62334210..7bc8f3161a 100644 --- a/graphrag/index/input/load_input.py +++ b/graphrag/index/input/load_input.py @@ -16,6 +16,7 @@ from graphrag.index.storage import ( BlobPipelineStorage, FilePipelineStorage, + MinioPipelineStorage, ) from .csv import input_type as csv @@ -67,6 +68,24 @@ async def load_input( storage = FilePipelineStorage( root_dir=str(Path(root_dir) / (config.base_dir or "")) ) + case InputType.minio: + log.info("using minio storage for input") + if config.bucket_name is None: + msg = "Bucket name required for minio storage" + raise ValueError(config) + if config.access_key is None or config.secret_key is None: + msg = "Access key and secret key required for minio storage" + raise ValueError(config) + if config.endpoint 
is None: + msg = "Endpoint required for minio storage" + raise ValueError(config) + storage = MinioPipelineStorage( + endpoint=config.endpoint if config.endpoint is not None else "", + access_key=config.access_key, + secret_key=config.secret_key, + bucket_name=config.bucket_name, + path_prefix=config.base_dir, + ) case _: log.info("using file storage for input") storage = FilePipelineStorage( diff --git a/graphrag/index/reporting/load_pipeline_reporter.py b/graphrag/index/reporting/load_pipeline_reporter.py index 0386ea03d1..b4e08d84a9 100644 --- a/graphrag/index/reporting/load_pipeline_reporter.py +++ b/graphrag/index/reporting/load_pipeline_reporter.py @@ -12,12 +12,14 @@ from graphrag.index.config import ( PipelineBlobReportingConfig, PipelineFileReportingConfig, + PipelineMinioReportingConfig, PipelineReportingConfig, ) from .blob_workflow_callbacks import BlobWorkflowCallbacks from .console_workflow_callbacks import ConsoleWorkflowCallbacks from .file_workflow_callbacks import FileWorkflowCallbacks +from .minio_workflow_callbacks import MinioWorkflowCallbacks def load_pipeline_reporter( @@ -42,6 +44,15 @@ def load_pipeline_reporter( base_dir=config.base_dir, storage_account_blob_url=config.storage_account_blob_url, ) + case ReportingType.minio: + config = cast(PipelineMinioReportingConfig, config) + return MinioWorkflowCallbacks( + endpoint=config.endpoint or "", + access_key=config.access_key or "", + secret_key=config.secret_key or "", + bucket_name=config.bucket_name or "", + base_dir=config.base_dir or "", + ) case _: msg = f"Unknown reporting type: {config.type}" raise ValueError(msg) diff --git a/graphrag/index/reporting/minio_workflow_callbacks.py b/graphrag/index/reporting/minio_workflow_callbacks.py new file mode 100644 index 0000000000..d31852275a --- /dev/null +++ b/graphrag/index/reporting/minio_workflow_callbacks.py @@ -0,0 +1,120 @@ +"""A reporter that writes to a MinIO storage.""" + +import io +import json +from datetime import datetime, 
class MinioWorkflowCallbacks(NoopWorkflowCallbacks):
    """A reporter that writes workflow logs to a MinIO bucket.

    Each record is appended (read-modify-write) to a JSON-lines object under
    ``base_dir``; after ``_max_block_count`` records a fresh object is
    started, mirroring the blob reporter's rollover behaviour.
    """

    _minio_client: Minio
    _bucket_name: str
    _max_block_count: int = 25000  # max log records per object before rollover

    def __init__(
        self,
        endpoint: str,
        access_key: str,
        secret_key: str,
        bucket_name: str,
        base_dir: str,
    ):
        """Create a new instance of the MinioWorkflowCallbacks class.

        Raises
        ------
        ValueError
            If bucket name or any of endpoint/access_key/secret_key is missing.
        """
        if bucket_name is None:
            msg = "No bucket name provided for MinIO storage."
            raise ValueError(msg)
        if not endpoint or not access_key or not secret_key:
            msg = "Endpoint, Access Key, and Secret Key must be provided for MinIO storage."
            raise ValueError(msg)

        self.endpoint = endpoint
        self.access_key = access_key
        self.secret_key = secret_key
        self._bucket_name = bucket_name
        # Resolve the ${timestamp} placeholder once so every record of this
        # run lands under the same directory (the original re-replaced it on
        # every write and mutated self.base_dir in place).
        timestamp = datetime.now(tz=timezone.utc).strftime("%Y-%m-%d-%H:%M:%S:%f")
        self.base_dir = (base_dir or "").replace("${timestamp}", timestamp)

        self._minio_client = Minio(
            endpoint,
            access_key=access_key,
            secret_key=secret_key,
            secure=False,  # NOTE(review): plain HTTP — confirm for production
        )
        # Ensure the bucket exists before the first write.
        if not self._minio_client.bucket_exists(self._bucket_name):
            self._minio_client.make_bucket(self._bucket_name)

        self._start_new_object()

    def _start_new_object(self) -> None:
        """Point the writer at a fresh log object and reset the record counter.

        BUG FIX: the original only set ``_object_name`` when the bucket name
        was empty, then rebuilt it per write as
        ``Path(base_dir) / f"{base_dir}.logs.json"`` — duplicating ``base_dir``
        in the object key.
        """
        object_name = (
            f"report/{datetime.now(tz=timezone.utc).strftime('%Y-%m-%d-%H:%M:%S:%f')}"
            ".logs.json"
        )
        self._object_name = str(Path(self.base_dir or "") / object_name)
        self._num_blocks = 0  # refresh block counter

    def _write_log(self, log: dict[str, Any]):
        """Append one JSON record to the current log object."""
        # Roll over to a new object when the record cap is reached, instead of
        # re-running __init__ (which re-created the client and re-checked the
        # bucket on every rollover).
        if self._num_blocks >= self._max_block_count:
            self._start_new_object()

        log_data = json.dumps(log) + "\n"
        try:
            # Read the existing object so the new record can be appended;
            # close/release the connection (the original leaked it).
            response = self._minio_client.get_object(
                self._bucket_name, self._object_name
            )
            try:
                current_data = response.read().decode("utf-8")
            finally:
                response.close()
                response.release_conn()
            updated_data = current_data + log_data
        except S3Error as e:
            if e.code == "NoSuchKey":
                # First write to this object.
                updated_data = log_data
            else:
                raise

        # BUG FIX: put_object's ``length`` is a *byte* count; encode first so
        # multi-byte characters don't truncate the upload.
        payload = updated_data.encode("utf-8")
        self._minio_client.put_object(
            self._bucket_name,
            self._object_name,
            data=io.BytesIO(payload),
            length=len(payload),
            content_type="application/json",
        )
        # Update the log's block count.
        self._num_blocks += 1

    def on_error(
        self,
        message: str,
        cause: BaseException | None = None,
        stack: str | None = None,
        details: dict | None = None,
    ):
        """Report an error."""
        self._write_log({
            "type": "error",
            "data": message,
            "cause": str(cause),
            "stack": stack,
            "details": details,
        })

    def on_warning(self, message: str, details: dict | None = None):
        """Report a warning."""
        self._write_log({"type": "warning", "data": message, "details": details})

    def on_log(self, message: str, details: dict | None = None):
        """Report a generic log message."""
        self._write_log({"type": "log", "data": message, "details": details})
33d61ee97f..5994db8d37 100644 --- a/graphrag/index/storage/load_storage.py +++ b/graphrag/index/storage/load_storage.py @@ -11,12 +11,14 @@ from graphrag.index.config.storage import ( PipelineBlobStorageConfig, PipelineFileStorageConfig, + PipelineMinioStorageConfig, PipelineStorageConfig, ) from .blob_pipeline_storage import create_blob_storage from .file_pipeline_storage import create_file_storage from .memory_pipeline_storage import create_memory_storage +from .minio_pipeline_storage import create_minio_storage def load_storage(config: PipelineStorageConfig): @@ -35,6 +37,14 @@ def load_storage(config: PipelineStorageConfig): case StorageType.file: config = cast(PipelineFileStorageConfig, config) return create_file_storage(config.base_dir) + case StorageType.minio: + config = cast(PipelineMinioStorageConfig, config) + return create_minio_storage( endpoint=config.endpoint if config.endpoint else "" + ,base_dir= config.base_dir if config.base_dir else "" + ,bucket_name= config.bucket_name if config.bucket_name else "" + ,access_key= config.access_key if config.access_key else "" + ,secret_key= config.secret_key if config.secret_key else "" + ) case _: msg = f"Unknown storage type: {config.type}" raise ValueError(msg) diff --git a/graphrag/index/storage/minio_pipeline_storage.py b/graphrag/index/storage/minio_pipeline_storage.py new file mode 100644 index 0000000000..ccb12b1998 --- /dev/null +++ b/graphrag/index/storage/minio_pipeline_storage.py @@ -0,0 +1,276 @@ +# Copyright (c) 2024 Microsoft Corporation. 
+# Licensed under the MIT License + +"""Minio Blob Storage implementation of PipelineStorage.""" +import io +import logging +import re +from collections.abc import Iterator +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +from datashaper import Progress +from minio import Minio +from minio.error import S3Error + +from graphrag.index.progress import ProgressReporter + +from .typing import PipelineStorage + +log = logging.getLogger(__name__) + +class MinioPipelineStorage(PipelineStorage): + """The MinIO-Storage implementation.""" + + def __init__( + self, + endpoint: str, + access_key: str, + secret_key: str, + bucket_name: str, + secure: bool = False, + encoding: str | None = None, + path_prefix: str | None = None, + ): + """Create a new MinIOStorage instance.""" + self._minio_client = Minio( + endpoint, access_key=access_key, secret_key=secret_key, secure=secure + ) + self._encoding = encoding or "utf-8" + self._bucket_name = bucket_name + self._path_prefix = path_prefix or "" + self.create_bucket() + self.access_key = access_key + self.endpoint = endpoint + self.secret_key = secret_key + self.secure = secure + + def create_bucket(self) -> None: + """Create the bucket if it does not exist.""" + if not self.bucket_exists(): + self._minio_client.make_bucket(self._bucket_name) + + def delete_bucket(self) -> None: + """Delete the bucket.""" + if self.bucket_exists(): + self._minio_client.remove_bucket(self._bucket_name) + + def bucket_exists(self) -> bool: + """Check if the bucket exists.""" + return self._minio_client.bucket_exists(self._bucket_name) + + def find( + self, + file_pattern: re.Pattern[str], + base_dir: str | None = None, + progress: ProgressReporter | None = None, + file_filter: dict[str, Any] | None = None, + max_count=-1, + ) -> Iterator[tuple[str, dict[str, Any]]]: + """Find objects in a bucket using a file pattern, as well as a custom filter function.""" + + base_dir = base_dir or "" + + log.info( + 
"search bucket %s for files matching %s", + self._bucket_name, + file_pattern.pattern, + ) + + def objectname(object_name: str) -> str: + if object_name.startswith(self._path_prefix): + object_name = object_name.replace(self._path_prefix, "", 1) + if object_name.startswith("/"): + object_name = object_name[1:] + return object_name + + def item_filter(item: dict[str, Any]) -> bool: + if file_filter is None: + return True + + return all(re.match(value, item[key]) for key, value in file_filter.items()) + + try: + all_objects = list(self._minio_client.list_objects(self._bucket_name, base_dir, recursive=True)) + + num_loaded = 0 + num_total = len(all_objects) + num_filtered = 0 + for obj in all_objects: + match = file_pattern.match(obj.object_name) # type: ignore + if match: + group = match.groupdict() + if item_filter(group): + yield (objectname(obj.object_name), group) # type: ignore + num_loaded += 1 + if max_count > 0 and num_loaded >= max_count: + break + else: + num_filtered += 1 + else: + num_filtered += 1 + if progress is not None: + progress( + _create_progress_status(num_loaded, num_filtered, num_total) + ) + except Exception: + log.exception( + "Error finding objects: base_dir=%s, file_pattern=%s, file_filter=%s", + base_dir, + file_pattern, + file_filter, + ) + raise + + async def get( + self, key: str, as_bytes: bool | None = False, encoding: str | None = None + ) -> Any: + """Get a value from the storage.""" + try: + key = self._keyname(key) + response = self._minio_client.get_object(self._bucket_name, key) + data = response.read() + response.close() + response.release_conn() + if not as_bytes: + coding = encoding or "utf-8" + data = data.decode(coding) + except Exception: + log.exception("Error getting key %s", key) + return None + else: + return data + + async def set(self, key: str, value: Any, encoding: str | None = None) -> None: + """Set a value in the storage.""" + try: + print("aaaaaaaa") + print(self._path_prefix) + key = self._keyname(key) + 
print("bbbbbbbbbb") + if isinstance(value, bytes): + data = value + else: + coding = encoding or "utf-8" + data = value.encode(coding) + + # self._minio_client.put_object( + # self._bucket_name, key, data, length=len(data) # type: ignore + # ) + binary_io_data = io.BytesIO(data) + self._minio_client.put_object( + self._bucket_name, + object_name=key, + data=binary_io_data, + length=len(data), + content_type="application/json" + ) + except Exception: + log.exception("Error setting key %s: %s", key) + + async def has(self, key: str) -> bool: + """Check if a key exists in the storage.""" + key = self._keyname(key) + try: + self._minio_client.stat_object(self._bucket_name, key) + return True + except S3Error: + return False + + async def delete(self, key: str) -> None: + """Delete a key from the storage.""" + key = self._keyname(key) + self._minio_client.remove_object(self._bucket_name, key) + + async def clear(self) -> None: + """Clear the storage.""" + all_objects = list(self._minio_client.list_objects(self._bucket_name, recursive=True)) + for obj in all_objects: + self._minio_client.remove_object(self._bucket_name, obj.object_name) # type: ignore + + def child(self, name: str | None) -> "PipelineStorage": + """Create a child storage instance.""" + if name is None: + return self + path = str(Path(self._path_prefix) / name) + return MinioPipelineStorage( + self.endpoint, + self.access_key, + self.secret_key, + self._bucket_name, + secure=self.secure, + encoding=self._encoding, + path_prefix=path, + ) + + def _keyname(self, key: str) -> str: + """Get the key name.""" + time = f"{datetime.now(tz=timezone.utc).strftime('%Y-%m-%d-%H:%M:%S:%f')}" + self._path_prefix = self._path_prefix.replace("${timestamp}",time) + object_name = f"{self._path_prefix}" + return str(Path(object_name) / key) + +def create_minio_storage( + endpoint: str, + access_key: str, + secret_key: str, + bucket_name: str, + base_dir: str | None = None, + secure: bool = True +) -> PipelineStorage: + 
"""Create a MinIO based storage.""" + log.info("Creating MinIO storage at %s", bucket_name) + if bucket_name is None: + error_message = "No bucket name provided for MinIO storage." + raise ValueError(error_message) + return MinioPipelineStorage( + endpoint, access_key, secret_key, bucket_name, False, path_prefix=base_dir + ) + +def validate_bucket_name(bucket_name: str): + """ + Check if the provided bucket name is valid based on MinIO rules. + + MinIO follows the same bucket naming rules as AWS S3. + """ + # Check the length of the name + if len(bucket_name) < 3 or len(bucket_name) > 63: + return ValueError( + f"Bucket name must be between 3 and 63 characters in length. Name provided was {len(bucket_name)} characters long." + ) + + # Check if the name starts with a letter or number + if not bucket_name[0].isalnum(): + return ValueError( + f"Bucket name must start with a letter or number. Starting character was {bucket_name[0]}." + ) + + # Check for valid characters (letters, numbers, hyphen) and lowercase letters + if not re.match("^[a-z0-9-]+$", bucket_name): + return ValueError( + f"Bucket name must only contain:\n- lowercase letters\n- numbers\n- or hyphens\nName provided was {bucket_name}." + ) + + # Check for consecutive hyphens + if "--" in bucket_name: + return ValueError( + f"Bucket name cannot contain consecutive hyphens. Name provided was {bucket_name}." + ) + + # Check for hyphens at the end of the name + if bucket_name[-1] == "-": + return ValueError( + f"Bucket name cannot end with a hyphen. Name provided was {bucket_name}." 
+ ) + + return True + +def _create_progress_status( + num_loaded: int, num_filtered: int, num_total: int +) -> Progress: + return Progress( + total_items=num_total, + completed_items=num_loaded + num_filtered, + description=f"{num_loaded} files loaded ({num_filtered} filtered)", + ) \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index c39b4a0089..b890983089 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1440,6 +1440,64 @@ files = [ {file = "graspologic_native-1.2.1.tar.gz", hash = "sha256:72b7586028a91e9fef9af0ef314d368f0240c18dca99e6e6c546334359a8610a"}, ] +[[package]] +name = "grpcio" +version = "1.63.0" +description = "HTTP/2-based RPC framework" +optional = false +python-versions = ">=3.8" +files = [ + {file = "grpcio-1.63.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:2e93aca840c29d4ab5db93f94ed0a0ca899e241f2e8aec6334ab3575dc46125c"}, + {file = "grpcio-1.63.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:91b73d3f1340fefa1e1716c8c1ec9930c676d6b10a3513ab6c26004cb02d8b3f"}, + {file = "grpcio-1.63.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:b3afbd9d6827fa6f475a4f91db55e441113f6d3eb9b7ebb8fb806e5bb6d6bd0d"}, + {file = "grpcio-1.63.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8f3f6883ce54a7a5f47db43289a0a4c776487912de1a0e2cc83fdaec9685cc9f"}, + {file = "grpcio-1.63.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf8dae9cc0412cb86c8de5a8f3be395c5119a370f3ce2e69c8b7d46bb9872c8d"}, + {file = "grpcio-1.63.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:08e1559fd3b3b4468486b26b0af64a3904a8dbc78d8d936af9c1cf9636eb3e8b"}, + {file = "grpcio-1.63.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5c039ef01516039fa39da8a8a43a95b64e288f79f42a17e6c2904a02a319b357"}, + {file = "grpcio-1.63.0-cp310-cp310-win32.whl", hash = "sha256:ad2ac8903b2eae071055a927ef74121ed52d69468e91d9bcbd028bd0e554be6d"}, + {file = "grpcio-1.63.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:b2e44f59316716532a993ca2966636df6fbe7be4ab6f099de6815570ebe4383a"}, + {file = "grpcio-1.63.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:f28f8b2db7b86c77916829d64ab21ff49a9d8289ea1564a2b2a3a8ed9ffcccd3"}, + {file = "grpcio-1.63.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:65bf975639a1f93bee63ca60d2e4951f1b543f498d581869922910a476ead2f5"}, + {file = "grpcio-1.63.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:b5194775fec7dc3dbd6a935102bb156cd2c35efe1685b0a46c67b927c74f0cfb"}, + {file = "grpcio-1.63.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e4cbb2100ee46d024c45920d16e888ee5d3cf47c66e316210bc236d5bebc42b3"}, + {file = "grpcio-1.63.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ff737cf29b5b801619f10e59b581869e32f400159e8b12d7a97e7e3bdeee6a2"}, + {file = "grpcio-1.63.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cd1e68776262dd44dedd7381b1a0ad09d9930ffb405f737d64f505eb7f77d6c7"}, + {file = "grpcio-1.63.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:93f45f27f516548e23e4ec3fbab21b060416007dbe768a111fc4611464cc773f"}, + {file = "grpcio-1.63.0-cp311-cp311-win32.whl", hash = "sha256:878b1d88d0137df60e6b09b74cdb73db123f9579232c8456f53e9abc4f62eb3c"}, + {file = "grpcio-1.63.0-cp311-cp311-win_amd64.whl", hash = "sha256:756fed02dacd24e8f488f295a913f250b56b98fb793f41d5b2de6c44fb762434"}, + {file = "grpcio-1.63.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:93a46794cc96c3a674cdfb59ef9ce84d46185fe9421baf2268ccb556f8f81f57"}, + {file = "grpcio-1.63.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:a7b19dfc74d0be7032ca1eda0ed545e582ee46cd65c162f9e9fc6b26ef827dc6"}, + {file = "grpcio-1.63.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:8064d986d3a64ba21e498b9a376cbc5d6ab2e8ab0e288d39f266f0fca169b90d"}, + {file = "grpcio-1.63.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:219bb1848cd2c90348c79ed0a6b0ea51866bc7e72fa6e205e459fedab5770172"}, + {file = "grpcio-1.63.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2d60cd1d58817bc5985fae6168d8b5655c4981d448d0f5b6194bbcc038090d2"}, + {file = "grpcio-1.63.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9e350cb096e5c67832e9b6e018cf8a0d2a53b2a958f6251615173165269a91b0"}, + {file = "grpcio-1.63.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:56cdf96ff82e3cc90dbe8bac260352993f23e8e256e063c327b6cf9c88daf7a9"}, + {file = "grpcio-1.63.0-cp312-cp312-win32.whl", hash = "sha256:3a6d1f9ea965e750db7b4ee6f9fdef5fdf135abe8a249e75d84b0a3e0c668a1b"}, + {file = "grpcio-1.63.0-cp312-cp312-win_amd64.whl", hash = "sha256:d2497769895bb03efe3187fb1888fc20e98a5f18b3d14b606167dacda5789434"}, + {file = "grpcio-1.63.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:fdf348ae69c6ff484402cfdb14e18c1b0054ac2420079d575c53a60b9b2853ae"}, + {file = "grpcio-1.63.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a3abfe0b0f6798dedd2e9e92e881d9acd0fdb62ae27dcbbfa7654a57e24060c0"}, + {file = "grpcio-1.63.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:6ef0ad92873672a2a3767cb827b64741c363ebaa27e7f21659e4e31f4d750280"}, + {file = "grpcio-1.63.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b416252ac5588d9dfb8a30a191451adbf534e9ce5f56bb02cd193f12d8845b7f"}, + {file = "grpcio-1.63.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3b77eaefc74d7eb861d3ffbdf91b50a1bb1639514ebe764c47773b833fa2d91"}, + {file = "grpcio-1.63.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b005292369d9c1f80bf70c1db1c17c6c342da7576f1c689e8eee4fb0c256af85"}, + {file = "grpcio-1.63.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cdcda1156dcc41e042d1e899ba1f5c2e9f3cd7625b3d6ebfa619806a4c1aadda"}, + {file = "grpcio-1.63.0-cp38-cp38-win32.whl", hash = "sha256:01799e8649f9e94ba7db1aeb3452188048b0019dc37696b0f5ce212c87c560c3"}, + {file = 
"grpcio-1.63.0-cp38-cp38-win_amd64.whl", hash = "sha256:6a1a3642d76f887aa4009d92f71eb37809abceb3b7b5a1eec9c554a246f20e3a"}, + {file = "grpcio-1.63.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:75f701ff645858a2b16bc8c9fc68af215a8bb2d5a9b647448129de6e85d52bce"}, + {file = "grpcio-1.63.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cacdef0348a08e475a721967f48206a2254a1b26ee7637638d9e081761a5ba86"}, + {file = "grpcio-1.63.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:0697563d1d84d6985e40ec5ec596ff41b52abb3fd91ec240e8cb44a63b895094"}, + {file = "grpcio-1.63.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6426e1fb92d006e47476d42b8f240c1d916a6d4423c5258ccc5b105e43438f61"}, + {file = "grpcio-1.63.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e48cee31bc5f5a31fb2f3b573764bd563aaa5472342860edcc7039525b53e46a"}, + {file = "grpcio-1.63.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:50344663068041b34a992c19c600236e7abb42d6ec32567916b87b4c8b8833b3"}, + {file = "grpcio-1.63.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:259e11932230d70ef24a21b9fb5bb947eb4703f57865a404054400ee92f42f5d"}, + {file = "grpcio-1.63.0-cp39-cp39-win32.whl", hash = "sha256:a44624aad77bf8ca198c55af811fd28f2b3eaf0a50ec5b57b06c034416ef2d0a"}, + {file = "grpcio-1.63.0-cp39-cp39-win_amd64.whl", hash = "sha256:166e5c460e5d7d4656ff9e63b13e1f6029b122104c1633d5f37eaea348d7356d"}, + {file = "grpcio-1.63.0.tar.gz", hash = "sha256:f3023e14805c61bc439fb40ca545ac3d5740ce66120a678a3c6c2c55b70343d1"}, +] + +[package.extras] +protobuf = ["grpcio-tools (>=1.63.0)"] + [[package]] name = "h11" version = "0.14.0" @@ -2478,6 +2536,40 @@ files = [ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] +[[package]] +name = "milvus-lite" +version = "2.4.9" +description = "A lightweight version of Milvus wrapped with Python." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "milvus_lite-2.4.9-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:d3e617b3d68c09ad656d54bc3d8cc4ef6ef56c54015e1563d4fe4bcec6b7c90a"}, + {file = "milvus_lite-2.4.9-py3-none-macosx_11_0_arm64.whl", hash = "sha256:6e7029282d6829b277ebb92f64e2370be72b938e34770e1eb649346bda5d1d7f"}, + {file = "milvus_lite-2.4.9-py3-none-manylinux2014_aarch64.whl", hash = "sha256:9b8e991e4e433596f6a399a165c1a506f823ec9133332e03d7f8a114bff4550d"}, + {file = "milvus_lite-2.4.9-py3-none-manylinux2014_x86_64.whl", hash = "sha256:7f53e674602101cfbcf0a4a59d19eaa139dfd5580639f3040ad73d901f24fc0b"}, +] + +[package.dependencies] +tqdm = "*" + +[[package]] +name = "minio" +version = "7.2.4" +description = "MinIO Python SDK for Amazon S3 Compatible Cloud Storage" +optional = false +python-versions = "*" +files = [ + {file = "minio-7.2.4-py3-none-any.whl", hash = "sha256:91b51c21d25e3ee6d51f52eab126d6c974371add0d77951e42c322a59c5533e7"}, + {file = "minio-7.2.4.tar.gz", hash = "sha256:d504d8464e5198fb74dd9b572cc88b185ae7997c17705e8c09f3fef2f439d984"}, +] + +[package.dependencies] +argon2-cffi = "*" +certifi = "*" +pycryptodome = "*" +typing-extensions = "*" +urllib3 = "*" + [[package]] name = "mistune" version = "3.0.2" @@ -3249,6 +3341,26 @@ files = [ [package.dependencies] wcwidth = "*" +[[package]] +name = "protobuf" +version = "5.27.3" +description = "" +optional = false +python-versions = ">=3.8" +files = [ + {file = "protobuf-5.27.3-cp310-abi3-win32.whl", hash = "sha256:dcb307cd4ef8fec0cf52cb9105a03d06fbb5275ce6d84a6ae33bc6cf84e0a07b"}, + {file = "protobuf-5.27.3-cp310-abi3-win_amd64.whl", hash = "sha256:16ddf3f8c6c41e1e803da7abea17b1793a97ef079a912e42351eabb19b2cffe7"}, + {file = "protobuf-5.27.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:68248c60d53f6168f565a8c76dc58ba4fa2ade31c2d1ebdae6d80f969cdc2d4f"}, + {file = "protobuf-5.27.3-cp38-abi3-manylinux2014_aarch64.whl", hash = 
"sha256:b8a994fb3d1c11156e7d1e427186662b64694a62b55936b2b9348f0a7c6625ce"}, + {file = "protobuf-5.27.3-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:a55c48f2a2092d8e213bd143474df33a6ae751b781dd1d1f4d953c128a415b25"}, + {file = "protobuf-5.27.3-cp38-cp38-win32.whl", hash = "sha256:043853dcb55cc262bf2e116215ad43fa0859caab79bb0b2d31b708f128ece035"}, + {file = "protobuf-5.27.3-cp38-cp38-win_amd64.whl", hash = "sha256:c2a105c24f08b1e53d6c7ffe69cb09d0031512f0b72f812dd4005b8112dbe91e"}, + {file = "protobuf-5.27.3-cp39-cp39-win32.whl", hash = "sha256:c84eee2c71ed83704f1afbf1a85c3171eab0fd1ade3b399b3fad0884cbcca8bf"}, + {file = "protobuf-5.27.3-cp39-cp39-win_amd64.whl", hash = "sha256:af7c0b7cfbbb649ad26132e53faa348580f844d9ca46fd3ec7ca48a1ea5db8a1"}, + {file = "protobuf-5.27.3-py3-none-any.whl", hash = "sha256:8572c6533e544ebf6899c360e91d6bcbbee2549251643d32c52cf8a5de295ba5"}, + {file = "protobuf-5.27.3.tar.gz", hash = "sha256:82460903e640f2b7e34ee81a947fdaad89de796d324bcbc38ff5430bcdead82c"}, +] + [[package]] name = "psutil" version = "6.0.0" @@ -3390,6 +3502,47 @@ files = [ {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, ] +[[package]] +name = "pycryptodome" +version = "3.20.0" +description = "Cryptographic library for Python" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "pycryptodome-3.20.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:f0e6d631bae3f231d3634f91ae4da7a960f7ff87f2865b2d2b831af1dfb04e9a"}, + {file = "pycryptodome-3.20.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:baee115a9ba6c5d2709a1e88ffe62b73ecc044852a925dcb67713a288c4ec70f"}, + {file = "pycryptodome-3.20.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:417a276aaa9cb3be91f9014e9d18d10e840a7a9b9a9be64a42f553c5b50b4d1d"}, + {file = "pycryptodome-3.20.0-cp27-cp27m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:2a1250b7ea809f752b68e3e6f3fd946b5939a52eaeea18c73bdab53e9ba3c2dd"}, + {file = "pycryptodome-3.20.0-cp27-cp27m-musllinux_1_1_aarch64.whl", hash = "sha256:d5954acfe9e00bc83ed9f5cb082ed22c592fbbef86dc48b907238be64ead5c33"}, + {file = "pycryptodome-3.20.0-cp27-cp27m-win32.whl", hash = "sha256:06d6de87c19f967f03b4cf9b34e538ef46e99a337e9a61a77dbe44b2cbcf0690"}, + {file = "pycryptodome-3.20.0-cp27-cp27m-win_amd64.whl", hash = "sha256:ec0bb1188c1d13426039af8ffcb4dbe3aad1d7680c35a62d8eaf2a529b5d3d4f"}, + {file = "pycryptodome-3.20.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:5601c934c498cd267640b57569e73793cb9a83506f7c73a8ec57a516f5b0b091"}, + {file = "pycryptodome-3.20.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d29daa681517f4bc318cd8a23af87e1f2a7bad2fe361e8aa29c77d652a065de4"}, + {file = "pycryptodome-3.20.0-cp27-cp27mu-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3427d9e5310af6680678f4cce149f54e0bb4af60101c7f2c16fdf878b39ccccc"}, + {file = "pycryptodome-3.20.0-cp27-cp27mu-musllinux_1_1_aarch64.whl", hash = "sha256:3cd3ef3aee1079ae44afaeee13393cf68b1058f70576b11439483e34f93cf818"}, + {file = "pycryptodome-3.20.0-cp35-abi3-macosx_10_9_universal2.whl", hash = "sha256:ac1c7c0624a862f2e53438a15c9259d1655325fc2ec4392e66dc46cdae24d044"}, + {file = "pycryptodome-3.20.0-cp35-abi3-macosx_10_9_x86_64.whl", hash = "sha256:76658f0d942051d12a9bd08ca1b6b34fd762a8ee4240984f7c06ddfb55eaf15a"}, + {file = "pycryptodome-3.20.0-cp35-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f35d6cee81fa145333137009d9c8ba90951d7d77b67c79cbe5f03c7eb74d8fe2"}, + {file = "pycryptodome-3.20.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76cb39afede7055127e35a444c1c041d2e8d2f1f9c121ecef573757ba4cd2c3c"}, + {file = "pycryptodome-3.20.0-cp35-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49a4c4dc60b78ec41d2afa392491d788c2e06edf48580fbfb0dd0f828af49d25"}, + 
{file = "pycryptodome-3.20.0-cp35-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:fb3b87461fa35afa19c971b0a2b7456a7b1db7b4eba9a8424666104925b78128"}, + {file = "pycryptodome-3.20.0-cp35-abi3-musllinux_1_1_i686.whl", hash = "sha256:acc2614e2e5346a4a4eab6e199203034924313626f9620b7b4b38e9ad74b7e0c"}, + {file = "pycryptodome-3.20.0-cp35-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:210ba1b647837bfc42dd5a813cdecb5b86193ae11a3f5d972b9a0ae2c7e9e4b4"}, + {file = "pycryptodome-3.20.0-cp35-abi3-win32.whl", hash = "sha256:8d6b98d0d83d21fb757a182d52940d028564efe8147baa9ce0f38d057104ae72"}, + {file = "pycryptodome-3.20.0-cp35-abi3-win_amd64.whl", hash = "sha256:9b3ae153c89a480a0ec402e23db8d8d84a3833b65fa4b15b81b83be9d637aab9"}, + {file = "pycryptodome-3.20.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:4401564ebf37dfde45d096974c7a159b52eeabd9969135f0426907db367a652a"}, + {file = "pycryptodome-3.20.0-pp27-pypy_73-win32.whl", hash = "sha256:ec1f93feb3bb93380ab0ebf8b859e8e5678c0f010d2d78367cf6bc30bfeb148e"}, + {file = "pycryptodome-3.20.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:acae12b9ede49f38eb0ef76fdec2df2e94aad85ae46ec85be3648a57f0a7db04"}, + {file = "pycryptodome-3.20.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f47888542a0633baff535a04726948e876bf1ed880fddb7c10a736fa99146ab3"}, + {file = "pycryptodome-3.20.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e0e4a987d38cfc2e71b4a1b591bae4891eeabe5fa0f56154f576e26287bfdea"}, + {file = "pycryptodome-3.20.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c18b381553638414b38705f07d1ef0a7cf301bc78a5f9bc17a957eb19446834b"}, + {file = "pycryptodome-3.20.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a60fedd2b37b4cb11ccb5d0399efe26db9e0dd149016c1cc6c8161974ceac2d6"}, + {file = "pycryptodome-3.20.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:405002eafad114a2f9a930f5db65feef7b53c4784495dd8758069b89baf68eab"}, + {file = "pycryptodome-3.20.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ab6ab0cb755154ad14e507d1df72de9897e99fd2d4922851a276ccc14f4f1a5"}, + {file = "pycryptodome-3.20.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:acf6e43fa75aca2d33e93409f2dafe386fe051818ee79ee8a3e21de9caa2ac9e"}, + {file = "pycryptodome-3.20.0.tar.gz", hash = "sha256:09609209ed7de61c2b560cc5c8c4fbf892f8b15b1faf7e4cbffac97db1fffda7"}, +] + [[package]] name = "pydantic" version = "2.8.2" @@ -3570,6 +3723,35 @@ ray = ["ray[data]"] tests = ["boto3", "datasets", "duckdb", "h5py (<3.11)", "ml-dtypes", "pandas", "pillow", "polars[pandas,pyarrow]", "pytest", "tensorflow", "tqdm"] torch = ["torch"] +[[package]] +name = "pymilvus" +version = "2.5.0rc68" +description = "Python Sdk for Milvus" +optional = false +python-versions = ">=3.8" +files = [] +develop = false + +[package.dependencies] +environs = "<=11.0.0" +grpcio = ">=1.49.1,<=1.63.0" +milvus-lite = {version = ">=2.4.0", markers = "sys_platform != \"win32\""} +pandas = ">=1.2.4" +protobuf = ">=3.20.0" +setuptools = ">69" +ujson = ">=2.0.0" + +[package.extras] +bulk-writer = ["azure-storage-blob", "minio (>=7.0.0)", "pyarrow (>=12.0.0)", "requests"] +dev = ["black", "grpcio (==1.62.2)", "grpcio-testing (==1.62.2)", "grpcio-tools (==1.62.2)", "pytest (>=5.3.4)", "pytest-cov (>=2.8.1)", "pytest-timeout (>=1.3.4)", "ruff (>0.4.0)"] +model = ["milvus-model (>=0.1.0)"] + +[package.source] +type = "git" +url = "https://github.com/likenamehaojie/pymilvus.git" +reference = "HEAD" +resolved_reference = "ed46807b1803a1506a216a91bf1d4bbd49873382" + [[package]] name = "pynndescent" version = "0.5.13" @@ -4891,6 +5073,93 @@ files = [ [package.extras] test = ["coverage", "pytest", "pytest-cov"] +[[package]] +name = "ujson" +version = "5.10.0" +description = "Ultra fast JSON encoder and decoder for Python" +optional 
= false +python-versions = ">=3.8" +files = [ + {file = "ujson-5.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2601aa9ecdbee1118a1c2065323bda35e2c5a2cf0797ef4522d485f9d3ef65bd"}, + {file = "ujson-5.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:348898dd702fc1c4f1051bc3aacbf894caa0927fe2c53e68679c073375f732cf"}, + {file = "ujson-5.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22cffecf73391e8abd65ef5f4e4dd523162a3399d5e84faa6aebbf9583df86d6"}, + {file = "ujson-5.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26b0e2d2366543c1bb4fbd457446f00b0187a2bddf93148ac2da07a53fe51569"}, + {file = "ujson-5.10.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:caf270c6dba1be7a41125cd1e4fc7ba384bf564650beef0df2dd21a00b7f5770"}, + {file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a245d59f2ffe750446292b0094244df163c3dc96b3ce152a2c837a44e7cda9d1"}, + {file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:94a87f6e151c5f483d7d54ceef83b45d3a9cca7a9cb453dbdbb3f5a6f64033f5"}, + {file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:29b443c4c0a113bcbb792c88bea67b675c7ca3ca80c3474784e08bba01c18d51"}, + {file = "ujson-5.10.0-cp310-cp310-win32.whl", hash = "sha256:c18610b9ccd2874950faf474692deee4223a994251bc0a083c114671b64e6518"}, + {file = "ujson-5.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:924f7318c31874d6bb44d9ee1900167ca32aa9b69389b98ecbde34c1698a250f"}, + {file = "ujson-5.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a5b366812c90e69d0f379a53648be10a5db38f9d4ad212b60af00bd4048d0f00"}, + {file = "ujson-5.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:502bf475781e8167f0f9d0e41cd32879d120a524b22358e7f205294224c71126"}, + {file = "ujson-5.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:5b91b5d0d9d283e085e821651184a647699430705b15bf274c7896f23fe9c9d8"}, + {file = "ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:129e39af3a6d85b9c26d5577169c21d53821d8cf68e079060602e861c6e5da1b"}, + {file = "ujson-5.10.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f77b74475c462cb8b88680471193064d3e715c7c6074b1c8c412cb526466efe9"}, + {file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7ec0ca8c415e81aa4123501fee7f761abf4b7f386aad348501a26940beb1860f"}, + {file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ab13a2a9e0b2865a6c6db9271f4b46af1c7476bfd51af1f64585e919b7c07fd4"}, + {file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:57aaf98b92d72fc70886b5a0e1a1ca52c2320377360341715dd3933a18e827b1"}, + {file = "ujson-5.10.0-cp311-cp311-win32.whl", hash = "sha256:2987713a490ceb27edff77fb184ed09acdc565db700ee852823c3dc3cffe455f"}, + {file = "ujson-5.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:f00ea7e00447918ee0eff2422c4add4c5752b1b60e88fcb3c067d4a21049a720"}, + {file = "ujson-5.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:98ba15d8cbc481ce55695beee9f063189dce91a4b08bc1d03e7f0152cd4bbdd5"}, + {file = "ujson-5.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a9d2edbf1556e4f56e50fab7d8ff993dbad7f54bac68eacdd27a8f55f433578e"}, + {file = "ujson-5.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6627029ae4f52d0e1a2451768c2c37c0c814ffc04f796eb36244cf16b8e57043"}, + {file = "ujson-5.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8ccb77b3e40b151e20519c6ae6d89bfe3f4c14e8e210d910287f778368bb3d1"}, + {file = "ujson-5.10.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3caf9cd64abfeb11a3b661329085c5e167abbe15256b3b68cb5d914ba7396f3"}, + {file = 
"ujson-5.10.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6e32abdce572e3a8c3d02c886c704a38a1b015a1fb858004e03d20ca7cecbb21"}, + {file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a65b6af4d903103ee7b6f4f5b85f1bfd0c90ba4eeac6421aae436c9988aa64a2"}, + {file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:604a046d966457b6cdcacc5aa2ec5314f0e8c42bae52842c1e6fa02ea4bda42e"}, + {file = "ujson-5.10.0-cp312-cp312-win32.whl", hash = "sha256:6dea1c8b4fc921bf78a8ff00bbd2bfe166345f5536c510671bccececb187c80e"}, + {file = "ujson-5.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:38665e7d8290188b1e0d57d584eb8110951a9591363316dd41cf8686ab1d0abc"}, + {file = "ujson-5.10.0-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:618efd84dc1acbd6bff8eaa736bb6c074bfa8b8a98f55b61c38d4ca2c1f7f287"}, + {file = "ujson-5.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:38d5d36b4aedfe81dfe251f76c0467399d575d1395a1755de391e58985ab1c2e"}, + {file = "ujson-5.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67079b1f9fb29ed9a2914acf4ef6c02844b3153913eb735d4bf287ee1db6e557"}, + {file = "ujson-5.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7d0e0ceeb8fe2468c70ec0c37b439dd554e2aa539a8a56365fd761edb418988"}, + {file = "ujson-5.10.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:59e02cd37bc7c44d587a0ba45347cc815fb7a5fe48de16bf05caa5f7d0d2e816"}, + {file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2a890b706b64e0065f02577bf6d8ca3b66c11a5e81fb75d757233a38c07a1f20"}, + {file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:621e34b4632c740ecb491efc7f1fcb4f74b48ddb55e65221995e74e2d00bbff0"}, + {file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b9500e61fce0cfc86168b248104e954fead61f9be213087153d272e817ec7b4f"}, + {file = 
"ujson-5.10.0-cp313-cp313-win32.whl", hash = "sha256:4c4fc16f11ac1612f05b6f5781b384716719547e142cfd67b65d035bd85af165"}, + {file = "ujson-5.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:4573fd1695932d4f619928fd09d5d03d917274381649ade4328091ceca175539"}, + {file = "ujson-5.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a984a3131da7f07563057db1c3020b1350a3e27a8ec46ccbfbf21e5928a43050"}, + {file = "ujson-5.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73814cd1b9db6fc3270e9d8fe3b19f9f89e78ee9d71e8bd6c9a626aeaeaf16bd"}, + {file = "ujson-5.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61e1591ed9376e5eddda202ec229eddc56c612b61ac6ad07f96b91460bb6c2fb"}, + {file = "ujson-5.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2c75269f8205b2690db4572a4a36fe47cd1338e4368bc73a7a0e48789e2e35a"}, + {file = "ujson-5.10.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7223f41e5bf1f919cd8d073e35b229295aa8e0f7b5de07ed1c8fddac63a6bc5d"}, + {file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d4dc2fd6b3067c0782e7002ac3b38cf48608ee6366ff176bbd02cf969c9c20fe"}, + {file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:232cc85f8ee3c454c115455195a205074a56ff42608fd6b942aa4c378ac14dd7"}, + {file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:cc6139531f13148055d691e442e4bc6601f6dba1e6d521b1585d4788ab0bfad4"}, + {file = "ujson-5.10.0-cp38-cp38-win32.whl", hash = "sha256:e7ce306a42b6b93ca47ac4a3b96683ca554f6d35dd8adc5acfcd55096c8dfcb8"}, + {file = "ujson-5.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:e82d4bb2138ab05e18f089a83b6564fee28048771eb63cdecf4b9b549de8a2cc"}, + {file = "ujson-5.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dfef2814c6b3291c3c5f10065f745a1307d86019dbd7ea50e83504950136ed5b"}, + {file = "ujson-5.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:4734ee0745d5928d0ba3a213647f1c4a74a2a28edc6d27b2d6d5bd9fa4319e27"}, + {file = "ujson-5.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d47ebb01bd865fdea43da56254a3930a413f0c5590372a1241514abae8aa7c76"}, + {file = "ujson-5.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dee5e97c2496874acbf1d3e37b521dd1f307349ed955e62d1d2f05382bc36dd5"}, + {file = "ujson-5.10.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7490655a2272a2d0b072ef16b0b58ee462f4973a8f6bbe64917ce5e0a256f9c0"}, + {file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba17799fcddaddf5c1f75a4ba3fd6441f6a4f1e9173f8a786b42450851bd74f1"}, + {file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:2aff2985cef314f21d0fecc56027505804bc78802c0121343874741650a4d3d1"}, + {file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ad88ac75c432674d05b61184178635d44901eb749786c8eb08c102330e6e8996"}, + {file = "ujson-5.10.0-cp39-cp39-win32.whl", hash = "sha256:2544912a71da4ff8c4f7ab5606f947d7299971bdd25a45e008e467ca638d13c9"}, + {file = "ujson-5.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:3ff201d62b1b177a46f113bb43ad300b424b7847f9c5d38b1b4ad8f75d4a282a"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5b6fee72fa77dc172a28f21693f64d93166534c263adb3f96c413ccc85ef6e64"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:61d0af13a9af01d9f26d2331ce49bb5ac1fb9c814964018ac8df605b5422dcb3"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecb24f0bdd899d368b715c9e6664166cf694d1e57be73f17759573a6986dd95a"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fbd8fd427f57a03cff3ad6574b5e299131585d9727c8c366da4624a9069ed746"}, + {file = 
"ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:beeaf1c48e32f07d8820c705ff8e645f8afa690cca1544adba4ebfa067efdc88"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:baed37ea46d756aca2955e99525cc02d9181de67f25515c468856c38d52b5f3b"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7663960f08cd5a2bb152f5ee3992e1af7690a64c0e26d31ba7b3ff5b2ee66337"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:d8640fb4072d36b08e95a3a380ba65779d356b2fee8696afeb7794cf0902d0a1"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78778a3aa7aafb11e7ddca4e29f46bc5139131037ad628cc10936764282d6753"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0111b27f2d5c820e7f2dbad7d48e3338c824e7ac4d2a12da3dc6061cc39c8e6"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:c66962ca7565605b355a9ed478292da628b8f18c0f2793021ca4425abf8b01e5"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ba43cc34cce49cf2d4bc76401a754a81202d8aa926d0e2b79f0ee258cb15d3a4"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:ac56eb983edce27e7f51d05bc8dd820586c6e6be1c5216a6809b0c668bb312b8"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44bd4b23a0e723bf8b10628288c2c7c335161d6840013d4d5de20e48551773b"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c10f4654e5326ec14a46bcdeb2b685d4ada6911050aa8baaf3501e57024b804"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:0de4971a89a762398006e844ae394bd46991f7c385d7a6a3b93ba229e6dac17e"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e1402f0564a97d2a52310ae10a64d25bcef94f8dd643fcf5d310219d915484f7"}, + {file = "ujson-5.10.0.tar.gz", hash = "sha256:b3cd8f3c5d8c7738257f1018880444f7b7d9b66232c64649f562d7ba86ad4bc1"}, +] + [[package]] name = "umap-learn" version = "0.5.6" @@ -5165,4 +5434,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "45b9e71e0d705b4332863430cf5501b8323fb2bfde37b26170298e429c084c4c" +content-hash = "0f7128484423d7f32dff20165a1c9b8fe7963d9727b43e8d3cba977e2e6311dd" diff --git a/pyproject.toml b/pyproject.toml index 406516570b..11d526c3fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,8 @@ format-jinja = """ python = ">=3.10,<3.13" environs = "^11.0.0" datashaper = "^0.0.49" - +minio="7.2.4" +pymilvus = {git = "https://github.com/likenamehaojie/pymilvus.git"} # Vector Stores azure-search-documents = "^11.4.0" lancedb = "^0.11.0"