Apply comments from llama_index #2

Merged · 4 commits · Feb 1, 2025
@@ -29,6 +29,7 @@
" service_name=\"llama-index-vesslai-test\",\n",
" model_name=\"mistralai/Mistral-7B-Instruct-v0.3\",\n",
" hf_token=\"HF_TOKEN\",\n",
" api_key=\"OPENAI_API_KEY\",\n",
")"
]
},
39 changes: 39 additions & 0 deletions llama-index-integrations/llms/llama-index-llms-vesslai/README.md
@@ -1 +1,40 @@
# LlamaIndex LLMs Integration: VESSL AI

[VESSL AI](https://vessl.ai/) is an MLOps platform for professional AI & LLM teams. VESSL supports both multi-cloud and on-premise systems, helping ML teams manage their workloads and data seamlessly.

## LlamaIndex VESSL AI LLM Provider

The VESSL AI LLM Provider lets AI engineers serve their models with the `serve` method. If your team has already deployed an LLM with VESSL and has an endpoint, you can connect to that endpoint and use it in your LlamaIndex workflow. The provider behaves like an OpenAI-compatible (`OpenAILike`) LLM.
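
A minimal sketch of constructing the provider (the import path below is assumed from the package name `llama-index-llms-vesslai` and may differ from the published module):

```python
# Assumed import path, inferred from the package name; verify against your installed version.
from llama_index.llms.vesslai import VesslAILLM

# Instantiating the provider configures your VESSL credentials via vessl.configure().
llm = VesslAILLM()
```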

1. To serve a Hugging Face model, use the `serve` method:

```python
# 1 Serve with Hugging Face model name
llm.serve(
service_name="llama-index-vesslai-test",
model_name="mistralai/Mistral-7B-Instruct-v0.3",
hf_token="HF_TOKEN",
api_key="OPENAI-API-KEY",
)
```

2. If you have your own VESSL service YAML file, you can deploy the workload with it:

```python
# 2 Serve with YAML file
llm.serve(
service_name="llama-index-vesslai-test",
yaml_path="/your/own/service_yaml_file.yaml",
api_key="OPENAI-API-KEY",
)
```

3. If your LLM is already served, you can simply connect to its endpoint:

```python
# 3 Connect with pre-served endpoint
llm.connect(
served_model_name="mistralai/Mistral-7B-Instruct-v0.3",
endpoint="https://model-service-gateway-abc.oregon.google-cluster.vessl.ai/v1",
)
```
@@ -1,6 +1,8 @@
import os
import vessl.serving
import yaml
import asyncio
import nest_asyncio
from typing import Any, Optional
from pydantic import BaseModel

@@ -57,12 +59,14 @@ class VesslAILLM(OpenAILike, BaseModel):
organization_name: str = None
default_service_yaml: str = "vesslai_vllm.yaml"

def __init__(self, **kwargs: Any) -> None:
def __init__(self, force_update_access_token: bool = False, **kwargs: Any) -> None:
nest_asyncio.apply()
super().__init__()
self._configure()

def _configure(self) -> None:
vessl.configure()
self._configure(force_update_access_token=force_update_access_token)

def _configure(self, force_update_access_token) -> None:
vessl.configure(force_update_access_token=force_update_access_token)
if vessl.vessl_api.is_in_run_exec_context():
vessl.vessl_api.set_access_token(no_prompt=True)
user = vessl.vessl_api.user
@@ -102,6 +106,50 @@ def serve(
force_relaunch: bool = False,
**kwargs: Any,
) -> None:
loop = asyncio.get_event_loop()
if loop.is_running():
Collaborator (author) comment: In the Jupyter case an event loop is already running, so the task is added to that existing loop instead. I'd appreciate it if you could take a look at this part.
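
For reference, a minimal standalone sketch of that pattern (the helper and coroutine names are illustrative, not part of this PR):

```python
import asyncio

import nest_asyncio


async def _launch() -> str:
    # Stand-in for the real async serving coroutine.
    await asyncio.sleep(0)
    return "launched"


def run_blocking(coro):
    # nest_asyncio patches the event loop so run_until_complete can be
    # nested, which is what makes this work inside a Jupyter kernel.
    nest_asyncio.apply()
    loop = asyncio.get_event_loop()
    if loop.is_running():
        # Jupyter: a loop is already running, so schedule the coroutine on it.
        task = loop.create_task(coro)
        return loop.run_until_complete(task)
    # Plain script: no loop is running, so start a fresh one.
    return asyncio.run(coro)


print(run_blocking(_launch()))  # prints "launched"
```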

task = loop.create_task(
self._serve(
service_name=service_name,
model_name=model_name,
yaml_path=yaml_path,
is_chat_model=is_chat_model,
serverless=serverless,
api_key=api_key,
service_auth_key=service_auth_key,
force_relaunch=force_relaunch,
**kwargs,
)
)
loop.run_until_complete(task)
else:
asyncio.run(
self._serve(
service_name=service_name,
model_name=model_name,
yaml_path=yaml_path,
is_chat_model=is_chat_model,
serverless=serverless,
api_key=api_key,
service_auth_key=service_auth_key,
force_relaunch=force_relaunch,
**kwargs,
)
)

async def _serve(
self,
service_name: str,
model_name: Optional[str] = None,
yaml_path: Optional[str] = None,
is_chat_model: bool = True,
serverless: bool = False,
api_key: str = None,
service_auth_key: str = None,
force_relaunch: bool = False,
**kwargs: Any,
) -> None:
print("Launching VesslAI LLM Service")
self.organization_name = kwargs.get("organization_name", self.organization_name)
self._validate_openai_key(api_key=api_key)

@@ -159,7 +207,7 @@ def serve(
)
return

self.api_base = self._launch_service_revision_from_yaml(
self.api_base = await self._launch_service_revision_from_yaml(
organization_name=self.organization_name,
yaml_path=serve_yaml_path,
service_name=service_name,
@@ -210,7 +258,7 @@ def _build_model_serve_config(
service_config["env"]["MODEL_NAME"] = model_name
return service_config

def _launch_service_revision_from_yaml(
async def _launch_service_revision_from_yaml(
self,
organization_name: str,
yaml_path: str,
@@ -244,7 +292,7 @@ def _launch_service_revision_from_yaml(
print(f"Check your Service at: {service_url}")

gateway = read_service(service_name=service_name).gateway_config
wait_for_gateway_enabled(
await wait_for_gateway_enabled(
gateway=gateway, service_name=revision.model_service_name
)

@@ -1,10 +1,11 @@
import asyncio
import time
import yaml
from typing import Any
from vessl import vessl_api


def wait_for_gateway_enabled(
async def wait_for_gateway_enabled(
gateway: Any, service_name: str, max_timeout_sec: int = 8 * 60
) -> bool:
"""Waits for the gateway of a service to be enabled.
@@ -146,7 +147,7 @@ def _get_recent_rollout(service_name: str) -> Any:
return resp.rollouts[0] if resp.rollouts else None


def abort_in_progress_rollout_by_name(service_name: str):
async def abort_in_progress_rollout_by_name(service_name: str):
"""Aborts an ongoing rollout for a given service.

Args:
@@ -157,7 +158,7 @@ def abort_in_progress_rollout_by_name(service_name: str):
print(f"The service {service_name} is currently rolling out.")
if _request_abort_rollout(service_name):
print("Waiting for the existing rollout to be aborted...")
time.sleep(30)
await asyncio.sleep(30)
else:
print("No existing rollout found.")
