Apply comments from llama_index #2

Merged · 4 commits · Feb 1, 2025
@@ -29,6 +29,7 @@
" service_name=\"llama-index-vesslai-test\",\n",
" model_name=\"mistralai/Mistral-7B-Instruct-v0.3\",\n",
" hf_token=\"HF_TOKEN\",\n",
" api_key=\"OPENAI_API_KEY\",\n",
")"
]
},
39 changes: 39 additions & 0 deletions llama-index-integrations/llms/llama-index-llms-vesslai/README.md
@@ -1 +1,40 @@
# LlamaIndex LLMs Integration: VESSL AI

[VESSL AI](https://vessl.ai/) is an MLOps platform for professional AI & LLM teams. VESSL supports both multi-cloud and on-premise systems, helping ML teams manage their workloads and data seamlessly.

## LlamaIndex VESSL AI LLM Provider

The VESSL AI LLM Provider lets AI engineers serve their models with the `serve` method. If your team has already deployed an LLM with VESSL and has an endpoint, you can connect to that endpoint and use it in your LlamaIndex workflow. The provider behaves like an OpenAI-compatible (`OpenAILike`) LLM.
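
A minimal sketch of constructing the provider (the import path below is assumed from the package name `llama-index-llms-vesslai` and may differ from the published module):

```python
# Assumed import path, inferred from the package name; verify against your installed version.
from llama_index.llms.vesslai import VesslAILLM

# Instantiating the provider configures your VESSL credentials via vessl.configure().
llm = VesslAILLM()
```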

1. To serve a Hugging Face model, use the `serve` method:

```python
# 1 Serve with Hugging Face model name
llm.serve(
service_name="llama-index-vesslai-test",
model_name="mistralai/Mistral-7B-Instruct-v0.3",
hf_token="HF_TOKEN",
api_key="OPENAI-API-KEY",
)
```

2. If you have your own VESSL service YAML file, you can deploy the workload with it:

```python
# 2 Serve with YAML file
llm.serve(
service_name="llama-index-vesslai-test",
yaml_path="/your/own/service_yaml_file.yaml",
api_key="OPENAI-API-KEY",
)
```

3. If your LLM is already served, you can simply connect to its endpoint:

```python
# 3 Connect with pre-served endpoint
llm.connect(
served_model_name="mistralai/Mistral-7B-Instruct-v0.3",
endpoint="https://model-service-gateway-abc.oregon.google-cluster.vessl.ai/v1",
)
```
@@ -1,6 +1,8 @@
import os
import vessl.serving
import yaml
import asyncio
import nest_asyncio
from typing import Any, Optional
from pydantic import BaseModel

@@ -57,12 +59,14 @@ class VesslAILLM(OpenAILike, BaseModel):
organization_name: str = None
default_service_yaml: str = "vesslai_vllm.yaml"

def __init__(self, **kwargs: Any) -> None:
def __init__(self, force_update_access_token: bool = False, **kwargs: Any) -> None:
nest_asyncio.apply()
super().__init__()
self._configure()

def _configure(self) -> None:
vessl.configure()
self._configure(force_update_access_token=force_update_access_token)

def _configure(self, force_update_access_token) -> None:
vessl.configure(force_update_access_token=force_update_access_token)
if vessl.vessl_api.is_in_run_exec_context():
vessl.vessl_api.set_access_token(no_prompt=True)
user = vessl.vessl_api.user
@@ -102,6 +106,50 @@ def serve(
force_relaunch: bool = False,
**kwargs: Any,
) -> None:
loop = asyncio.get_event_loop()
if loop.is_running():
Collaborator (author) comment: In the Jupyter case an event loop is already running, so the task is added to that existing loop instead. I'd appreciate it if you could take a look at this part.
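
For reference, a minimal standalone sketch of that pattern (the helper and coroutine names are illustrative, not part of this PR):

```python
import asyncio

import nest_asyncio


async def _launch() -> str:
    # Stand-in for the real async serving coroutine.
    await asyncio.sleep(0)
    return "launched"


def run_blocking(coro):
    # nest_asyncio patches the event loop so run_until_complete can be
    # nested, which is what makes this work inside a Jupyter kernel.
    nest_asyncio.apply()
    loop = asyncio.get_event_loop()
    if loop.is_running():
        # Jupyter: a loop is already running, so schedule the coroutine on it.
        task = loop.create_task(coro)
        return loop.run_until_complete(task)
    # Plain script: no loop is running, so start a fresh one.
    return asyncio.run(coro)


print(run_blocking(_launch()))  # prints "launched"
```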

task = loop.create_task(
self._serve(
service_name=service_name,
model_name=model_name,
yaml_path=yaml_path,
is_chat_model=is_chat_model,
serverless=serverless,
api_key=api_key,
service_auth_key=service_auth_key,
force_relaunch=force_relaunch,
**kwargs,
)
)
loop.run_until_complete(task)
else:
asyncio.run(
self._serve(
service_name=service_name,
model_name=model_name,
yaml_path=yaml_path,
is_chat_model=is_chat_model,
serverless=serverless,
api_key=api_key,
service_auth_key=service_auth_key,
force_relaunch=force_relaunch,
**kwargs,
)
)

async def _serve(
self,
service_name: str,
model_name: Optional[str] = None,
yaml_path: Optional[str] = None,
is_chat_model: bool = True,
serverless: bool = False,
api_key: str = None,
service_auth_key: str = None,
force_relaunch: bool = False,
**kwargs: Any,
) -> None:
print("Launching VesslAI LLM Service")
self.organization_name = kwargs.get("organization_name", self.organization_name)
self._validate_openai_key(api_key=api_key)

@@ -159,7 +207,7 @@ def serve(
)
return

self.api_base = self._launch_service_revision_from_yaml(
self.api_base = await self._launch_service_revision_from_yaml(
organization_name=self.organization_name,
yaml_path=serve_yaml_path,
service_name=service_name,
@@ -210,7 +258,7 @@ def _build_model_serve_config(
service_config["env"]["MODEL_NAME"] = model_name
return service_config

def _launch_service_revision_from_yaml(
async def _launch_service_revision_from_yaml(
self,
organization_name: str,
yaml_path: str,
@@ -244,7 +292,7 @@ def _launch_service_revision_from_yaml(
print(f"Check your Service at: {service_url}")

gateway = read_service(service_name=service_name).gateway_config
wait_for_gateway_enabled(
await wait_for_gateway_enabled(
gateway=gateway, service_name=revision.model_service_name
)

@@ -1,10 +1,11 @@
import asyncio
import time
import yaml
from typing import Any
from vessl import vessl_api


def wait_for_gateway_enabled(
async def wait_for_gateway_enabled(
gateway: Any, service_name: str, max_timeout_sec: int = 8 * 60
) -> bool:
"""Waits for the gateway of a service to be enabled.
@@ -146,7 +147,7 @@ def _get_recent_rollout(service_name: str) -> Any:
return resp.rollouts[0] if resp.rollouts else None


def abort_in_progress_rollout_by_name(service_name: str):
async def abort_in_progress_rollout_by_name(service_name: str):
"""Aborts an ongoing rollout for a given service.

Args:
@@ -157,7 +158,7 @@ def abort_in_progress_rollout_by_name(service_name: str):
print(f"The service {service_name} is currently rolling out.")
if _request_abort_rollout(service_name):
print("Waiting for the existing rollout to be aborted...")
time.sleep(30)
await asyncio.sleep(30)
else:
print("No existing rollout found.")
