feat(sample): add a pipeline example about rag #11223

Open · wants to merge 19 commits into master
37 changes: 37 additions & 0 deletions .github/workflows/rag_pipeline.yml
@@ -0,0 +1,37 @@
name: rag_pipeline

on:
  push:
    branches:
      - master
      - RAG_example
  workflow_dispatch:

jobs:
  samples:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v2

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.8

      - name: Create KFP cluster
        uses: ./.github/actions/kfp-cluster

      - name: Forward API port
        run: ./scripts/deploy/github/forward-port.sh "kubeflow" "ml-pipeline" 8888 8888

      - name: Grant execute permissions to scripts
        shell: bash
        run: chmod +x ./samples/contrib/RAG_kubeflow/example/.github/rag_action.sh

      - name: Install dos2unix
        run: sudo apt-get update && sudo apt-get install -y dos2unix

      - name: Run Samples Tests
        run: dos2unix ./samples/contrib/RAG_kubeflow/example/.github/rag_action.sh && ./samples/contrib/RAG_kubeflow/example/.github/rag_action.sh
Binary file added samples/contrib/RAG_kubeflow/Code_para.png
43 changes: 43 additions & 0 deletions samples/contrib/RAG_kubeflow/README.md
@@ -0,0 +1,43 @@
# RAG Component

This is a Kubeflow pipeline component that retrieves answers from a provided document using a Retrieval-Augmented Generation (RAG) approach. The component utilizes various libraries such as `langchain`, `pinecone`, and `transformers` to process a PDF document, create embeddings, and generate answers based on the retrieved content.
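
At a high level, the sample is a single-component pipeline: one `RAG` component downloads the PDF, indexes it, and answers the question. Stripped of the RAG logic, the shape is roughly this (a sketch of the full component defined in `rag.py` below):

```python
from kfp import dsl

@dsl.component(base_image="python:3.10.12")
def RAG(questions: str):
    # download PDF -> split into chunks -> embed -> index in Pinecone
    # -> RetrievalQA over the index -> print the answer
    ...

@dsl.pipeline
def rag_pipeline(recipient: str):
    RAG(questions=recipient)
```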

## Prework

### Installation

```
pip install kfp
```

### Pinecone

In this example, we use Pinecone to create a vector database. Create an account, then create an index: enter an index name, set the dimension to 384 (matching the output size of the embedding model used below), and create the index.

![](https://github.com/sefgsefg/RAG_kubeflow/blob/main/Pinecone_create_index.png)
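
If you prefer to create the index from code instead of the console, a minimal sketch with the `pinecone-client` v3 API looks like this (the cloud/region values are placeholders for whatever your account offers):

```python
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key="<change_yours>")

# The dimension must match the embedding model's output size:
# 384 for sentence-transformers/all-MiniLM-L6-v2.
pc.create_index(
    name="<change_yours>",
    dimension=384,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)
```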

Then create an API key (token):

![](https://github.com/sefgsefg/RAG_kubeflow/blob/main/Pinecone_create_APIkey.png)

---

### Huggingface

If the model you use requires signing an agreement (like the Meta Llama models), you will have to create a Hugging Face API key.
Go to your Hugging Face settings, find Access Tokens, then click "New token" to create a key with the `READ` role.

![](https://github.com/sefgsefg/RAG_kubeflow/blob/main/huggingface_token.png)

---
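
The component later passes this token to `huggingface_hub.login()`; that call is all the token is used for, e.g.:

```python
from huggingface_hub import login

# The READ token created above; gated models such as
# meta-llama/Llama-2-7b-chat-hf cannot be downloaded without it.
login(token="<change_yours>")
```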

### Adjust the parameters of the code

Now fill in your parameters in the code.
Set each of the parameters shown below; for the PDF and the models you can use the values given in the "Ex:" comments.

![](https://github.com/sefgsefg/RAG_kubeflow/blob/main/Code_para.png)

There is also a `load_in_8bit` parameter in the `AutoModelForCausalLM.from_pretrained` call; if your platform doesn't have a GPU, comment it out (8-bit loading via `bitsandbytes` requires CUDA).
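
For reference, a CPU-only load might look like this (a sketch; note that generation on CPU will be very slow for large models):

```python
from transformers import AutoModelForCausalLM

# CPU-only variant: no load_in_8bit (bitsandbytes needs CUDA) and the
# default float32 dtype, since float16 is poorly supported on CPU.
model = AutoModelForCausalLM.from_pretrained(
    LLM_model,
    device_map="auto",
    use_auth_token=True,
)
```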

Reference
---
https://github.com/MuhammadMoinFaisal/LargeLanguageModelsProjects/tree/main/Chat_with_Multiple_PDF_llama2_Pinecone
47 changes: 47 additions & 0 deletions samples/contrib/RAG_kubeflow/example/.github/init.py
@@ -0,0 +1,47 @@
import os
import unittest
from dataclasses import dataclass
from pprint import pprint

import kfp
from kfp.dsl.graph_component import GraphComponent
import rag

_MINUTE = 60  # seconds
_DEFAULT_TIMEOUT = 5 * _MINUTE


@dataclass
class TestCase:
    pipeline_func: GraphComponent
    timeout: int = _DEFAULT_TIMEOUT


class SampleTest(unittest.TestCase):
    _kfp_host_and_port = os.getenv('KFP_API_HOST_AND_PORT', 'http://localhost:8888')
    _kfp_ui_and_port = os.getenv('KFP_UI_HOST_AND_PORT', 'http://localhost:8080')
    _client = kfp.Client(host=_kfp_host_and_port, ui_host=_kfp_ui_and_port)

    def test(self):
        # Only test the RAG pipeline sample.
        test_case = TestCase(pipeline_func=rag.rag_pipeline)

        # Run the test case directly.
        self.run_test_case(test_case.pipeline_func, test_case.timeout)

    def run_test_case(self, pipeline_func: GraphComponent, timeout: int):
        # Run the given pipeline function and wait for it to complete.
        with self.subTest(pipeline=pipeline_func, msg=pipeline_func.name):
            run_result = self._client.create_run_from_pipeline_func(pipeline_func=pipeline_func)
            run_response = run_result.wait_for_run_completion(timeout)

            # Print the run details.
            pprint(run_response.run_details)
            print("Run details page URL:")
            print(f"{self._kfp_ui_and_port}/#/runs/details/{run_response.run_id}")

            # Check that the run succeeded.
            self.assertEqual(run_response.state, "SUCCEEDED")


if __name__ == '__main__':
    unittest.main()
123 changes: 123 additions & 0 deletions samples/contrib/RAG_kubeflow/example/.github/rag.py
@@ -0,0 +1,123 @@
from kfp import dsl

@dsl.component(base_image="python:3.10.12", packages_to_install=[
    'requests',
    'langchain',
    'langchain-community',
    'pypdf',
    'pinecone-client',
    'langchain_pinecone',
    'sentence_transformers',
    'googletrans==3.1.0a0',
    'transformers',
    'torch',
    'bitsandbytes',
    'accelerate',
    'urllib3==1.26.15'
])
def RAG(questions: str):
    import os

    import requests
    import torch
    from huggingface_hub import login
    from langchain import HuggingFacePipeline, PromptTemplate
    from langchain.chains import RetrievalQA
    from langchain.embeddings import HuggingFaceEmbeddings
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain_community.document_loaders import PyPDFLoader
    from langchain_pinecone import PineconeVectorStore
    from pinecone import Pinecone
    from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

    url = '<change_yours>'  # Ex: "https://arxiv.org/pdf/2207.02696.pdf", the YOLOv7 paper
    data_name = '<change_yours>'  # Ex: "/yolov7.pdf"

    huggingface_token = '<change_yours>'
    embedding_model = '<change_yours>'  # Ex: "sentence-transformers/all-MiniLM-L6-v2"
    LLM_model = '<change_yours>'  # Ex: "openai-community/gpt2" or "meta-llama/Llama-2-7b-chat-hf"
    Pinecone_token = '<change_yours>'
    Pinecone_index = '<change_yours>'

    # Download the PDF and load it into documents.
    r = requests.get(url, stream=True)
    with open(data_name, 'wb') as fd:
        fd.write(r.content)
    loader = PyPDFLoader(data_name)
    data = loader.load()

    # Split the document into overlapping chunks for retrieval.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    docs = text_splitter.split_documents(data)

    # Embed the chunks and index them in Pinecone.
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
    os.environ["PINECONE_API_KEY"] = Pinecone_token
    pc = Pinecone(api_key=Pinecone_token)
    index = pc.Index(Pinecone_index)  # handle to the existing index (LangChain connects by name below)
    index_name = Pinecone_index
    docsearch = PineconeVectorStore.from_documents(docs, embedding=embeddings, index_name=index_name)

    login(huggingface_token)

    tokenizer = AutoTokenizer.from_pretrained(LLM_model)
    model = AutoModelForCausalLM.from_pretrained(LLM_model,
                                                 device_map='auto',
                                                 torch_dtype=torch.float16,
                                                 use_auth_token=True,
                                                 load_in_8bit=True  # If you don't use a GPU, comment out this parameter
                                                 )
    text_generation = pipeline("text-generation",
                               model=model,
                               tokenizer=tokenizer,
                               torch_dtype=torch.bfloat16,
                               device_map="auto",
                               max_new_tokens=512,
                               do_sample=True,
                               top_k=30,
                               num_return_sequences=3,
                               eos_token_id=tokenizer.eos_token_id
                               )

    llm = HuggingFacePipeline(pipeline=text_generation, model_kwargs={'temperature': 0.1})

    # Llama-2 chat prompt format.
    SYSTEM_PROMPT = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer."""
    B_INST, E_INST = "[INST]", "[/INST]"
    B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

    SYSTEM_PROMPT = B_SYS + SYSTEM_PROMPT + E_SYS
    instruction = """
{context}

Question: {question}
"""
    template = B_INST + SYSTEM_PROMPT + instruction + E_INST

    prompt = PromptTemplate(template=template, input_variables=["context", "question"])

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=docsearch.as_retriever(search_kwargs={"k": 3}),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )

    question = questions
    result = qa_chain(question)

    print(result['result'])


@dsl.pipeline
def rag_pipeline(recipient: str = 'What is YOLOv7?'):  # default lets the CI test run the pipeline without arguments
    rag_task = RAG(questions=recipient).set_gpu_limit(1)


from kfp import compiler

compiler.Compiler().compile(rag_pipeline, 'RAG_pipeline.yaml')
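
Once compiled, `RAG_pipeline.yaml` can also be submitted by hand through a `kfp.Client`; a minimal sketch (the host and question are placeholders):

```python
import kfp

client = kfp.Client(host="http://localhost:8888")
run = client.create_run_from_pipeline_package(
    "RAG_pipeline.yaml",
    arguments={"recipient": "What is YOLOv7?"},
)
print(f"Run ID: {run.run_id}")
```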
28 changes: 28 additions & 0 deletions samples/contrib/RAG_kubeflow/example/.github/rag_action.sh
@@ -0,0 +1,28 @@
#!/bin/bash
#
# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -ex

pushd ./backend/src/v2/test

python3 -m pip install --upgrade pip
python3 -m pip install -r ./requirements-sample-test.txt

popd

# The -u flag makes python output unbuffered, so that we can see real-time logs.
# Reference: https://stackoverflow.com/a/107717
python3 -u ./samples/contrib/RAG_kubeflow/example/.github/init.py