# bs4scrape

   from api_key import OpenAIKey
from logs_handling import MemoryWithLogging
import os
import json
import openai
import llama_index
import requests
import warnings
import time
import cProfile, pstats
from pstats import SortKey
#Do NOT import AzureOpenAI from OpenAI
from llama_index.llms.azure_openai import AzureOpenAI
import textwrap
# from llama_index.indices.managed.llama_cloud import LlamaCloudIndex
from llama_parse import LlamaParse
from filestore import Storefilepaths
# from deeplake.core.vectorstore import DeepLakeVectorStore
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.core import Settings, Document
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage, Document
from llama_index.core.response.pprint_utils import pprint_response
import logging
import sys
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.extractors import TitleExtractor
from llama_index.core.ingestion import IngestionPipeline, IngestionCache
from llama_index.vector_stores.milvus import MilvusVectorStore
from llama_index.core.vector_stores.types import VectorStoreQuery, MetadataFilters, MetadataFilter
from llama_parse.base import ResultType, Language


class Rag:
    """Question answering over a PDF parsed with LlamaParse and stored in Milvus.

    ``storage()`` parses the PDF, runs it through an ingestion pipeline
    (sentence splitting, title extraction, embedding) attached to a local
    Milvus vector store, and builds a ``VectorStoreIndex`` on that store.
    ``handle_query()`` queries the index and passes each query/response pair
    to ``self.memory.log_query``.
    """

    # PDF ingested when ``storage()`` is called without an explicit path.
    DEFAULT_PDF_PATH = "./temp_data_folder/BE-State-of-the-Transition-2023.pdf"

    # Built lazily by ``storage()``; ``None`` means "not ingested yet".
    # Declared at class level so the check in ``handle_query`` is safe even on
    # an instance whose ``__init__`` has not set it.
    index = None

    def __init__(self):
        """Configure logging, the global LlamaIndex models and the log helpers."""
        logging.basicConfig(
            stream=sys.stdout, level=logging.INFO
        )  # logging.DEBUG for more verbose output
        # NOTE(review): basicConfig installs a stdout handler on the root
        # logger when it has none, so this extra handler probably prints each
        # record twice (and once more per additional Rag instance) - confirm
        # whether that is intended before removing it.
        logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
        self.model = OpenAIKey()
        Settings.llm = self.model.llmmodel()
        Settings.embed_model = self.model.embedmodel()
        # Initialize memory and logging
        self.memory = MemoryWithLogging()
        self.filestore = Storefilepaths()

    def storage(self, pdf_path=None):
        """Parse a PDF, ingest it into the Milvus store and return the index.

        Args:
            pdf_path: Path of the PDF to parse. When ``None`` (the default),
                ``DEFAULT_PDF_PATH`` is used.

        Returns:
            The ``VectorStoreIndex`` built on the Milvus vector store. The
            same object is also stored on ``self.index``.
        """
        if pdf_path is None:
            pdf_path = self.DEFAULT_PDF_PATH

        instructions = """The provided document is a report which will be used to extract numerical and textual data. Some pages do NOT have a title. It might contain tables. Try to extract all the information is a cohesive way."""
        # ``load_data`` returns the parsed documents, not the parser itself.
        documents = LlamaParse(
            result_type="markdown",  # "markdown" and "text" are available
            api_key=os.getenv('LLAMA_CLOUD_API_KEY'),
            parsing_instruction=instructions,
            verbose=True,
        ).load_data(pdf_path)
        print("printing reader")

        # NOTE(review): with overwrite=False the existing collection is
        # presumably kept, so every call to this method adds the document's
        # nodes again - confirm, and consider de-duplicating across restarts.
        vector_store = MilvusVectorStore(
            uri="./milvus_test_2.db",
            dim=1536,  # text-embedding-ada-002 has dimension of 1536
            overwrite=False,
        )
        pipeline = IngestionPipeline(
            transformations=[
                SentenceSplitter(chunk_size=256, chunk_overlap=32),
                TitleExtractor(),
                Settings.embed_model,
            ],
            vector_store=vector_store,
        )
        # Run the pipeline; the nodes are written to the attached vector store.
        pipeline.run(documents=documents)
        self.index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
        print('self.index excuted successfully')
        return self.index

    # Function to handle user queries
    def handle_query(self, user_id, query):
        """Answer ``query`` from the index and log the exchange for ``user_id``.

        The index is built on the first call only and reused afterwards, so a
        query no longer re-parses and re-ingests the PDF each time. Call
        ``storage()`` explicitly to force a fresh ingestion.

        Args:
            user_id: Identifier passed to ``self.memory.log_query``.
            query: Question text sent to the query engine.

        Returns:
            The ``response`` attribute of the query engine's result.
        """
        start_time = time.perf_counter()

        self.user_id = user_id
        self.query = query
        # Parsing and ingestion are expensive: do them once per instance.
        if self.index is None:
            self.storage()
        print('self.index called successfully in handle query')
        # Query the index
        self.query_engine = self.index.as_query_engine()
        self.response = self.query_engine.query(self.query)
        pprint_response(self.response, show_source=True)
        self.response_text = self.response.response  # Adjust based on the actual response structure
        print("--- %s seconds ---" % (time.perf_counter() - start_time))
        # Log the query and response
        self.memory.log_query(self.user_id, self.query, self.response_text)
        return self.response_text

# obj = Rag()
# # # obj.storage()
# # # # # User ID and queries
# user_id = 'w23'
# # query="which year was breakthrough energy founded?"
# # print(obj.handle_query(user_id, query))
# queries = [
#     "Explain what Fervo company is"
#     # "what does the company Malta do? give some statistics about the change brought by the steps taken by this company.",
#     # "What are ESG risk categories?"
# ]

# # Loop through queries and get responses
# for i in range(len(queries)):
#     ans = obj.handle_query(user_id, queries[i])
#     print(f"Query {i+1}: {ans}")

       
#     "who founded breakthrough energy and when?",
    # "how much greenhouse gas is emitted?",
    # "what is the challenge with electricity and what are the solutions for it?",
    # "Explain how wind energy is helpful with some statistics figures."


#STREAMLIT

import streamlit as st
from streamlit_chat import message
from chat_func import Chatfunc

# Create an instance of the Chatfunc class
chatfunc = Chatfunc()

# Initialize session state (only on the first run of a session; Streamlit
# re-executes this script on every interaction and keeps session_state).
if 'messages' not in st.session_state:
    st.session_state.messages = []
if 'message_counter' not in st.session_state:
    st.session_state.message_counter = 0
if 'current_user_id' not in st.session_state:
    st.session_state.current_user_id = None

st.header("Chatbot")

# Reserve the container first so the conversation history is displayed at the
# top, above the inputs, even though it is filled in at the end of the script.
chat_container = st.container()

user_id = st.text_input("Type your user_id:")

# A different user_id starts a fresh conversation.
if user_id != st.session_state.current_user_id:
    st.session_state.messages = []
    st.session_state.message_counter = 0
    st.session_state.current_user_id = user_id

text_in = st.text_input("What's your question?")

if st.button("Send") and user_id:
    if not text_in.strip():
        # Do not send an empty question to the backend or record an empty
        # chat bubble.
        st.warning("Please enter a question before sending.")
    else:
        # Add user message to session state; the counter gives every message
        # a unique widget key.
        st.session_state.messages.append({
            "content": text_in,
            "is_user": True,
            "key": f"msg_{st.session_state.message_counter}"
        })
        st.session_state.message_counter += 1

        # Get the response from the Chatfunc class
        response = chatfunc.handle_query(user_id, text_in)

        # Add chatbot response to session state
        st.session_state.messages.append({
            "content": response,
            "is_user": False,
            "key": f"msg_{st.session_state.message_counter}"
        })
        st.session_state.message_counter += 1

# Display all messages in the chat container
with chat_container:
    for msg in st.session_state.messages:
        message(msg["content"], is_user=msg["is_user"], key=msg["key"])


################################

import streamlit as st
from streamlit_chat import message
from chat_func import Chatfunc

# Create an instance of the Chatfunc class
chatfunc = Chatfunc()

# Initialize session state (only on the first run of a session; Streamlit
# re-executes this script on every interaction and keeps session_state).
if 'messages' not in st.session_state:
    st.session_state.messages = []
if 'message_counter' not in st.session_state:
    st.session_state.message_counter = 0
if 'current_user_id' not in st.session_state:
    st.session_state.current_user_id = None
if 'feedback_given' not in st.session_state:
    st.session_state.feedback_given = False

# Custom CSS (same as before)
st.markdown("""
<style>
... (previous CSS remains the same)
.feedback-container {
    margin-top: 1rem;
    padding: 1rem;
    background-color: #1E1E1E;
    border-radius: 5px;
}
</style>
""", unsafe_allow_html=True)

# Fixed title at the top
st.markdown('<div class="title-container"><h1>Chatbot</h1></div>', unsafe_allow_html=True)

# Scrollable chat container
# NOTE(review): each markdown call is emitted as its own element, so the
# opening/closing <div> tags below may not actually wrap the widgets rendered
# between them - confirm the CSS classes take effect.
chat_container = st.container()
chat_container.markdown('<div class="chat-container">', unsafe_allow_html=True)

with chat_container:
    for msg in st.session_state.messages:
        message(msg["content"], is_user=msg["is_user"], key=msg["key"])

    # Display feedback widget after 3 chats (3 user + 3 bot messages), until
    # the user has picked one of the options.
    if len(st.session_state.messages) >= 6 and not st.session_state.feedback_given:
        st.markdown('<div class="feedback-container">', unsafe_allow_html=True)
        st.write("How was your experience?")
        col1, col2, col3 = st.columns(3)
        with col1:
            if st.button("😃 Good"):
                st.session_state.feedback_given = True
                st.success("Thank you for your feedback!")
        with col2:
            if st.button("😐 Neutral"):
                st.session_state.feedback_given = True
                st.success("Thank you for your feedback!")
        with col3:
            if st.button("😟 Poor"):
                st.session_state.feedback_given = True
                st.success("Thank you for your feedback!")
        st.markdown('</div>', unsafe_allow_html=True)

chat_container.markdown('</div>', unsafe_allow_html=True)

# Fixed input container at the bottom (same as before)
input_container = st.container()
input_container.markdown('<div class="input-container">', unsafe_allow_html=True)

with input_container:
    col1, col2, col3 = st.columns([1, 3, 1])
    with col1:
        user_id = st.text_input("User ID:", key="user_id")
    with col2:
        text_in = st.text_input("Your question:", key="question")
    with col3:
        send_button = st.button("Send")

    if send_button and user_id:
        if not text_in.strip():
            # Do not send an empty question to the backend or record an
            # empty chat bubble.
            st.warning("Please enter a question before sending.")
        else:
            # A different user_id starts a fresh conversation.
            if user_id != st.session_state.current_user_id:
                st.session_state.messages = []
                st.session_state.message_counter = 0
                st.session_state.current_user_id = user_id
                st.session_state.feedback_given = False

            # Add user message to session state; the counter gives every
            # message a unique widget key.
            st.session_state.messages.append({
                "content": text_in,
                "is_user": True,
                "key": f"msg_{st.session_state.message_counter}"
            })
            st.session_state.message_counter += 1

            # Get the response from the Chatfunc class
            response = chatfunc.handle_query(user_id, text_in)

            # Add chatbot response to session state
            st.session_state.messages.append({
                "content": response,
                "is_user": False,
                "key": f"msg_{st.session_state.message_counter}"
            })
            st.session_state.message_counter += 1

            # Rerun the app so the chat container above shows the new
            # messages. st.experimental_rerun is deprecated and absent from
            # newer Streamlit releases; prefer st.rerun and fall back only
            # when running on an older version that lacks it.
            rerun = getattr(st, "rerun", None) or st.experimental_rerun
            rerun()

input_container.markdown('</div>', unsafe_allow_html=True)