# ---------------------------------------------------------------------------
# Reconstructed listing of commit 27878ffba07 ("Transferencia de Ficheiros"),
# which added four files: .env, debug.py, debugAPI.py and main.py.
#
# SECURITY (review): the committed .env contained a live OpenAI API key and
# the SQL Server "sa" password in plain text.  Secrets must never be
# committed — the values are redacted below; the leaked key and password
# must be rotated and .env added to .gitignore.
#
# ===== file: .env ========================================================= #
# OPENAI_API_KEY=<redacted - rotate the leaked key>
# DB_SERVER=TECHX-DEV1\SQLENERGYMSDEV   # e.g. localhost\SQLEXPRESS
# DB_NAME=EnergyMS_CMBarcelos
# DB_USER=sa                            # leave empty for Windows authentication
# DB_PASSWORD=<redacted>                # leave empty for Windows authentication
# -------------------------------------------------------------------------- #

# ===== file: debug.py ===================================================== #
# RAG debugging script: extracts text from a PDF, splits it into chunks,
# embeds the chunks into an in-memory FAISS index and answers questions
# over that index in a console loop.

import os

import fitz  # PyMuPDF
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from openai import OpenAI

load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page of *pdf_path*, "\\n" after each page.

    FIX: builds the result with "".join() instead of repeated "+=" — the
    original loop was quadratic on large PDFs.
    """
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text("text") + "\n" for page in doc)


def criarChunk(texto):
    """Split *texto* into overlapping ~500-character chunks for embedding."""
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
        length_function=len,
    )
    return text_splitter.split_text(texto)


def create_faiss_index(chunks, embeddings):
    """Build an in-memory FAISS index over *chunks* using *embeddings*."""
    return FAISS.from_texts(chunks, embeddings)


def search_faiss(vector_store, query, embeddings, top_k=3):
    """Return the *top_k* chunks most similar to *query*.

    FIX: the original also called ``embeddings.embed_query(query)`` and threw
    the result away — ``similarity_search`` embeds the query itself.  The now
    unused *embeddings* parameter is kept so existing callers keep working.
    """
    return vector_store.similarity_search(query, k=top_k)


def debug_embeddings(chunks, embeddings):
    """Print basic statistics about the embeddings generated for *chunks*.

    FIX: the example-chunk printout now sits inside the emptiness guard —
    the original dereferenced chunks[0] unconditionally and crashed with
    IndexError when the PDF produced no chunks.
    """
    embeddings_list = embeddings.embed_documents(chunks)

    print("\n DEBUG: Embeddings Gerados")
    print(f"Número total de chunks: {len(chunks)}")
    print(f"Número total de embeddings: {len(embeddings_list)}")

    if embeddings_list:
        print(f"Tamanho do primeiro embedding: {len(embeddings_list[0])}")
        print("\n Exemplo de Chunk e seu Embedding:")
        print(f"Chunk: {chunks[0]}")
        print(f"Embedding (primeiros 10 valores): {embeddings_list[0][:10]}")


def debug_faiss(vector_store, query, embeddings, top_k=3):
    """Print the FAISS search results (first 200 chars of each hit) for *query*."""
    query_embedding = embeddings.embed_query(query)
    print(f"\n DEBUG: Tamanho do vetor da pergunta: {len(query_embedding)}")

    docs = vector_store.similarity_search(query, k=top_k)
    print("\n DEBUG: Resultados da busca FAISS")
    print(f"Número de chunks retornados: {len(docs)}")
    for i, doc in enumerate(docs, start=1):
        print(f"\n Chunk {i}:")
        print(doc.page_content[:200])  # first 200 characters only


def generate_response(query, vector_store, embeddings):
    """Answer *query* with gpt-3.5-turbo, grounded on the FAISS context."""
    docs = search_faiss(vector_store, query, embeddings)
    context = "\n".join(doc.page_content for doc in docs)

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "Use o contexto abaixo para responder."},
            {"role": "system", "content": context},
            {"role": "user", "content": query},
        ],
    )
    return response.choices[0].message.content


if __name__ == "__main__":  # FIX: the original ran all of this at import time
    pdf_file = "teste.pdf"
    texto_extraido = extract_text_from_pdf(pdf_file)
    chunks = criarChunk(texto_extraido)

    embeddings = OpenAIEmbeddings()
    debug_embeddings(chunks, embeddings)

    vector_store = create_faiss_index(chunks, embeddings)
    debug_faiss(vector_store, "Exemplo de pesquisa", embeddings)

    print("Chatbot: Olá! Como te posso ajudar?")
    while True:
        user_input = input("Você: ")
        if user_input.lower() in ["sair", "exit", "quit"]:
            print("Chatbot: Até logo!")
            break

        debug_faiss(vector_store, user_input, embeddings)
        resposta = generate_response(user_input, vector_store, embeddings)
        print("Chatbot:", resposta)


# ===== file: debugAPI.py ================================================== #
# Smoke test: verifies that the OPENAI_API_KEY from .env can reach the API.

import os

import openai
from dotenv import load_dotenv

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

try:
    openai.models.list()  # cheap authenticated call; the response is not needed
    print("API funciona!")
except openai.OpenAIError as e:
    print("Erro ao conectar a API:", e)


# ===== file: main.py ====================================================== #
# Console chatbot over the CUnitBills table (SQL Server) with a GPT fallback
# for questions the simple keyword/regex parser does not recognize.

import json  # unused in this commit; kept from the original import block
import os
import re
import time

import openai
import pyodbc
from dotenv import load_dotenv

# Load environment variables (.env)
load_dotenv()

# Configure the OpenAI client
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


def connect_db():
    """Open a pyodbc connection using the .env credentials, or None on failure."""
    try:
        return pyodbc.connect(
            "DRIVER={ODBC Driver 17 for SQL Server};"
            f"SERVER={os.getenv('DB_SERVER')};"
            f"DATABASE={os.getenv('DB_NAME')};"
            f"UID={os.getenv('DB_USER')};"
            f"PWD={os.getenv('DB_PASSWORD')};"
        )
    except Exception as e:
        print(f"Erro ao conectar à base de dados: {e}")
        return None


def _format_row(columns, row):
    """Render one result row as a markdown bullet list (None -> 'Não disponível')."""
    return "\n".join(
        f"- **{column}**: {value if value is not None else 'Não disponível'}"
        for column, value in zip(columns, row)
    )


def get_entry_by_position(position=1):
    """Return the *position*-th CUnitBills row (1-based), formatted as text.

    FIX: OFFSET was interpolated with an f-string; it is now a bound
    parameter (with int() coercion), closing the SQL-injection hole.
    """
    conn = connect_db()
    if not conn:
        return "Erro: Não foi possível conectar à base de dados."

    try:
        with conn.cursor() as cursor:
            cursor.execute(
                "SELECT * FROM CUnitBills ORDER BY Id ASC "
                "OFFSET ? ROWS FETCH NEXT 1 ROWS ONLY",
                (int(position) - 1,),
            )
            row = cursor.fetchone()
            if row:
                columns = [column[0] for column in cursor.description]
                return _format_row(columns, row)
            return "Nenhum dado encontrado."
    except Exception as e:
        return f"Erro ao buscar dados: {e}"
    finally:
        conn.close()


def get_data(atributes=None, limit=20):
    """Return up to *limit* CUnitBills rows, optionally LIKE-filtered by Number.

    FIX: *limit* is coerced to int before interpolation — TOP cannot be a
    bound parameter in T-SQL, so the coercion is what blocks injection there.
    The Number filter stays parameterized, as in the original.
    """
    conn = connect_db()
    if not conn:
        return "Erro: Não foi possível conectar à base de dados."

    try:
        with conn.cursor() as cursor:
            top = int(limit)
            if atributes:
                cursor.execute(
                    f"SELECT TOP {top} * FROM CUnitBills WHERE Number LIKE ? ORDER BY Id ASC",
                    (f"%{atributes}%",),
                )
            else:
                cursor.execute(f"SELECT TOP {top} * FROM CUnitBills ORDER BY Id ASC")

            columns = [column[0] for column in cursor.description]
            rows = cursor.fetchall()
            if rows:
                return "\n\n".join(_format_row(columns, row) for row in rows)
            return "Nenhum dado encontrado."
    except Exception as e:
        return f"Erro ao buscar dados: {e}"
    finally:
        conn.close()


def get_filtered_data(cunit_id=None, date_billling_begin=None, date_billing_end=None, limit=2):
    """Return CUnitBills rows matching the given optional filters.

    NOTE(review): the column names "DateBilllingBegin" (triple "l") and
    "DateBillingEnd" appear to mirror the real schema — confirm before
    "fixing" the spelling anywhere.
    FIX: TOP is int()-coerced; filter values stay bound parameters; the
    except branch now logs the exception instead of silently discarding it.
    """
    conn = connect_db()
    if not conn:
        return "Erro: Não foi possível conectar à base de dados."

    try:
        with conn.cursor() as cursor:
            query = f"SELECT TOP {int(limit)} * FROM CUnitBills"
            conditions = []
            params = []

            if cunit_id is not None:
                conditions.append("CUnitId = ?")
                params.append(cunit_id)
            if date_billling_begin:
                conditions.append("DateBilllingBegin >= ?")
                params.append(date_billling_begin)
            if date_billing_end:
                conditions.append("DateBillingEnd = ?")
                params.append(date_billing_end)

            if conditions:
                query += " WHERE " + " AND ".join(conditions)
            query += " ORDER BY Id ASC"

            cursor.execute(query, params)
            columns = [column[0] for column in cursor.description]
            rows = cursor.fetchall()
            if rows:
                return "\n\n".join(_format_row(columns, row) for row in rows)
            return "Nenhum dado encontrado."
    except Exception as e:
        print(f"Erro ao buscar dados: {e}")  # keep the detail diagnosable
        return "Erro ao buscar dados. Verifique os critérios e tente novamente."
    finally:
        conn.close()


def parse_user_input(user_input):
    """Extract (cunit_id, date_billling_begin, date_billing_end) from free text.

    Recognizes patterns like "CUnitId de 7", "DateBilllingBegin maior que
    2024-01-01" and "DateBillingEnd igual a 2024-12-31"; absent filters come
    back as None.  (Stray debug print of the match object removed.)
    """
    m_id = re.search(r"CUnitId\s*(?:de)?\s*(\d+)", user_input, re.IGNORECASE)
    m_begin = re.search(
        r"DateBilllingBegin\s*(?:maior que|>=)\s*([\d-]+)", user_input, re.IGNORECASE
    )
    m_end = re.search(
        r"DateBillingEnd\s*(?:igual a|=)\s*([\d-]+)", user_input, re.IGNORECASE
    )

    cunit_id = int(m_id.group(1)) if m_id else None
    date_billling_begin = m_begin.group(1).strip() if m_begin else None
    date_billing_end = m_end.group(1).strip() if m_end else None
    return cunit_id, date_billling_begin, date_billing_end


def chat_with_gpt(prompt, attempts=3):
    """Send *prompt* to gpt-3.5-turbo, retrying up to *attempts* times on rate limits."""
    for i in range(attempts):
        try:
            response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                max_tokens=300,
            )
            return response.choices[0].message.content.strip()
        except openai.RateLimitError:
            if i < attempts - 1:
                print("Limite de requisições atingido! Tentando novamente...")
                time.sleep(10)  # back off before the next attempt
            else:
                return "Erro: Limite de requisições atingido várias vezes. Tente novamente mais tarde."
        except Exception as e:
            return f"Erro na API OpenAI: {e}"


if __name__ == "__main__":
    # Check database connectivity before starting the chat loop.
    conn = connect_db()
    if conn:
        print("Conexão com a base de dados estabelecida com sucesso!")
        conn.close()
    else:
        print("Erro ao conectar à base de dados.")

    while True:
        user_input = input("Eu: ")
        if user_input.lower() in ["quit", "exit", "bye"]:
            break

        cunit_id, date_billling_begin, date_billing_end = parse_user_input(user_input)

        if cunit_id or date_billling_begin or date_billing_end:
            data = get_filtered_data(cunit_id, date_billling_begin, date_billing_end)
            if data:
                print(f"Chatbot: Aqui estão os dados encontrados:\n{data}")
            else:
                print("Chatbot: Nenhuma entrada encontrada para os critérios fornecidos.")
            continue

        if "dados" in user_input.lower():
            data = get_data()
            print(f"\nDados do SQL Server:\n{data}")
            continue

        response = chat_with_gpt(user_input)
        print("Chatbot: ", response)