# ---------------------------------------------------------------------------
# Reconstructed listing of commit 27878ffba07 ("Transferencia de Ficheiros"),
# which added four files: .env, debug.py, debugAPI.py and main.py.
#
# SECURITY (review): the committed .env contained a live OpenAI API key and
# the SQL Server "sa" password in plain text.  Secrets must never be
# committed — the values are redacted below; the leaked key and password
# must be rotated and .env added to .gitignore.
#
# ===== file: .env ========================================================= #
# OPENAI_API_KEY=<redacted - rotate the leaked key>
# DB_SERVER=TECHX-DEV1\SQLENERGYMSDEV   # e.g. localhost\SQLEXPRESS
# DB_NAME=EnergyMS_CMBarcelos
# DB_USER=sa                            # leave empty for Windows authentication
# DB_PASSWORD=<redacted>                # leave empty for Windows authentication
# -------------------------------------------------------------------------- #

# ===== file: debug.py ===================================================== #
# RAG debugging script: extracts text from a PDF, splits it into chunks,
# embeds the chunks into an in-memory FAISS index and answers questions
# over that index in a console loop.

import os

import fitz  # PyMuPDF
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from openai import OpenAI

load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page of *pdf_path*, "\\n" after each page.

    FIX: builds the result with "".join() instead of repeated "+=" — the
    original loop was quadratic on large PDFs.
    """
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text("text") + "\n" for page in doc)


def criarChunk(texto):
    """Split *texto* into overlapping ~500-character chunks for embedding."""
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
        length_function=len,
    )
    return text_splitter.split_text(texto)


def create_faiss_index(chunks, embeddings):
    """Build an in-memory FAISS index over *chunks* using *embeddings*."""
    return FAISS.from_texts(chunks, embeddings)


def search_faiss(vector_store, query, embeddings, top_k=3):
    """Return the *top_k* chunks most similar to *query*.

    FIX: the original also called ``embeddings.embed_query(query)`` and threw
    the result away — ``similarity_search`` embeds the query itself.  The now
    unused *embeddings* parameter is kept so existing callers keep working.
    """
    return vector_store.similarity_search(query, k=top_k)


def debug_embeddings(chunks, embeddings):
    """Print basic statistics about the embeddings generated for *chunks*.

    FIX: the example-chunk printout now sits inside the emptiness guard —
    the original dereferenced chunks[0] unconditionally and crashed with
    IndexError when the PDF produced no chunks.
    """
    embeddings_list = embeddings.embed_documents(chunks)

    print("\n DEBUG: Embeddings Gerados")
    print(f"Número total de chunks: {len(chunks)}")
    print(f"Número total de embeddings: {len(embeddings_list)}")

    if embeddings_list:
        print(f"Tamanho do primeiro embedding: {len(embeddings_list[0])}")
        print("\n Exemplo de Chunk e seu Embedding:")
        print(f"Chunk: {chunks[0]}")
        print(f"Embedding (primeiros 10 valores): {embeddings_list[0][:10]}")


def debug_faiss(vector_store, query, embeddings, top_k=3):
    """Print the FAISS search results (first 200 chars of each hit) for *query*."""
    query_embedding = embeddings.embed_query(query)
    print(f"\n DEBUG: Tamanho do vetor da pergunta: {len(query_embedding)}")

    docs = vector_store.similarity_search(query, k=top_k)
    print("\n DEBUG: Resultados da busca FAISS")
    print(f"Número de chunks retornados: {len(docs)}")
    for i, doc in enumerate(docs, start=1):
        print(f"\n Chunk {i}:")
        print(doc.page_content[:200])  # first 200 characters only


def generate_response(query, vector_store, embeddings):
    """Answer *query* with gpt-3.5-turbo, grounded on the FAISS context."""
    docs = search_faiss(vector_store, query, embeddings)
    context = "\n".join(doc.page_content for doc in docs)

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "Use o contexto abaixo para responder."},
            {"role": "system", "content": context},
            {"role": "user", "content": query},
        ],
    )
    return response.choices[0].message.content


if __name__ == "__main__":  # FIX: the original ran all of this at import time
    pdf_file = "teste.pdf"
    texto_extraido = extract_text_from_pdf(pdf_file)
    chunks = criarChunk(texto_extraido)

    embeddings = OpenAIEmbeddings()
    debug_embeddings(chunks, embeddings)

    vector_store = create_faiss_index(chunks, embeddings)
    debug_faiss(vector_store, "Exemplo de pesquisa", embeddings)

    print("Chatbot: Olá! Como te posso ajudar?")
    while True:
        user_input = input("Você: ")
        if user_input.lower() in ["sair", "exit", "quit"]:
            print("Chatbot: Até logo!")
            break

        debug_faiss(vector_store, user_input, embeddings)
        resposta = generate_response(user_input, vector_store, embeddings)
        print("Chatbot:", resposta)


# ===== file: debugAPI.py ================================================== #
# Smoke test: verifies that the OPENAI_API_KEY from .env can reach the API.

import os

import openai
from dotenv import load_dotenv

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

try:
    openai.models.list()  # cheap authenticated call; the response is not needed
    print("API funciona!")
except openai.OpenAIError as e:
    print("Erro ao conectar a API:", e)


# ===== file: main.py ====================================================== #
# Console chatbot over the CUnitBills table (SQL Server) with a GPT fallback
# for questions the simple keyword/regex parser does not recognize.

import json  # unused in this commit; kept from the original import block
import os
import re
import time

import openai
import pyodbc
from dotenv import load_dotenv

# Load environment variables (.env)
load_dotenv()

# Configure the OpenAI client
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


def connect_db():
    """Open a pyodbc connection using the .env credentials, or None on failure."""
    try:
        return pyodbc.connect(
            "DRIVER={ODBC Driver 17 for SQL Server};"
            f"SERVER={os.getenv('DB_SERVER')};"
            f"DATABASE={os.getenv('DB_NAME')};"
            f"UID={os.getenv('DB_USER')};"
            f"PWD={os.getenv('DB_PASSWORD')};"
        )
    except Exception as e:
        print(f"Erro ao conectar à base de dados: {e}")
        return None


def _format_row(columns, row):
    """Render one result row as a markdown bullet list (None -> 'Não disponível')."""
    return "\n".join(
        f"- **{column}**: {value if value is not None else 'Não disponível'}"
        for column, value in zip(columns, row)
    )


def get_entry_by_position(position=1):
    """Return the *position*-th CUnitBills row (1-based), formatted as text.

    FIX: OFFSET was interpolated with an f-string; it is now a bound
    parameter (with int() coercion), closing the SQL-injection hole.
    """
    conn = connect_db()
    if not conn:
        return "Erro: Não foi possível conectar à base de dados."

    try:
        with conn.cursor() as cursor:
            cursor.execute(
                "SELECT * FROM CUnitBills ORDER BY Id ASC "
                "OFFSET ? ROWS FETCH NEXT 1 ROWS ONLY",
                (int(position) - 1,),
            )
            row = cursor.fetchone()
            if row:
                columns = [column[0] for column in cursor.description]
                return _format_row(columns, row)
            return "Nenhum dado encontrado."
    except Exception as e:
        return f"Erro ao buscar dados: {e}"
    finally:
        conn.close()


def get_data(atributes=None, limit=20):
    """Return up to *limit* CUnitBills rows, optionally LIKE-filtered by Number.

    FIX: *limit* is coerced to int before interpolation — TOP cannot be a
    bound parameter in T-SQL, so the coercion is what blocks injection there.
    The Number filter stays parameterized, as in the original.
    """
    conn = connect_db()
    if not conn:
        return "Erro: Não foi possível conectar à base de dados."

    try:
        with conn.cursor() as cursor:
            top = int(limit)
            if atributes:
                cursor.execute(
                    f"SELECT TOP {top} * FROM CUnitBills WHERE Number LIKE ? ORDER BY Id ASC",
                    (f"%{atributes}%",),
                )
            else:
                cursor.execute(f"SELECT TOP {top} * FROM CUnitBills ORDER BY Id ASC")

            columns = [column[0] for column in cursor.description]
            rows = cursor.fetchall()
            if rows:
                return "\n\n".join(_format_row(columns, row) for row in rows)
            return "Nenhum dado encontrado."
    except Exception as e:
        return f"Erro ao buscar dados: {e}"
    finally:
        conn.close()


def get_filtered_data(cunit_id=None, date_billling_begin=None, date_billing_end=None, limit=2):
    """Return CUnitBills rows matching the given optional filters.

    NOTE(review): the column names "DateBilllingBegin" (triple "l") and
    "DateBillingEnd" appear to mirror the real schema — confirm before
    "fixing" the spelling anywhere.
    FIX: TOP is int()-coerced; filter values stay bound parameters; the
    except branch now logs the exception instead of silently discarding it.
    """
    conn = connect_db()
    if not conn:
        return "Erro: Não foi possível conectar à base de dados."

    try:
        with conn.cursor() as cursor:
            query = f"SELECT TOP {int(limit)} * FROM CUnitBills"
            conditions = []
            params = []

            if cunit_id is not None:
                conditions.append("CUnitId = ?")
                params.append(cunit_id)
            if date_billling_begin:
                conditions.append("DateBilllingBegin >= ?")
                params.append(date_billling_begin)
            if date_billing_end:
                conditions.append("DateBillingEnd = ?")
                params.append(date_billing_end)

            if conditions:
                query += " WHERE " + " AND ".join(conditions)
            query += " ORDER BY Id ASC"

            cursor.execute(query, params)
            columns = [column[0] for column in cursor.description]
            rows = cursor.fetchall()
            if rows:
                return "\n\n".join(_format_row(columns, row) for row in rows)
            return "Nenhum dado encontrado."
    except Exception as e:
        print(f"Erro ao buscar dados: {e}")  # keep the detail diagnosable
        return "Erro ao buscar dados. Verifique os critérios e tente novamente."
    finally:
        conn.close()


def parse_user_input(user_input):
    """Extract (cunit_id, date_billling_begin, date_billing_end) from free text.

    Recognizes patterns like "CUnitId de 7", "DateBilllingBegin maior que
    2024-01-01" and "DateBillingEnd igual a 2024-12-31"; absent filters come
    back as None.  (Stray debug print of the match object removed.)
    """
    m_id = re.search(r"CUnitId\s*(?:de)?\s*(\d+)", user_input, re.IGNORECASE)
    m_begin = re.search(
        r"DateBilllingBegin\s*(?:maior que|>=)\s*([\d-]+)", user_input, re.IGNORECASE
    )
    m_end = re.search(
        r"DateBillingEnd\s*(?:igual a|=)\s*([\d-]+)", user_input, re.IGNORECASE
    )

    cunit_id = int(m_id.group(1)) if m_id else None
    date_billling_begin = m_begin.group(1).strip() if m_begin else None
    date_billing_end = m_end.group(1).strip() if m_end else None
    return cunit_id, date_billling_begin, date_billing_end


def chat_with_gpt(prompt, attempts=3):
    """Send *prompt* to gpt-3.5-turbo, retrying up to *attempts* times on rate limits."""
    for i in range(attempts):
        try:
            response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                max_tokens=300,
            )
            return response.choices[0].message.content.strip()
        except openai.RateLimitError:
            if i < attempts - 1:
                print("Limite de requisições atingido! Tentando novamente...")
                time.sleep(10)  # back off before the next attempt
            else:
                return "Erro: Limite de requisições atingido várias vezes. Tente novamente mais tarde."
        except Exception as e:
            return f"Erro na API OpenAI: {e}"


if __name__ == "__main__":
    # Check database connectivity before starting the chat loop.
    conn = connect_db()
    if conn:
        print("Conexão com a base de dados estabelecida com sucesso!")
        conn.close()
    else:
        print("Erro ao conectar à base de dados.")

    while True:
        user_input = input("Eu: ")
        if user_input.lower() in ["quit", "exit", "bye"]:
            break

        cunit_id, date_billling_begin, date_billing_end = parse_user_input(user_input)

        if cunit_id or date_billling_begin or date_billing_end:
            data = get_filtered_data(cunit_id, date_billling_begin, date_billing_end)
            if data:
                print(f"Chatbot: Aqui estão os dados encontrados:\n{data}")
            else:
                print("Chatbot: Nenhuma entrada encontrada para os critérios fornecidos.")
            continue

        if "dados" in user_input.lower():
            data = get_data()
            print(f"\nDados do SQL Server:\n{data}")
            continue

        response = chat_with_gpt(user_input)
        print("Chatbot: ", response)