ChatBot - A funcionar com a estrutura mas com erros na tradução (Commit corrigido)

2025-06-12 14:31:18 +01:00 · 2025-06-12 14:31:18 +01:00 · a44bdc5fce
commit a44bdc5fce
parent c9ff11b62a
16 changed files with 360 additions and 124 deletions
--- a/ChatBot-Python/.env
+++ b/ChatBot-Python/.env
@ -1,5 +0,0 @@
-OPENAI_API_KEY=<REDACTED>
-DB_SERVER=TECHX-DEV1\SQLENERGYMSDEV # Exemplo: localhost\SQLEXPRESS
-DB_NAME=EnergyMS_CMBarcelos
-DB_USER=sa  # Deixa vazio se usares autenticação Windows
-DB_PASSWORD=<REDACTED> # Deixa vazio se usares autenticação Windows
--- a/ChatBot-Python/.env.example
+++ b/ChatBot-Python/.env.example
@ -0,0 +1,4 @@
+DB_SERVER=your_sql_server_instance_here
+DB_NAME=your_database_name_here
+DB_USER=your_database_user_here
+DB_PASSWORD=your_database_password_here
+OPENAI_API_KEY=your_openai_api_key_here
--- a/ChatBot-Python/README.md
+++ b/ChatBot-Python/README.md
@ -0,0 +1,70 @@
+# Chatbot Database Structure
+
+This project implements a chatbot that can interact with users and provide information about the structure of a database. The chatbot is designed to be easy to use and requires minimal coding knowledge to operate.
+
+## Project Structure
+
+```
+chatbot-db-structure
+├── src
+│   ├── main.py               # Entry point of the application
+│   ├── chatbot.py            # Contains the Chatbot class for managing conversations
+│   ├── db
+│   │   ├── __init__.py       # Initializes the db package
+│   │   ├── connector.py       # Handles database connection
+│   │   └── schema.py         # Retrieves and formats the database schema
+│   └── utils
+│       └── helpers.py        # Utility functions for various tasks
+├── requirements.txt          # Lists project dependencies
+├── .env.example              # Example environment variables
+└── README.md                 # Documentation for the project
+```
+
+## Setup Instructions
+
+1. Clone the repository:
+   ```
+   git clone <repository-url>
+   cd chatbot-db-structure
+   ```
+
+2. Create a virtual environment:
+   ```
+   python -m venv venv
+   ```
+
+3. Activate the virtual environment:
+   - On Windows:
+     ```
+     venv\Scripts\activate
+     ```
+   - On macOS/Linux:
+     ```
+     source venv/bin/activate
+     ```
+
+4. Install the required dependencies:
+   ```
+   pip install -r requirements.txt
+   ```
+
+5. Configure your environment variables by copying `.env.example` to `.env` and filling in the necessary values.
+
+## Usage
+
+To start the chatbot, run the following command:
+```
+python src/main.py
+```
+
+Once the chatbot is running, you can ask questions about the database structure, and it will respond with relevant information.
+
+## Capabilities
+
+- Provides information about the database schema, including tables and their columns.
+- Allows users to interact naturally and receive structured responses.
+- Designed to be extensible for future enhancements and additional features.
+
+## Contributing
+
+Contributions are welcome! Please feel free to submit a pull request or open an issue for any suggestions or improvements.
--- a/ChatBot-Python/debug.py
+++ b/ChatBot-Python/debug.py
@ -1,97 +0,0 @@
-import fitz
-import os
-from openai import OpenAI
-from dotenv import load_dotenv
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_openai import OpenAIEmbeddings 
-from langchain_community.vectorstores import FAISS
-
-load_dotenv()
-api_key = os.getenv("OPENAI_API_KEY")
-client = OpenAI(api_key=api_key)
-
-def extract_text_from_pdf(pdf_path):
-    text = ""
-    with fitz.open(pdf_path) as doc:
-        for page in doc:
-            text += page.get_text("text") + "\n"
-    return text
-
-def criarChunk(texto):
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=500,
-        chunk_overlap=50,
-        length_function=len
-    )
-    return text_splitter.split_text(texto)
-
-def create_faiss_index(chunks, embeddings):
-    vector_store = FAISS.from_texts(chunks, embeddings)  # Criar índice FAISS
-    return vector_store
-
-def search_faiss(vector_store, query, embeddings, top_k=3):
-    query_embedding = embeddings.embed_query(query)  # Gerar embedding da pergunta
-    docs = vector_store.similarity_search(query, k=top_k)  # Procurar no FAISS
-    return docs
-
-def debug_embeddings(chunks, embeddings):
-    embeddings_list = embeddings.embed_documents(chunks)
-
-    print(f"\n DEBUG: Embeddings Gerados")
-    print(f"Número total de chunks: {len(chunks)}")
-    print(f"Número total de embeddings: {len(embeddings_list)}")
-    
-    if embeddings_list:
-        print(f"Tamanho do primeiro embedding: {len(embeddings_list[0])}")
-
-    print("\n Exemplo de Chunk e seu Embedding:")
-    print(f"Chunk: {chunks[0]}")
-    print(f"Embedding (primeiros 10 valores): {embeddings_list[0][:10]}")
-
-def debug_faiss(vector_store, query, embeddings, top_k=3):
-    query_embedding = embeddings.embed_query(query)
-    print(f"\n DEBUG: Tamanho do vetor da pergunta: {len(query_embedding)}")
-
-    docs = vector_store.similarity_search(query, k=top_k)
-    print("\n DEBUG: Resultados da busca FAISS")
-    print(f"Número de chunks retornados: {len(docs)}")
-
-    for i, doc in enumerate(docs):
-        print(f"\n Chunk {i+1}:")
-        print(doc.page_content[:200])  # Mostra os primeiros 200 caracteres do chunk
-
-def generate_response(query, vector_store, embeddings):
-    docs = search_faiss(vector_store, query, embeddings)
-    context = "\n".join([doc.page_content for doc in docs])  
-
-    response = client.chat.completions.create(  
-        model="gpt-3.5-turbo",
-        messages=[
-            {"role": "system", "content": "Use o contexto abaixo para responder."},
-            {"role": "system", "content": context},
-            {"role": "user", "content": query}
-        ]
-    )
-    return response.choices[0].message.content  
-
-pdf_file = "teste.pdf"
-texto_extraido = extract_text_from_pdf(pdf_file)
-chunks = criarChunk(texto_extraido)
-
-embeddings = OpenAIEmbeddings()
-
-debug_embeddings(chunks, embeddings)
-
-vector_store = create_faiss_index(chunks, embeddings)
-debug_faiss(vector_store, "Exemplo de pesquisa", embeddings)
-
-print("Chatbot: Olá! Como te posso ajudar?")
-while True:
-    user_input = input("Você: ")
-    if user_input.lower() in ["sair", "exit", "quit"]:
-        print("Chatbot: Até logo!")
-        break
-
-    debug_faiss(vector_store, user_input, embeddings)
-    resposta = generate_response(user_input, vector_store, embeddings)
-    print("Chatbot:", resposta)
--- a/ChatBot-Python/debugAPI.py
+++ b/ChatBot-Python/debugAPI.py
@ -1,13 +0,0 @@
-import openai
-import os
-from dotenv import load_dotenv
-
-load_dotenv()
-
-openai.api_key = os.getenv("OPENAI_API_KEY")
-
-try:
-    response = openai.models.list()
-    print("API funciona!")
-except openai.OpenAIError as e:
-    print("Erro ao conectar a API:", e)
--- a/ChatBot-Python/package-lock.json
+++ b/ChatBot-Python/package-lock.json
@ -1,6 +0,0 @@
-{
-  "name": "ChatBot-Python",
-  "lockfileVersion": 3,
-  "requires": true,
-  "packages": {}
-}
--- a/ChatBot-Python/requirements.txt
+++ b/ChatBot-Python/requirements.txt
@ -0,0 +1,4 @@
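+Flask==2.0.1
+Flask-Cors  # imported by src/server.py (flask_cors)
+openai>=1.0.0  # src/main.py uses the v1 client API (client.chat.completions.create)
+python-dotenv==0.19.1
+pyodbc==4.0.30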
--- a/ChatBot-Python/src/.env
+++ b/ChatBot-Python/src/.env
@ -0,0 +1,6 @@
+# NOTE: real credentials were committed in this file; rotate them and keep .env out of version control.
+OPENAI_API_KEY=<REDACTED>
+DB_SERVER=TECHX-DEV1\SQLENERGYMSDEV # Exemplo: localhost\SQLEXPRESS
+DB_NAME=EnergyMS_CMBarcelos
+DB_USER=sa  # Deixa vazio se usares autenticação Windows
+DB_PASSWORD=<REDACTED> # Deixa vazio se usares autenticação Windows
+#<REDACTED>
--- a/ChatBot-Python/src/pycache/main.cpython-313.pyc
+++ b/ChatBot-Python/src/pycache/main.cpython-313.pyc
--- a/ChatBot-Python/src/chatbot.py
+++ b/ChatBot-Python/src/chatbot.py
@ -0,0 +1,22 @@
+class Chatbot:
+    def __init__(self, db_connector):
+        self.db_connector = db_connector
+
+    def get_database_structure(self):
+        schema = self.db_connector.get_schema()
+        return self.format_schema(schema)
+
+    def format_schema(self, schema):
+        formatted_schema = ""
+        for table in schema:
+            formatted_schema += f"Table: {table['table']}\n"
+            for column in table['columns']:
+                formatted_schema += f"  - Column: {column['name']} (Type: {column['type']})\n"
+            formatted_schema += "\n"
+        return formatted_schema
+
+    def respond_to_query(self, query):
+        if "database structure" in query.lower():
+            return self.get_database_structure()
+        else:
+            return "I'm sorry, I can only provide information about the database structure."
--- a/ChatBot-Python/src/db/init.py
+++ b/ChatBot-Python/src/db/init.py
@ -0,0 +1 @@
+# This file is intentionally left blank.
--- a/ChatBot-Python/src/db/connector.py
+++ b/ChatBot-Python/src/db/connector.py
@ -0,0 +1,17 @@
+class DatabaseConnector:
+    def __init__(self, connection_string):
+        self.connection_string = connection_string
+        self.connection = None
+
+    def connect(self):
+        import pyodbc
+        try:
+            self.connection = pyodbc.connect(self.connection_string)
+            print("Database connection established.")
+        except Exception as e:
+            print(f"Error connecting to the database: {e}")
+
+    def disconnect(self):
+        if self.connection:
+            self.connection.close()
+            print("Database connection closed.")
--- a/ChatBot-Python/src/db/schema.py
+++ b/ChatBot-Python/src/db/schema.py
@ -0,0 +1,25 @@
+class DatabaseSchema:
+    def __init__(self, connector):
+        self.connector = connector
+
+    def get_schema(self):
+        conn = self.connector.connect()
+        if not conn:
+            return "Error: Unable to connect to the database."
+
+        try:
+            cursor = conn.cursor()
+            cursor.execute("SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE'")
+            tables = cursor.fetchall()
+
+            schema = {}
+            for (table_name,) in tables:
+                cursor.execute(f"SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = ?", table_name)
+                columns = [column[0] for column in cursor.fetchall()]
+                schema[table_name] = columns
+
+            return schema
+        except Exception as e:
+            return f"Error retrieving schema: {e}"
+        finally:
+            conn.close()
--- a/ChatBot-Python/src/main.py
+++ b/ChatBot-Python/src/main.py
@ -21,6 +21,7 @@ def connect_db():
            f"UID={os.getenv('DB_USER')};"
            f"PWD={os.getenv('DB_PASSWORD')};"
        )
+        print("Conexão com o banco de dados estabelecida com sucesso.")
        return conn
    except Exception as e:
        print(f"Erro ao conectar à base de dados: {e}")
@ -55,6 +56,32 @@ def get_data(atributes=None, limit=20):
    finally:
        conn.close()

+def get_schema_with_examples(limit=10):
+    conn = connect_db()
+    if not conn:
+        return "Erro: Não foi possível conectar à base de dados."
+    try:
+        cursor = conn.cursor()
+        tables = []
+        cursor.execute("SELECT TABLE_SCHEMA, TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE'")
+        for schema_name, table_name in cursor.fetchall():
+            columns = []
+            cursor.execute(f"SELECT COLUMN_NAME, DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = ?", table_name)
+            for col_name, col_type in cursor.fetchall():
+                columns.append({'name': col_name, 'type': col_type})
+            try:
+                cursor.execute(f"SELECT TOP {limit} * FROM [{schema_name}].[{table_name}]")
+                example = cursor.fetchone()
+                example_dict = dict(zip([col['name'] for col in columns], example)) if example else {}
+            except Exception:
+                example_dict = {}
+            tables.append({'table': table_name, 'columns': columns, 'example': example_dict})
+        conn.close()
+        return tables
+    except Exception as e:
+        conn.close()
+        return f"Erro ao buscar schema: {e}"
+    
 def get_filtered_data(cunit_id=None, date_billling_begin=None, date_billing_end=None, limit=2):
    conn = connect_db()
    if not conn:
@ -135,7 +162,7 @@ def chat_with_gpt(prompt, attempts=3):
    for i in range(attempts):
        try:
            response = client.chat.completions.create(
-                model="gpt-3.5-turbo-0125",
+                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                max_tokens=300
            )
@ -478,6 +505,47 @@ def get_total_kwh_by_building_type(building_type=None):
    finally:
        conn.close()

+# Lowercase Portuguese label -> database column name.
+# The schema branch of the interactive loop looks this up in reverse
+# (column name -> label) to show translated column names to the user.
+# NOTE(review): presumably these are the columns of the invoices table
+# (CUnitBills) - confirm against the database.
+# NOTE(review): src/server.py defines an identical dict; keep both in sync.
+column_mapping = {
+    "id": "Id",
+    "tipo de energia": "EnergyTypesId",
+    "unidade de consumo": "CUnitId",
+    "tipo de fatura": "CUnitBillsInvoiceTypeId",
+    "tipo de documento": "DocumentTypeId",
+    "contrato eletricidade": "CUnitContractElectId",
+    "número": "Number",
+    "data de receção da fatura": "DateBillReceipt",
+    "data": "Date",
+    "data inicial": "DateBilllingBegin",
+    "data final": "DateBillingEnd",
+    "prazo de pagamento": "PaymentDeadline",
+    "total": "Total",
+    "mb entidade": "MBEnt",
+    "mb referência": "MBRef",
+    "saldo anterior": "PreviousBalance",
+    "saldo anterior dc": "PreviousBalanceDC",
+    "pagamentos efetuados": "PaymentsMade",
+    "saldo de pagamentos efetuados": "PaymentsMadeBalance",
+    "saldo de pagamentos efetuados dc": "PaymentsMadeBalanceDC",
+    "faturado": "Billed",
+    "saldo faturado": "BilledBalance",
+    "saldo faturado dc": "BilledBalanceDC",
+    "saldo atual": "CurrentBalance",
+    "saldo atual dc": "CurrentBalanceDC",
+    "fator de potência": "PowerFactor",
+    "potência tomada": "PotTomada",
+    "total sem iva normal": "TotalExcludingNormalVAT",
+    "total sem iva reduzido": "TotalExcludingReducedVAT",
+    "total iva normal": "TotalNormalVAT",
+    "total iva reduzido": "TotalReducedVAT",
+    "emissão co2": "CO2Emission",
+    "consumo médio período faturação": "AvgConsBillingPeriod",
+    "consumo médio últimos 12m": "AvgConsLast12M",
+    "informação adicional": "AddicionalInfo",
+    "data de revisão": "RevisionDate",
+    "número normalizado": "NormalizedNumber",
+    "data de pagamento": "PaymentDate"
+}
+
 if __name__ == "__main__":
    
    conn = connect_db()
@ -507,6 +575,37 @@ if __name__ == "__main__":
        if user_input.lower() in ["quit", "exit", "bye"]:
            break

+        if any(word in user_input.lower() for word in ["tabela", "coluna", "campos", "estrutura", "schema"]):
+            schema_info = get_schema_with_examples()
+            if isinstance(schema_info, str):
+                print(f"Chatbot: {schema_info}")
+            else:
+                if re.search(r"tabelas|todas as tabelas", user_input.lower()):
+                    nomes = [t['table'] for t in schema_info]
+                    print(f"Chatbot: As tabelas disponíveis são: {', '.join(nomes)}")
+                else:
+                    for t in schema_info:
+                        if t['table'].lower() in user_input.lower():
+                            colunas = ", ".join([
+                                next((k for k, v in column_mapping.items() if v == c['name']), c['name'])
+                                for c in t['columns']
+                            ])
+                            print(f"Chatbot: A tabela '{t['table']}' tem as colunas: {colunas}.")
+                            if t['example']:
+                                exemplo_traduzido = {
+                                    next((k for k, v in column_mapping.items() if v == k or v == k or v == col), col): val
+                                    for col, val in t['example'].items()
+                                }
+                                exemplo_traduzido = {
+                                    next((k for k, v in column_mapping.items() if v == col), col): val
+                                    for col, val in t['example'].items()
+                                }
+                                print(f"Exemplo de linha: {exemplo_traduzido}")
+                            break
+                    else:
+                        print("Chatbot: Não encontrei essa tabela. Pergunte por outra ou peça 'tabelas' para ver todas.")
+            continue
+
        cunit_id, date_billling_begin, date_billing_end, total_requested = parse_user_input(user_input)

        if total_requested and cunit_id:
--- a/ChatBot-Python/src/server.py
+++ b/ChatBot-Python/src/server.py
@ -1,13 +1,16 @@
 from flask import Flask, request, jsonify
 from flask_cors import CORS
+import json
+import os
 from main import (
    chat_with_gpt, parse_user_input, get_total_by_cunit, get_filtered_data,
    get_price_comparison, compare_current_vs_previous_year, get_top_consumers,
-    compare_kwh_current_vs_previous_year, get_invoices_by_month_year,
+    compare_kwh_current_vs_previous_year, get_invoices_by_month_year,get_schema_with_examples,
    get_invoices_from_inactive_units, get_total_kwh_by_building_type, get_data
 )
 import re
 from datetime import datetime
+import pyodbc  

 app = Flask(__name__)
 CORS(app)
@ -17,10 +20,103 @@ month_map = {
    "julho": 7, "agosto": 8, "setembro": 9, "outubro": 10, "novembro": 11, "dezembro": 12
 }

+# Lowercase Portuguese label -> database column name.
+# Returned by the /api/schema route and written to schema/schema.json
+# together with the schema.
+# NOTE(review): src/main.py defines an identical dict; keep both in sync
+# or import a single definition.
+column_mapping = {
+    "id": "Id",
+    "tipo de energia": "EnergyTypesId",
+    "unidade de consumo": "CUnitId",
+    "tipo de fatura": "CUnitBillsInvoiceTypeId",
+    "tipo de documento": "DocumentTypeId",
+    "contrato eletricidade": "CUnitContractElectId",
+    "número": "Number",
+    "data de receção da fatura": "DateBillReceipt",
+    "data": "Date",
+    "data inicial": "DateBilllingBegin",
+    "data final": "DateBillingEnd",
+    "prazo de pagamento": "PaymentDeadline",
+    "total": "Total",
+    "mb entidade": "MBEnt",
+    "mb referência": "MBRef",
+    "saldo anterior": "PreviousBalance",
+    "saldo anterior dc": "PreviousBalanceDC",
+    "pagamentos efetuados": "PaymentsMade",
+    "saldo de pagamentos efetuados": "PaymentsMadeBalance",
+    "saldo de pagamentos efetuados dc": "PaymentsMadeBalanceDC",
+    "faturado": "Billed",
+    "saldo faturado": "BilledBalance",
+    "saldo faturado dc": "BilledBalanceDC",
+    "saldo atual": "CurrentBalance",
+    "saldo atual dc": "CurrentBalanceDC",
+    "fator de potência": "PowerFactor",
+    "potência tomada": "PotTomada",
+    "total sem iva normal": "TotalExcludingNormalVAT",
+    "total sem iva reduzido": "TotalExcludingReducedVAT",
+    "total iva normal": "TotalNormalVAT",
+    "total iva reduzido": "TotalReducedVAT",
+    "emissão co2": "CO2Emission",
+    "consumo médio período faturação": "AvgConsBillingPeriod",
+    "consumo médio últimos 12m": "AvgConsLast12M",
+    "informação adicional": "AddicionalInfo",
+    "data de revisão": "RevisionDate",
+    "número normalizado": "NormalizedNumber",
+    "data de pagamento": "PaymentDate"
+}
+
+def get_db_schema():
+    conn = pyodbc.connect('DRIVER={SQL Server};SERVER=SEU_SERVIDOR;DATABASE=SEU_BANCO;UID=USUARIO;PWD=SENHA')
+    cursor = conn.cursor()
+    tables = []
+    for row in cursor.tables(tableType='TABLE'):
+        table_name = row.table_name
+        columns = []
+        for col in cursor.columns(table=table_name):
+            columns.append({'name': col.column_name, 'type': col.type_name})
+        tables.append({'table': table_name, 'columns': columns})
+    conn.close()
+    return tables
+
+def save_schema_to_file(schema, mapping, folder='schema'):
+    os.makedirs(folder, exist_ok=True)
+    with open(os.path.join(folder, 'schema.json'), 'w', encoding='utf-8') as f:
+        json.dump({'schema': schema, 'mapping': mapping}, f, ensure_ascii=False, indent=2)
+
+def answer_schema_question(question):
+    schema = get_schema_with_examples()
+    question = question.lower()
+    if "tabelas" in question:
+        return "As tabelas disponíveis são: " + ", ".join([t['table'] for t in schema])
+    for t in schema:
+        if t['table'].lower() in question:
+            cols = ", ".join([c['name'] for c in t['columns']])
+            return f"A tabela {t['table']} tem as colunas: {cols}.\nExemplo: {t['example']}"
+    return "Não consegui encontrar informação sobre essa tabela ou coluna."
+
+@app.route('/api/schema-exemplo', methods=['GET'])
+def schema_exemplo():
+    schema_info = get_schema_with_examples()
+    return jsonify({'schema': schema_info})
+
+@app.route('/api/schema', methods=['GET'])
+def schema():
+    schema_info = get_db_schema()
+    save_schema_to_file(schema_info, column_mapping)
+    return jsonify({'schema': schema_info, 'mapping': column_mapping})
+
@app.route('/api/chat', methods=['POST'])
 def chat():
    data = request.json
-    user_input = data.get('message', '')
+    user_input = data.get('message', '').lower()
+    
+    if "tabela" in user_input or "coluna" in user_input:
+        resposta = answer_schema_question(user_input)
+        return jsonify({'reply': resposta})
+    
+    if "colunas" in user_input and ("fatura" in user_input or "cunitbills" in user_input):
+        schema_info = get_db_schema()
+        for table in schema_info:
+            if table['table'].lower() == 'cunitbills':
+                colunas = [col['name'] for col in table['columns']]
+                return jsonify({'reply': "As colunas da tabela de faturas são:\n" + ", ".join(colunas)})
+        return jsonify({'reply': "Tabela de faturas não encontrada no banco de dados."})

    cunit_id, date_billling_begin, date_billing_end, total_requested = parse_user_input(user_input)

--- a/ChatBot-Python/src/utils/helpers.py
+++ b/ChatBot-Python/src/utils/helpers.py
@ -0,0 +1,13 @@
+def format_response(data):
+    formatted = ""
+    for table in data:
+        formatted += f"**Table:** {table['name']}\n"
+        formatted += "Columns:\n"
+        for column in table['columns']:
+            formatted += f"- {column['name']} ({column['type']})\n"
+        formatted += "\n"
+    return formatted
+
+def parse_user_input(user_input):
+    # This function can be expanded to include more complex parsing logic
+    return user_input.strip()