ChatBot - A funcionar com a estrutura mas com erros na tradução (Commit corrigido)

This commit is contained in:
Ricardo Cunha 2025-06-12 14:31:18 +01:00
parent c9ff11b62a
commit a44bdc5fce
16 changed files with 360 additions and 124 deletions

View File

@ -1,5 +0,0 @@
OPENAI_API_KEY=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
DB_SERVER=TECHX-DEV1\SQLENERGYMSDEV # Exemplo: localhost\SQLEXPRESS
DB_NAME=EnergyMS_CMBarcelos
DB_USER=sa # Deixa vazio se usares autenticação Windows
DB_PASSWORD=EnergyMS+DEV # Deixa vazio se usares autenticação Windows

View File

@ -0,0 +1,4 @@
DATABASE_URL=your_database_url_here
DATABASE_USER=your_database_user_here
DATABASE_PASSWORD=your_database_password_here
OPENAI_API_KEY=your_openai_api_key_here

70
ChatBot-Python/README.md Normal file
View File

@ -0,0 +1,70 @@
# Chatbot Database Structure
This project implements a chatbot that can interact with users and provide information about the structure of a database. The chatbot is designed to be easy to use and requires minimal coding knowledge to operate.
## Project Structure
```
chatbot-db-structure
├── src
│   ├── main.py             # Entry point of the application
│   ├── chatbot.py          # Contains the Chatbot class for managing conversations
│   ├── db
│   │   ├── __init__.py     # Initializes the db package
│   │   ├── connector.py    # Handles database connection
│   │   └── schema.py       # Retrieves and formats the database schema
│   └── utils
│       └── helpers.py      # Utility functions for various tasks
├── requirements.txt        # Lists project dependencies
├── .env.example            # Example environment variables
└── README.md               # Documentation for the project
```
## Setup Instructions
1. Clone the repository:
```
git clone <repository-url>
cd chatbot-db-structure
```
2. Create a virtual environment:
```
python -m venv venv
```
3. Activate the virtual environment:
- On Windows:
```
venv\Scripts\activate
```
- On macOS/Linux:
```
source venv/bin/activate
```
4. Install the required dependencies:
```
pip install -r requirements.txt
```
5. Configure your environment variables by copying `.env.example` to `.env` and filling in the necessary values.
## Usage
To start the chatbot, run the following command:
```
python src/main.py
```
Once the chatbot is running, you can ask questions about the database structure, and it will respond with relevant information.
## Capabilities
- Provides information about the database schema, including tables and their columns.
- Allows users to interact naturally and receive structured responses.
- Designed to be extensible for future enhancements and additional features.
## Contributing
Contributions are welcome! Please feel free to submit a pull request or open an issue for any suggestions or improvements.

View File

@ -1,97 +0,0 @@
import fitz
import os
from openai import OpenAI
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
# Load variables from a local .env file into the process environment so the
# lookup below can see OPENAI_API_KEY.
load_dotenv()
# os.getenv returns None when the variable is not set.
api_key = os.getenv("OPENAI_API_KEY")
# Shared OpenAI client; generate_response uses it for chat completions.
client = OpenAI(api_key=api_key)
def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page of the PDF at *pdf_path*.

    Pages are emitted in document order and each page's text is followed
    by a newline.
    """
    with fitz.open(pdf_path) as pdf:
        page_texts = [page.get_text("text") + "\n" for page in pdf]
    return "".join(page_texts)
def criarChunk(texto):
    """Split *texto* into chunks of at most 500 characters with a 50-character overlap.

    Length is measured with ``len``; the list produced by the splitter's
    ``split_text`` is returned unchanged.
    """
    splitter_options = {
        "chunk_size": 500,
        "chunk_overlap": 50,
        "length_function": len,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_text(texto)
def create_faiss_index(chunks, embeddings):
    """Build and return a FAISS vector store from *chunks* using *embeddings*."""
    return FAISS.from_texts(chunks, embeddings)
def search_faiss(vector_store, query, embeddings, top_k=3):
    """Return the *top_k* documents from *vector_store* most similar to *query*.

    Args:
        vector_store: Object exposing ``similarity_search(query, k=...)``.
        query: The user's question as plain text.
        embeddings: Unused. Kept so existing callers that pass it keep
            working; the search below is given the raw query text, so the
            query embedding that used to be computed here was never read
            and only cost an extra embedding request per search.
        top_k: Maximum number of documents to return.

    Returns:
        Whatever ``vector_store.similarity_search`` returns.
    """
    return vector_store.similarity_search(query, k=top_k)
def debug_embeddings(chunks, embeddings):
    """Print diagnostics about the embeddings generated for *chunks*.

    Embeds every chunk with ``embeddings.embed_documents`` and prints the
    chunk/embedding counts; when at least one embedding exists, also prints
    its length plus the first chunk and the first ten values of its vector.
    """
    vectors = embeddings.embed_documents(chunks)
    print("\n DEBUG: Embeddings Gerados")
    print(f"Número total de chunks: {len(chunks)}")
    print(f"Número total de embeddings: {len(vectors)}")
    if not vectors:
        return
    first_vector = vectors[0]
    print(f"Tamanho do primeiro embedding: {len(first_vector)}")
    print("\n Exemplo de Chunk e seu Embedding:")
    print(f"Chunk: {chunks[0]}")
    print(f"Embedding (primeiros 10 valores): {first_vector[:10]}")
def debug_faiss(vector_store, query, embeddings, top_k=3):
    """Print diagnostics for a similarity search of *query*.

    Shows the length of the query embedding, how many documents the vector
    store returned, and the first 200 characters of each returned document.
    """
    query_vector = embeddings.embed_query(query)
    print(f"\n DEBUG: Tamanho do vetor da pergunta: {len(query_vector)}")
    hits = vector_store.similarity_search(query, k=top_k)
    print("\n DEBUG: Resultados da busca FAISS")
    print(f"Número de chunks retornados: {len(hits)}")
    for position, hit in enumerate(hits, start=1):
        print(f"\n Chunk {position}:")
        # Only a 200-character preview of each chunk is shown.
        print(hit.page_content[:200])
def generate_response(query, vector_store, embeddings):
    """Answer *query* with gpt-3.5-turbo, using retrieved chunks as context.

    The chunks returned by ``search_faiss`` are joined with newlines and sent
    as a second system message, followed by the user's query. Returns the
    text content of the first completion choice.
    """
    retrieved = search_faiss(vector_store, query, embeddings)
    context = "\n".join(doc.page_content for doc in retrieved)
    messages = [
        {"role": "system", "content": "Use o contexto abaixo para responder."},
        {"role": "system", "content": context},
        {"role": "user", "content": query},
    ]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# Build the retrieval index once at start-up from the bundled PDF, printing
# embedding and search diagnostics along the way.
pdf_file = "teste.pdf"
texto_extraido = extract_text_from_pdf(pdf_file)
chunks = criarChunk(texto_extraido)
embeddings = OpenAIEmbeddings()
debug_embeddings(chunks, embeddings)
vector_store = create_faiss_index(chunks, embeddings)
debug_faiss(vector_store, "Exemplo de pesquisa", embeddings)

# Interactive loop: keep answering until the user types an exit word.
print("Chatbot: Olá! Como te posso ajudar?")
user_input = input("Você: ")
while user_input.lower() not in ("sair", "exit", "quit"):
    debug_faiss(vector_store, user_input, embeddings)
    resposta = generate_response(user_input, vector_store, embeddings)
    print("Chatbot:", resposta)
    user_input = input("Você: ")
print("Chatbot: Até logo!")

View File

@ -1,13 +0,0 @@
import openai
import os
from dotenv import load_dotenv
# Smoke test: list the available models to confirm that the API key loaded
# from the environment is accepted.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
try:
    response = openai.models.list()
except openai.OpenAIError as e:
    print("Erro ao conectar a API:", e)
else:
    print("API funciona!")

View File

@ -1,6 +0,0 @@
{
"name": "ChatBot-Python",
"lockfileVersion": 3,
"requires": true,
"packages": {}
}

View File

@ -0,0 +1,4 @@
Flask==2.0.1
openai==0.11.3
python-dotenv==0.19.1
pyodbc==4.0.30

6
ChatBot-Python/src/.env Normal file
View File

@ -0,0 +1,6 @@
OPENAI_API_KEY=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
DB_SERVER=TECHX-DEV1\SQLENERGYMSDEV # Exemplo: localhost\SQLEXPRESS
DB_NAME=EnergyMS_CMBarcelos
DB_USER=sa # Deixa vazio se usares autenticação Windows
DB_PASSWORD=EnergyMS+DEV # Deixa vazio se usares autenticação Windows
#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

View File

@ -0,0 +1,22 @@
class Chatbot:
    """Answers questions about the database structure using a schema provider."""

    def __init__(self, db_connector):
        """Store *db_connector*, an object exposing ``get_schema()``."""
        self.db_connector = db_connector

    def get_database_structure(self):
        """Fetch the schema from the connector and return it as formatted text."""
        schema = self.db_connector.get_schema()
        return self.format_schema(schema)

    def format_schema(self, schema):
        """Render *schema* as human-readable text.

        Accepted shapes:
            * a list of ``{'table': name, 'columns': [{'name', 'type'}, ...]}``
              dicts (the shape this method originally required);
            * a ``{table_name: [column_name, ...]}`` mapping;
            * a string, which is treated as an already-formatted message
              (e.g. an error reported by the schema provider) and returned
              unchanged;
            * ``None`` or an empty container, which yields ``""``.
        """
        if isinstance(schema, str):
            # Schema providers report failures as plain strings; iterating one
            # character by character would raise TypeError below.
            return schema
        if isinstance(schema, dict):
            tables = [
                {'table': name, 'columns': columns}
                for name, columns in schema.items()
            ]
        else:
            tables = schema or []

        parts = []
        for table in tables:
            parts.append(f"Table: {table['table']}\n")
            for column in table['columns']:
                if isinstance(column, dict):
                    parts.append(
                        f" - Column: {column['name']} (Type: {column['type']})\n"
                    )
                else:
                    # Bare column name: no type information available.
                    parts.append(f" - Column: {column}\n")
            parts.append("\n")
        return "".join(parts)

    def respond_to_query(self, query):
        """Return the database structure when *query* asks for it, else an apology."""
        if "database structure" in query.lower():
            return self.get_database_structure()
        return "I'm sorry, I can only provide information about the database structure."

View File

@ -0,0 +1 @@
# This file is intentionally left blank.

View File

@ -0,0 +1,17 @@
class DatabaseConnector:
    """Owns a single pyodbc connection created from a connection string."""

    def __init__(self, connection_string):
        """Remember *connection_string*; no connection is opened yet."""
        self.connection_string = connection_string
        self.connection = None

    def connect(self):
        """Open the connection and return it.

        Returns:
            The open connection (also stored in ``self.connection``), or
            ``None`` when connecting failed. The failure is printed rather
            than raised, as before.
        """
        import pyodbc
        try:
            self.connection = pyodbc.connect(self.connection_string)
        except Exception as e:
            print(f"Error connecting to the database: {e}")
            return None
        print("Database connection established.")
        # Returning the handle lets callers write ``conn = connector.connect()``.
        return self.connection

    def disconnect(self):
        """Close the connection if one is open; calling it again is a no-op."""
        if self.connection:
            self.connection.close()
            # Drop the closed handle so a second disconnect() does not try to
            # close it again.
            self.connection = None
            print("Database connection closed.")

View File

@ -0,0 +1,25 @@
class DatabaseSchema:
    """Reads table and column names from INFORMATION_SCHEMA via a connector."""

    def __init__(self, connector):
        """Store *connector*, an object exposing ``connect()``."""
        self.connector = connector

    def get_schema(self):
        """Return ``{table_name: [column_name, ...]}`` for every base table.

        Returns:
            The mapping on success, or an error message string when the
            connection cannot be obtained or a query fails. The connection
            is closed before returning.
        """
        conn = self.connector.connect()
        if conn is None:
            # Some connectors store the handle on ``.connection`` and return
            # nothing from connect(); fall back to that attribute.
            conn = getattr(self.connector, "connection", None)
        if not conn:
            return "Error: Unable to connect to the database."
        try:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES "
                "WHERE TABLE_TYPE = 'BASE TABLE'"
            )
            tables = cursor.fetchall()
            schema = {}
            for (table_name,) in tables:
                cursor.execute(
                    "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS "
                    "WHERE TABLE_NAME = ? ORDER BY ORDINAL_POSITION",
                    table_name,
                )
                schema[table_name] = [row[0] for row in cursor.fetchall()]
            return schema
        except Exception as e:
            return f"Error retrieving schema: {e}"
        finally:
            conn.close()

View File

@ -21,6 +21,7 @@ def connect_db():
f"UID={os.getenv('DB_USER')};"
f"PWD={os.getenv('DB_PASSWORD')};"
)
print("Conexão com o banco de dados estabelecida com sucesso.")
return conn
except Exception as e:
print(f"Erro ao conectar à base de dados: {e}")
@ -55,6 +56,32 @@ def get_data(atributes=None, limit=20):
finally:
conn.close()
def _quote_identifier(name):
    """Return *name* wrapped in T-SQL brackets, doubling any ``]`` inside it."""
    return "[" + str(name).replace("]", "]]") + "]"


def _fetch_example_row(cursor, schema_name, table_name, limit):
    """Return one sample row of the table as ``{column: value}``, or ``{}``.

    Best effort by design: any failure (no permission, bad *limit*, ...)
    yields an empty dict so one unreadable table does not abort the listing.
    """
    try:
        qualified = f"{_quote_identifier(schema_name)}.{_quote_identifier(table_name)}"
        # int() keeps arbitrary text out of the statement.
        cursor.execute(f"SELECT TOP {int(limit)} * FROM {qualified}")
        row = cursor.fetchone()
        if not row:
            return {}
        # Take names from the result set itself so each value is paired with
        # the column it actually came from.
        names = [description[0] for description in cursor.description]
        return dict(zip(names, row))
    except Exception:
        return {}


def get_schema_with_examples(limit=10):
    """Describe every base table: its columns, their types and one sample row.

    Args:
        limit: Row cap used in the ``SELECT TOP`` of the sample query. Only
            the first returned row is kept as the example.

    Returns:
        A list of ``{'table': str, 'columns': [{'name', 'type'}, ...],
        'example': dict}`` entries, or an error message string when the
        connection or a metadata query fails.
    """
    conn = connect_db()
    if not conn:
        return "Erro: Não foi possível conectar à base de dados."
    try:
        cursor = conn.cursor()
        cursor.execute(
            "SELECT TABLE_SCHEMA, TABLE_NAME FROM INFORMATION_SCHEMA.TABLES "
            "WHERE TABLE_TYPE = 'BASE TABLE'"
        )
        tables = []
        for schema_name, table_name in cursor.fetchall():
            # Filter by schema as well: the same table name can exist in more
            # than one schema, and their columns must not be merged.
            cursor.execute(
                "SELECT COLUMN_NAME, DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS "
                "WHERE TABLE_SCHEMA = ? AND TABLE_NAME = ? "
                "ORDER BY ORDINAL_POSITION",
                schema_name,
                table_name,
            )
            columns = [
                {'name': col_name, 'type': col_type}
                for col_name, col_type in cursor.fetchall()
            ]
            example_dict = _fetch_example_row(cursor, schema_name, table_name, limit)
            tables.append({'table': table_name, 'columns': columns, 'example': example_dict})
        return tables
    except Exception as e:
        return f"Erro ao buscar schema: {e}"
    finally:
        # Single close for both the success and the error path.
        conn.close()
def get_filtered_data(cunit_id=None, date_billling_begin=None, date_billing_end=None, limit=2):
conn = connect_db()
if not conn:
@ -135,7 +162,7 @@ def chat_with_gpt(prompt, attempts=3):
for i in range(attempts):
try:
response = client.chat.completions.create(
model="gpt-3.5-turbo-0125",
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": prompt}],
max_tokens=300
)
@ -478,6 +505,47 @@ def get_total_kwh_by_building_type(building_type=None):
finally:
conn.close()
# Portuguese display label (lower-case) -> database column name.
# The schema branch of the chat loop does reverse lookups on this dict
# (value -> key) to show Portuguese labels in place of raw column names.
# NOTE(review): spellings such as "DateBilllingBegin" and "AddicionalInfo"
# presumably mirror the real column names - verify against the database
# before "correcting" them.
column_mapping = {
    "id": "Id",
    "tipo de energia": "EnergyTypesId",
    "unidade de consumo": "CUnitId",
    "tipo de fatura": "CUnitBillsInvoiceTypeId",
    "tipo de documento": "DocumentTypeId",
    "contrato eletricidade": "CUnitContractElectId",
    "número": "Number",
    "data de receção da fatura": "DateBillReceipt",
    "data": "Date",
    "data inicial": "DateBilllingBegin",
    "data final": "DateBillingEnd",
    "prazo de pagamento": "PaymentDeadline",
    "total": "Total",
    "mb entidade": "MBEnt",
    "mb referência": "MBRef",
    "saldo anterior": "PreviousBalance",
    "saldo anterior dc": "PreviousBalanceDC",
    "pagamentos efetuados": "PaymentsMade",
    "saldo de pagamentos efetuados": "PaymentsMadeBalance",
    "saldo de pagamentos efetuados dc": "PaymentsMadeBalanceDC",
    "faturado": "Billed",
    "saldo faturado": "BilledBalance",
    "saldo faturado dc": "BilledBalanceDC",
    "saldo atual": "CurrentBalance",
    "saldo atual dc": "CurrentBalanceDC",
    "fator de potência": "PowerFactor",
    "potência tomada": "PotTomada",
    "total sem iva normal": "TotalExcludingNormalVAT",
    "total sem iva reduzido": "TotalExcludingReducedVAT",
    "total iva normal": "TotalNormalVAT",
    "total iva reduzido": "TotalReducedVAT",
    "emissão co2": "CO2Emission",
    "consumo médio período faturação": "AvgConsBillingPeriod",
    "consumo médio últimos 12m": "AvgConsLast12M",
    "informação adicional": "AddicionalInfo",
    "data de revisão": "RevisionDate",
    "número normalizado": "NormalizedNumber",
    "data de pagamento": "PaymentDate"
}
if __name__ == "__main__":
conn = connect_db()
@ -507,6 +575,37 @@ if __name__ == "__main__":
if user_input.lower() in ["quit", "exit", "bye"]:
break
if any(word in user_input.lower() for word in ["tabela", "coluna", "campos", "estrutura", "schema"]):
schema_info = get_schema_with_examples()
if isinstance(schema_info, str):
print(f"Chatbot: {schema_info}")
else:
if re.search(r"tabelas|todas as tabelas", user_input.lower()):
nomes = [t['table'] for t in schema_info]
print(f"Chatbot: As tabelas disponíveis são: {', '.join(nomes)}")
else:
for t in schema_info:
if t['table'].lower() in user_input.lower():
colunas = ", ".join([
next((k for k, v in column_mapping.items() if v == c['name']), c['name'])
for c in t['columns']
])
print(f"Chatbot: A tabela '{t['table']}' tem as colunas: {colunas}.")
if t['example']:
exemplo_traduzido = {
next((k for k, v in column_mapping.items() if v == k or v == k or v == col), col): val
for col, val in t['example'].items()
}
exemplo_traduzido = {
next((k for k, v in column_mapping.items() if v == col), col): val
for col, val in t['example'].items()
}
print(f"Exemplo de linha: {exemplo_traduzido}")
break
else:
print("Chatbot: Não encontrei essa tabela. Pergunte por outra ou peça 'tabelas' para ver todas.")
continue
cunit_id, date_billling_begin, date_billing_end, total_requested = parse_user_input(user_input)
if total_requested and cunit_id:

View File

@ -1,13 +1,16 @@
from flask import Flask, request, jsonify
from flask_cors import CORS
import json
import os
from main import (
chat_with_gpt, parse_user_input, get_total_by_cunit, get_filtered_data,
get_price_comparison, compare_current_vs_previous_year, get_top_consumers,
compare_kwh_current_vs_previous_year, get_invoices_by_month_year,
compare_kwh_current_vs_previous_year, get_invoices_by_month_year,get_schema_with_examples,
get_invoices_from_inactive_units, get_total_kwh_by_building_type, get_data
)
import re
from datetime import datetime
import pyodbc
app = Flask(__name__)
CORS(app)
@ -17,10 +20,103 @@ month_map = {
"julho": 7, "agosto": 8, "setembro": 9, "outubro": 10, "novembro": 11, "dezembro": 12
}
# Portuguese display label (lower-case) -> database column name.
# Returned as "mapping" by the /api/schema route and written next to the
# schema by save_schema_to_file.
# NOTE(review): spellings such as "DateBilllingBegin" and "AddicionalInfo"
# presumably mirror the real column names - verify against the database
# before "correcting" them.
column_mapping = {
    "id": "Id",
    "tipo de energia": "EnergyTypesId",
    "unidade de consumo": "CUnitId",
    "tipo de fatura": "CUnitBillsInvoiceTypeId",
    "tipo de documento": "DocumentTypeId",
    "contrato eletricidade": "CUnitContractElectId",
    "número": "Number",
    "data de receção da fatura": "DateBillReceipt",
    "data": "Date",
    "data inicial": "DateBilllingBegin",
    "data final": "DateBillingEnd",
    "prazo de pagamento": "PaymentDeadline",
    "total": "Total",
    "mb entidade": "MBEnt",
    "mb referência": "MBRef",
    "saldo anterior": "PreviousBalance",
    "saldo anterior dc": "PreviousBalanceDC",
    "pagamentos efetuados": "PaymentsMade",
    "saldo de pagamentos efetuados": "PaymentsMadeBalance",
    "saldo de pagamentos efetuados dc": "PaymentsMadeBalanceDC",
    "faturado": "Billed",
    "saldo faturado": "BilledBalance",
    "saldo faturado dc": "BilledBalanceDC",
    "saldo atual": "CurrentBalance",
    "saldo atual dc": "CurrentBalanceDC",
    "fator de potência": "PowerFactor",
    "potência tomada": "PotTomada",
    "total sem iva normal": "TotalExcludingNormalVAT",
    "total sem iva reduzido": "TotalExcludingReducedVAT",
    "total iva normal": "TotalNormalVAT",
    "total iva reduzido": "TotalReducedVAT",
    "emissão co2": "CO2Emission",
    "consumo médio período faturação": "AvgConsBillingPeriod",
    "consumo médio últimos 12m": "AvgConsLast12M",
    "informação adicional": "AddicionalInfo",
    "data de revisão": "RevisionDate",
    "número normalizado": "NormalizedNumber",
    "data de pagamento": "PaymentDate"
}
def get_db_schema():
    """Return every table with its columns, read through the ODBC catalog calls.

    Connection settings come from the DB_SERVER, DB_NAME, DB_USER and
    DB_PASSWORD environment variables. When a variable is unset, the
    placeholder that used to be hard-coded here is used, so the resulting
    connection string is unchanged in that case.

    Returns:
        A list of ``{'table': str, 'columns': [{'name', 'type'}, ...]}``.

    Raises:
        Whatever ``pyodbc`` raises on connection or catalog failures; the
        connection is closed in either case.
    """
    connection_string = (
        "DRIVER={SQL Server};"
        f"SERVER={os.getenv('DB_SERVER', 'SEU_SERVIDOR')};"
        f"DATABASE={os.getenv('DB_NAME', 'SEU_BANCO')};"
        f"UID={os.getenv('DB_USER', 'USUARIO')};"
        f"PWD={os.getenv('DB_PASSWORD', 'SENHA')}"
    )
    conn = pyodbc.connect(connection_string)
    try:
        cursor = conn.cursor()
        # Read all table names first: calling cursor.columns() on the same
        # cursor replaces the pending tables() result set, so the two must
        # not be interleaved.
        table_names = [row.table_name for row in list(cursor.tables(tableType='TABLE'))]
        tables = []
        for table_name in table_names:
            columns = [
                {'name': col.column_name, 'type': col.type_name}
                for col in cursor.columns(table=table_name)
            ]
            tables.append({'table': table_name, 'columns': columns})
        return tables
    finally:
        conn.close()
def save_schema_to_file(schema, mapping, folder='schema'):
    """Write *schema* and *mapping* to ``<folder>/schema.json`` as UTF-8 JSON.

    The folder is created when missing. The file holds one object with the
    keys ``schema`` and ``mapping``, indented by two spaces, with non-ASCII
    characters written as-is.
    """
    os.makedirs(folder, exist_ok=True)
    target_path = os.path.join(folder, 'schema.json')
    document = {'schema': schema, 'mapping': mapping}
    serialized = json.dumps(document, ensure_ascii=False, indent=2)
    with open(target_path, 'w', encoding='utf-8') as handle:
        handle.write(serialized)
def answer_schema_question(question):
    """Answer a question about tables/columns from the live schema.

    Args:
        question: The user's text; matching is case-insensitive.

    Returns:
        The list of table names when the question contains "tabelas", the
        columns and example row of the first table whose name appears in
        the question, a fallback message when nothing matches, or the error
        message produced by ``get_schema_with_examples`` when the schema
        could not be read.
    """
    schema = get_schema_with_examples()
    if isinstance(schema, str):
        # The schema helper reports failures as a plain string; iterating it
        # as if it were a list of tables would raise TypeError.
        return schema
    question = question.lower()
    if "tabelas" in question:
        return "As tabelas disponíveis são: " + ", ".join(t['table'] for t in schema)
    for t in schema:
        if t['table'].lower() in question:
            cols = ", ".join(c['name'] for c in t['columns'])
            return f"A tabela {t['table']} tem as colunas: {cols}.\nExemplo: {t['example']}"
    return "Não consegui encontrar informação sobre essa tabela ou coluna."
@app.route('/api/schema-exemplo', methods=['GET'])
def schema_exemplo():
    """Return the result of ``get_schema_with_examples`` as JSON under ``schema``."""
    payload = {'schema': get_schema_with_examples()}
    return jsonify(payload)
@app.route('/api/schema', methods=['GET'])
def schema():
    """Return the database schema plus the label mapping as JSON.

    The same data is also written to disk through ``save_schema_to_file``
    on every request.
    """
    tables = get_db_schema()
    save_schema_to_file(tables, column_mapping)
    payload = {'schema': tables, 'mapping': column_mapping}
    return jsonify(payload)
@app.route('/api/chat', methods=['POST'])
def chat():
data = request.json
user_input = data.get('message', '')
user_input = data.get('message', '').lower()
if "tabela" in user_input or "coluna" in user_input:
resposta = answer_schema_question(user_input)
return jsonify({'reply': resposta})
if "colunas" in user_input and ("fatura" in user_input or "cunitbills" in user_input):
schema_info = get_db_schema()
for table in schema_info:
if table['table'].lower() == 'cunitbills':
colunas = [col['name'] for col in table['columns']]
return jsonify({'reply': "As colunas da tabela de faturas são:\n" + ", ".join(colunas)})
return jsonify({'reply': "Tabela de faturas não encontrada no banco de dados."})
cunit_id, date_billling_begin, date_billing_end, total_requested = parse_user_input(user_input)

View File

@ -0,0 +1,13 @@
def format_response(data):
    """Render a list of table descriptions as Markdown-style text.

    Args:
        data: Iterable of dicts, each with a ``columns`` list of
            ``{'name', 'type'}`` dicts and the table name under ``name``
            or, as a fallback, under ``table``.

    Returns:
        One section per table (bold table name, then a bulleted column
        list) with a blank line after each; ``""`` for empty input.

    Raises:
        KeyError: When a table entry has neither ``name`` nor ``table``.
    """
    parts = []
    for table in data:
        # Accept 'table' as well as 'name' for the table's name.
        table_name = table['name'] if 'name' in table else table['table']
        parts.append(f"**Table:** {table_name}\n")
        parts.append("Columns:\n")
        parts.extend(
            f"- {column['name']} ({column['type']})\n"
            for column in table['columns']
        )
        parts.append("\n")
    return "".join(parts)
def parse_user_input(user_input):
    """Return *user_input* with leading and trailing whitespace removed.

    Placeholder for richer parsing; for now only trimming is performed.
    """
    trimmed = user_input.strip()
    return trimmed