diff --git a/ChatBot-Python/.env b/ChatBot-Python/.env deleted file mode 100644 index 55744ec..0000000 --- a/ChatBot-Python/.env +++ /dev/null @@ -1,5 +0,0 @@ -OPENAI_API_KEY=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 -DB_SERVER=TECHX-DEV1\SQLENERGYMSDEV # Exemplo: localhost\SQLEXPRESS -DB_NAME=EnergyMS_CMBarcelos -DB_USER=sa # Deixa vazio se usares autenticação Windows -DB_PASSWORD=EnergyMS+DEV # Deixa vazio se usares autenticação Windows \ No newline at end of file diff --git a/ChatBot-Python/.env.example b/ChatBot-Python/.env.example new file mode 100644 index 0000000..c560e34 --- /dev/null +++ b/ChatBot-Python/.env.example @@ -0,0 +1,4 @@ +DATABASE_URL=your_database_url_here +DATABASE_USER=your_database_user_here +DATABASE_PASSWORD=your_database_password_here +OPENAI_API_KEY=your_openai_api_key_here \ No newline at end of file diff --git a/ChatBot-Python/README.md b/ChatBot-Python/README.md new file mode 100644 index 0000000..95e2c83 --- /dev/null +++ b/ChatBot-Python/README.md @@ -0,0 +1,70 @@ +# Chatbot Database Structure + +This project implements a chatbot that can interact with users and provide information about the structure of a database. The chatbot is designed to be easy to use and requires minimal coding knowledge to operate. + +## Project Structure + +``` +chatbot-db-structure +├── src +│ ├── main.py # Entry point of the application +│ ├── chatbot.py # Contains the Chatbot class for managing conversations +│ ├── db +│ │ ├── __init__.py # Initializes the db package +│ │ ├── connector.py # Handles database connection +│ │ └── schema.py # Retrieves and formats the database schema +│ └── utils +│ └── helpers.py # Utility functions for various tasks +├── requirements.txt # Lists project dependencies +├── .env.example # Example environment variables +└── README.md # Documentation for the project +``` + +## Setup Instructions + +1. 
Clone the repository: + ``` + git clone <repository-url> + cd chatbot-db-structure + ``` + +2. Create a virtual environment: + ``` + python -m venv venv + ``` + +3. Activate the virtual environment: + - On Windows: + ``` + venv\Scripts\activate + ``` + - On macOS/Linux: + ``` + source venv/bin/activate + ``` + +4. Install the required dependencies: + ``` + pip install -r requirements.txt + ``` + +5. Configure your environment variables by copying `.env.example` to `.env` and filling in the necessary values. + +## Usage + +To start the chatbot, run the following command: +``` +python src/main.py +``` + +Once the chatbot is running, you can ask questions about the database structure, and it will respond with relevant information. + +## Capabilities + +- Provides information about the database schema, including tables and their columns. +- Allows users to interact naturally and receive structured responses. +- Designed to be extensible for future enhancements and additional features. + +## Contributing + +Contributions are welcome! Please feel free to submit a pull request or open an issue for any suggestions or improvements. 
\ No newline at end of file diff --git a/ChatBot-Python/debug.py b/ChatBot-Python/debug.py deleted file mode 100644 index e2ef657..0000000 --- a/ChatBot-Python/debug.py +++ /dev/null @@ -1,97 +0,0 @@ -import fitz -import os -from openai import OpenAI -from dotenv import load_dotenv -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_openai import OpenAIEmbeddings -from langchain_community.vectorstores import FAISS - -load_dotenv() -api_key = os.getenv("OPENAI_API_KEY") -client = OpenAI(api_key=api_key) - -def extract_text_from_pdf(pdf_path): - text = "" - with fitz.open(pdf_path) as doc: - for page in doc: - text += page.get_text("text") + "\n" - return text - -def criarChunk(texto): - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=500, - chunk_overlap=50, - length_function=len - ) - return text_splitter.split_text(texto) - -def create_faiss_index(chunks, embeddings): - vector_store = FAISS.from_texts(chunks, embeddings) # Criar índice FAISS - return vector_store - -def search_faiss(vector_store, query, embeddings, top_k=3): - query_embedding = embeddings.embed_query(query) # Gerar embedding da pergunta - docs = vector_store.similarity_search(query, k=top_k) # Procurar no FAISS - return docs - -def debug_embeddings(chunks, embeddings): - embeddings_list = embeddings.embed_documents(chunks) - - print(f"\n DEBUG: Embeddings Gerados") - print(f"Número total de chunks: {len(chunks)}") - print(f"Número total de embeddings: {len(embeddings_list)}") - - if embeddings_list: - print(f"Tamanho do primeiro embedding: {len(embeddings_list[0])}") - - print("\n Exemplo de Chunk e seu Embedding:") - print(f"Chunk: {chunks[0]}") - print(f"Embedding (primeiros 10 valores): {embeddings_list[0][:10]}") - -def debug_faiss(vector_store, query, embeddings, top_k=3): - query_embedding = embeddings.embed_query(query) - print(f"\n DEBUG: Tamanho do vetor da pergunta: {len(query_embedding)}") - - docs = vector_store.similarity_search(query, 
k=top_k) - print("\n DEBUG: Resultados da busca FAISS") - print(f"Número de chunks retornados: {len(docs)}") - - for i, doc in enumerate(docs): - print(f"\n Chunk {i+1}:") - print(doc.page_content[:200]) # Mostra os primeiros 200 caracteres do chunk - -def generate_response(query, vector_store, embeddings): - docs = search_faiss(vector_store, query, embeddings) - context = "\n".join([doc.page_content for doc in docs]) - - response = client.chat.completions.create( - model="gpt-3.5-turbo", - messages=[ - {"role": "system", "content": "Use o contexto abaixo para responder."}, - {"role": "system", "content": context}, - {"role": "user", "content": query} - ] - ) - return response.choices[0].message.content - -pdf_file = "teste.pdf" -texto_extraido = extract_text_from_pdf(pdf_file) -chunks = criarChunk(texto_extraido) - -embeddings = OpenAIEmbeddings() - -debug_embeddings(chunks, embeddings) - -vector_store = create_faiss_index(chunks, embeddings) -debug_faiss(vector_store, "Exemplo de pesquisa", embeddings) - -print("Chatbot: Olá! 
Como te posso ajudar?") -while True: - user_input = input("Você: ") - if user_input.lower() in ["sair", "exit", "quit"]: - print("Chatbot: Até logo!") - break - - debug_faiss(vector_store, user_input, embeddings) - resposta = generate_response(user_input, vector_store, embeddings) - print("Chatbot:", resposta) diff --git a/ChatBot-Python/debugAPI.py b/ChatBot-Python/debugAPI.py deleted file mode 100644 index 3163b3b..0000000 --- a/ChatBot-Python/debugAPI.py +++ /dev/null @@ -1,13 +0,0 @@ -import openai -import os -from dotenv import load_dotenv - -load_dotenv() - -openai.api_key = os.getenv("OPENAI_API_KEY") - -try: - response = openai.models.list() - print("API funciona!") -except openai.OpenAIError as e: - print("Erro ao conectar a API:", e) diff --git a/ChatBot-Python/package-lock.json b/ChatBot-Python/package-lock.json deleted file mode 100644 index 4c1e26c..0000000 --- a/ChatBot-Python/package-lock.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "name": "ChatBot-Python", - "lockfileVersion": 3, - "requires": true, - "packages": {} -} diff --git a/ChatBot-Python/requirements.txt b/ChatBot-Python/requirements.txt new file mode 100644 index 0000000..7eec745 --- /dev/null +++ b/ChatBot-Python/requirements.txt @@ -0,0 +1,4 @@ +Flask==2.0.1 +openai==0.11.3 +python-dotenv==0.19.1 +pyodbc==4.0.30 \ No newline at end of file diff --git a/ChatBot-Python/src/.env b/ChatBot-Python/src/.env new file mode 100644 index 0000000..3776dcc --- /dev/null +++ b/ChatBot-Python/src/.env @@ -0,0 +1,6 @@ +OPENAI_API_KEY=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 +DB_SERVER=TECHX-DEV1\SQLENERGYMSDEV # Exemplo: localhost\SQLEXPRESS +DB_NAME=EnergyMS_CMBarcelos +DB_USER=sa # Deixa vazio se usares autenticação Windows +DB_PASSWORD=EnergyMS+DEV # Deixa vazio se usares autenticação Windows 
class Chatbot:
    """Answer questions about the structure of a database.

    The bot delegates schema retrieval to ``db_connector`` (any object that
    exposes a ``get_schema()`` method) and turns the result into plain text.
    """

    # Reply used for every query the bot does not recognise.
    _FALLBACK_REPLY = "I'm sorry, I can only provide information about the database structure."

    def __init__(self, db_connector):
        """Store the schema provider.

        Args:
            db_connector: Object whose ``get_schema()`` method returns the
                schema to describe.
        """
        self.db_connector = db_connector

    def get_database_structure(self):
        """Fetch the schema from the provider and return it formatted as text."""
        return self.format_schema(self.db_connector.get_schema())

    def format_schema(self, schema):
        """Render a schema description as human-readable text.

        Args:
            schema: One of
                * a list of ``{'table': str, 'columns': [...]}`` dicts, where
                  each column is either a ``{'name', 'type'}`` dict or a
                  plain column name;
                * a mapping ``{table_name: [column, ...]}``;
                * a string, which is treated as an already-formatted
                  (error) message and returned unchanged.

        Returns:
            The formatted text; an empty string for an empty schema.
        """
        # Schema providers report failures as a plain string; iterating it
        # character by character would raise TypeError, so pass it through.
        if isinstance(schema, str):
            return schema

        if isinstance(schema, dict):
            entries = [
                {'table': table_name, 'columns': columns}
                for table_name, columns in schema.items()
            ]
        else:
            entries = schema

        parts = []
        for table in entries:
            parts.append(f"Table: {table['table']}\n")
            for column in table['columns']:
                if isinstance(column, dict):
                    parts.append(f" - Column: {column['name']} (Type: {column['type']})\n")
                else:
                    # Bare column name: no type information is available.
                    parts.append(f" - Column: {column}\n")
            parts.append("\n")
        return "".join(parts)

    def respond_to_query(self, query):
        """Return the database structure if the query asks for it.

        Args:
            query: The user's message; ``None`` or an empty string is treated
                as an unrecognised query.

        Returns:
            The formatted schema when the query contains the phrase
            "database structure" (case-insensitive), otherwise a fixed
            apology message.
        """
        if "database structure" in (query or "").lower():
            return self.get_database_structure()
        return self._FALLBACK_REPLY
class DatabaseConnector:
    """Open and close a single pyodbc connection described by a connection string."""

    def __init__(self, connection_string):
        """Remember the connection string; no connection is opened yet.

        Args:
            connection_string: ODBC connection string handed to
                ``pyodbc.connect``.
        """
        self.connection_string = connection_string
        # Currently open connection, or None when disconnected.
        self.connection = None

    def connect(self):
        """Open the connection and return it.

        Returns:
            The open connection (also stored in ``self.connection``), or
            ``None`` when connecting failed. Callers test the return value
            for truthiness, so it must not be dropped.
        """
        # Imported lazily so the module can be loaded without pyodbc installed.
        import pyodbc
        try:
            self.connection = pyodbc.connect(self.connection_string)
            print("Database connection established.")
        except Exception as e:
            # Best effort: report the problem and signal failure with None
            # instead of leaving a stale handle from an earlier connect().
            self.connection = None
            print(f"Error connecting to the database: {e}")
        return self.connection

    def disconnect(self):
        """Close the connection if one is open; otherwise do nothing."""
        if self.connection:
            try:
                self.connection.close()
                print("Database connection closed.")
            finally:
                # Never keep a reference to a closed (or half-closed) handle.
                self.connection = None
def get_schema_with_examples(limit=10):
    """Describe every base table: its columns, their types and one sample row.

    Args:
        limit: Number used in the ``TOP`` clause of the sample query. Only the
            first returned row is kept as the example.

    Returns:
        On success, a list with one dict per table::

            {'table': <name>, 'columns': [{'name', 'type'}, ...], 'example': {...}}

        ``example`` maps column name to value for the sample row and is empty
        when the table has no rows or the sample query failed.
        On failure, a string containing the error message.
    """
    conn = connect_db()
    if not conn:
        return "Erro: Não foi possível conectar à base de dados."
    try:
        # Only a real integer may be interpolated into the TOP clause.
        top_n = int(limit)
        cursor = conn.cursor()
        cursor.execute("SELECT TABLE_SCHEMA, TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE'")
        tables = []
        # fetchall() first: the same cursor is reused for the queries below.
        for schema_name, table_name in cursor.fetchall():
            # Filter on the schema as well, otherwise same-named tables in
            # different schemas would have their columns merged.
            cursor.execute(
                "SELECT COLUMN_NAME, DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS "
                "WHERE TABLE_SCHEMA = ? AND TABLE_NAME = ? ORDER BY ORDINAL_POSITION",
                schema_name,
                table_name,
            )
            columns = [
                {'name': col_name, 'type': col_type}
                for col_name, col_type in cursor.fetchall()
            ]

            example_dict = {}
            try:
                # Identifiers cannot be bound as parameters; bracket-quote
                # them and escape any closing bracket inside the name.
                qualified = ".".join(
                    "[" + part.replace("]", "]]") + "]"
                    for part in (schema_name, table_name)
                )
                cursor.execute(f"SELECT TOP {top_n} * FROM {qualified}")
                example = cursor.fetchone()
                if example:
                    # Label values with the names reported for this result
                    # set, so labels and values are guaranteed to line up.
                    result_names = [desc[0] for desc in cursor.description]
                    example_dict = dict(zip(result_names, example))
            except Exception:
                # The sample row is optional: a table that cannot be read
                # must not prevent the rest of the schema from being listed.
                example_dict = {}

            tables.append({'table': table_name, 'columns': columns, 'example': example_dict})
        return tables
    except Exception as e:
        return f"Erro ao buscar schema: {e}"
    finally:
        conn.close()
efetuados": "PaymentsMade", + "saldo de pagamentos efetuados": "PaymentsMadeBalance", + "saldo de pagamentos efetuados dc": "PaymentsMadeBalanceDC", + "faturado": "Billed", + "saldo faturado": "BilledBalance", + "saldo faturado dc": "BilledBalanceDC", + "saldo atual": "CurrentBalance", + "saldo atual dc": "CurrentBalanceDC", + "fator de potência": "PowerFactor", + "potência tomada": "PotTomada", + "total sem iva normal": "TotalExcludingNormalVAT", + "total sem iva reduzido": "TotalExcludingReducedVAT", + "total iva normal": "TotalNormalVAT", + "total iva reduzido": "TotalReducedVAT", + "emissão co2": "CO2Emission", + "consumo médio período faturação": "AvgConsBillingPeriod", + "consumo médio últimos 12m": "AvgConsLast12M", + "informação adicional": "AddicionalInfo", + "data de revisão": "RevisionDate", + "número normalizado": "NormalizedNumber", + "data de pagamento": "PaymentDate" +} + if __name__ == "__main__": conn = connect_db() @@ -507,6 +575,37 @@ if __name__ == "__main__": if user_input.lower() in ["quit", "exit", "bye"]: break + if any(word in user_input.lower() for word in ["tabela", "coluna", "campos", "estrutura", "schema"]): + schema_info = get_schema_with_examples() + if isinstance(schema_info, str): + print(f"Chatbot: {schema_info}") + else: + if re.search(r"tabelas|todas as tabelas", user_input.lower()): + nomes = [t['table'] for t in schema_info] + print(f"Chatbot: As tabelas disponíveis são: {', '.join(nomes)}") + else: + for t in schema_info: + if t['table'].lower() in user_input.lower(): + colunas = ", ".join([ + next((k for k, v in column_mapping.items() if v == c['name']), c['name']) + for c in t['columns'] + ]) + print(f"Chatbot: A tabela '{t['table']}' tem as colunas: {colunas}.") + if t['example']: + exemplo_traduzido = { + next((k for k, v in column_mapping.items() if v == k or v == k or v == col), col): val + for col, val in t['example'].items() + } + exemplo_traduzido = { + next((k for k, v in column_mapping.items() if v == col), col): 
val + for col, val in t['example'].items() + } + print(f"Exemplo de linha: {exemplo_traduzido}") + break + else: + print("Chatbot: Não encontrei essa tabela. Pergunte por outra ou peça 'tabelas' para ver todas.") + continue + cunit_id, date_billling_begin, date_billing_end, total_requested = parse_user_input(user_input) if total_requested and cunit_id: diff --git a/ChatBot-Python/server.py b/ChatBot-Python/src/server.py similarity index 58% rename from ChatBot-Python/server.py rename to ChatBot-Python/src/server.py index 6d71de7..46669b9 100644 --- a/ChatBot-Python/server.py +++ b/ChatBot-Python/src/server.py @@ -1,13 +1,16 @@ from flask import Flask, request, jsonify from flask_cors import CORS +import json +import os from main import ( chat_with_gpt, parse_user_input, get_total_by_cunit, get_filtered_data, get_price_comparison, compare_current_vs_previous_year, get_top_consumers, - compare_kwh_current_vs_previous_year, get_invoices_by_month_year, + compare_kwh_current_vs_previous_year, get_invoices_by_month_year,get_schema_with_examples, get_invoices_from_inactive_units, get_total_kwh_by_building_type, get_data ) import re from datetime import datetime +import pyodbc app = Flask(__name__) CORS(app) @@ -17,10 +20,103 @@ month_map = { "julho": 7, "agosto": 8, "setembro": 9, "outubro": 10, "novembro": 11, "dezembro": 12 } +column_mapping = { + "id": "Id", + "tipo de energia": "EnergyTypesId", + "unidade de consumo": "CUnitId", + "tipo de fatura": "CUnitBillsInvoiceTypeId", + "tipo de documento": "DocumentTypeId", + "contrato eletricidade": "CUnitContractElectId", + "número": "Number", + "data de receção da fatura": "DateBillReceipt", + "data": "Date", + "data inicial": "DateBilllingBegin", + "data final": "DateBillingEnd", + "prazo de pagamento": "PaymentDeadline", + "total": "Total", + "mb entidade": "MBEnt", + "mb referência": "MBRef", + "saldo anterior": "PreviousBalance", + "saldo anterior dc": "PreviousBalanceDC", + "pagamentos efetuados": "PaymentsMade", + 
def get_db_schema():
    """List every table in the database together with its columns.

    Returns:
        A list of ``{'table': <name>, 'columns': [{'name', 'type'}, ...]}``
        dicts, one per table reported by the ODBC catalog.

    Raises:
        pyodbc.Error: If the connection or a catalog query fails.
    """
    # NOTE(review): the original connection string held placeholder text
    # (SEU_SERVIDOR / SEU_BANCO / USUARIO / SENHA) and could never connect.
    # This assumes the DB_* environment variables used elsewhere in the
    # project are set before the call - confirm.
    conn = pyodbc.connect(
        "DRIVER={SQL Server};"
        f"SERVER={os.getenv('DB_SERVER')};"
        f"DATABASE={os.getenv('DB_NAME')};"
        f"UID={os.getenv('DB_USER')};"
        f"PWD={os.getenv('DB_PASSWORD')};"
    )
    try:
        cursor = conn.cursor()
        # Materialise the table names before querying columns: both catalog
        # calls run on the same cursor, so starting the second one while
        # still iterating the first would discard the remaining tables.
        table_names = [row.table_name for row in cursor.tables(tableType='TABLE')]
        tables = []
        for table_name in table_names:
            columns = [
                {'name': col.column_name, 'type': col.type_name}
                for col in cursor.columns(table=table_name)
            ]
            tables.append({'table': table_name, 'columns': columns})
        return tables
    finally:
        # Release the connection even when a catalog query raises.
        conn.close()
def format_response(data):
    """Format a list of table descriptions as Markdown-style text.

    Args:
        data: Iterable of dicts, each with a ``'columns'`` list of
            ``{'name', 'type'}`` dicts and the table name under either
            ``'name'`` or ``'table'``.

    Returns:
        One section per table (header, "Columns:" line, one bullet per
        column, blank line); an empty string when ``data`` is empty.

    Raises:
        KeyError: If a table has neither a ``'name'`` nor a ``'table'`` key,
            or a column lacks ``'name'`` / ``'type'``.
    """
    parts = []
    for table in data:
        # Accept both spellings of the table-name key so schema dicts that
        # use 'table' can be formatted without conversion.
        table_name = table['name'] if 'name' in table else table['table']
        parts.append(f"**Table:** {table_name}\n")
        parts.append("Columns:\n")
        parts.extend(
            f"- {column['name']} ({column['type']})\n"
            for column in table['columns']
        )
        parts.append("\n")
    return "".join(parts)