import html
import os
import time

import openpyxl
import pdfplumber
import requests
from django.core.files.base import ContentFile
from django.core.files.storage import default_storage
from django.http import HttpResponse
from docx import Document
from langchain_groq import ChatGroq
from langchain_groq.chat_models import SystemMessage, HumanMessage
from nltk import download as nltk_download, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Groq API key taken from the environment; None when GROQ_API_KEY is unset.
API_KEY = os.environ.get('GROQ_API_KEY')

# Download the NLTK resources at import time, in the same order as before
# (presumably the data behind the tokenizer, lemmatizer and stop-word list
# used by clean_text).
for _nltk_resource in ('punkt', 'wordnet', 'stopwords'):
    nltk_download(_nltk_resource)

def clean_text(text):
    """Normalize free text into a space-separated string of lemmatized words.

    The text is lower-cased and tokenized; tokens that are not purely
    alphabetic, or that appear in NLTK's French stop-word list, are dropped.
    The remaining tokens are passed through ``WordNetLemmatizer``.

    Args:
        text: Raw text to clean. ``None`` and the empty string are accepted.

    Returns:
        The surviving lemmatized tokens joined by single spaces, or ``''``
        when there is nothing to clean.
    """
    # Nothing to tokenize; also avoids an AttributeError on None.
    if not text:
        return ''

    tokens = word_tokenize(text.lower())
    stop_words = set(stopwords.words('french'))
    # Build the lemmatizer once per call rather than once per token.
    # NOTE(review): WordNet is an English lexicon while the stop words are
    # French -- confirm which language this pipeline is meant to target.
    lemmatizer = WordNetLemmatizer()
    return ' '.join(
        lemmatizer.lemmatize(token)
        for token in tokens
        if token.isalpha() and token not in stop_words
    )

# Upper bound, in seconds, on how long the view waits for a user-supplied URL.
_URL_FETCH_TIMEOUT = 10


def _extract_file_text(file, extension):
    """Return the raw text of an uploaded file, or None for an unsupported format."""
    if extension == 'pdf':
        file_path = default_storage.save(file.name, ContentFile(file.read()))
        try:
            with pdfplumber.open(default_storage.path(file_path)) as pdf:
                # Extract each page once; pages that yield no text are skipped.
                page_texts = (page.extract_text() for page in pdf.pages)
                return '\n'.join(text for text in page_texts if text)
        finally:
            # Always remove the temporary copy, even if parsing failed.
            default_storage.delete(file_path)

    if extension == 'txt':
        # Undecodable bytes are replaced instead of aborting the request.
        return file.read().decode('utf-8', errors='replace')

    # NOTE(review): python-docx and openpyxl target the OOXML formats; legacy
    # .doc / .xls uploads are expected to fail in these branches -- confirm
    # and reject them explicitly if so.
    if extension in ('doc', 'docx'):  # Word documents
        # Join with a separator so words of adjacent paragraphs do not merge.
        return '\n'.join(
            paragraph.text for paragraph in Document(file).paragraphs
        )

    if extension in ('xls', 'xlsx'):  # Excel workbooks
        workbook = openpyxl.load_workbook(file)
        values = []
        for sheet_name in workbook.sheetnames:
            for row in workbook[sheet_name].iter_rows(values_only=True):
                # Skip only truly empty cells; falsy values such as 0 are kept.
                values.extend(str(cell) for cell in row if cell is not None)
        return ' '.join(values)

    return None


def analyze_data(request):
    """Answer a user prompt with the Groq LLM, optionally enriched with context.

    On POST, reads ``prompt`` (required) plus an optional uploaded ``file``
    (pdf, txt, doc/docx, xls/xlsx) or an optional ``url``. The file takes
    precedence over the URL. The extracted text is cleaned with
    ``clean_text``, appended to the prompt and sent to the chat model.

    Args:
        request: The Django request object.

    Returns:
        An ``HttpResponse`` holding an HTML fragment: either the escaped
        prompt and model answer, or a French error message. Every response
        uses the default status code.
    """
    if request.method != 'POST':
        return HttpResponse('<p>Erreur : Requête invalide.</p>')

    prompt_user = request.POST.get('prompt')
    file = request.FILES.get('file')
    url = request.POST.get('url')

    if not prompt_user:
        return HttpResponse('<p>Erreur : Aucun prompt fourni.</p>')

    data = ''
    if file:
        extension = file.name.split('.')[-1].lower()
        data = _extract_file_text(file, extension)
        if data is None:
            return HttpResponse('<p>Erreur : Format de fichier non pris en charge.</p>')
    elif url:
        # The Groq API key must never be sent to a user-supplied host, so no
        # Authorization header is attached to this request.
        # NOTE(review): fetching arbitrary URLs server-side can reach internal
        # hosts (SSRF) -- consider an allow-list.
        try:
            response = requests.get(url, timeout=_URL_FETCH_TIMEOUT)
        except requests.RequestException:
            return HttpResponse('<p>Erreur lors de l\'accès à l\'URL fournie.</p>')
        if response.status_code != 200:
            return HttpResponse(f'<p>Erreur lors de l\'accès à l\'URL fournie : {response.status_code}</p>')
        data = response.text

    # Clean once, whatever the source; file text used to be cleaned twice.
    if data:
        data = clean_text(data.replace('\n', ' ').replace('\r', ''))

    # Combine the user's prompt with any context gathered from the file or URL.
    full_text = f"{prompt_user} {data}".strip()

    chat = ChatGroq(groq_api_key=API_KEY, model_name="llama3-8b-8192")
    system_message = SystemMessage(content="Tu es un assistant bienveillant, tu es là pour aider des personnes. Tu réponds toujours en français et utilises le contexte de la conversation puis tu réponds avec des titres, sous titres, liste...")
    human_message = HumanMessage(content=full_text)

    result = chat.invoke([system_message, human_message])

    # Both values originate from untrusted input (the prompt directly, the
    # answer via file/URL content), so escape them before embedding in HTML.
    safe_prompt = html.escape(prompt_user)
    safe_result = html.escape(str(result.content))
    return HttpResponse(f'<p>Prompt de l\'utilisateur : {safe_prompt}</p><p>Résultat de l\'analyse : {safe_result}</p>')
