"""
Centralized AI Manager Service (AIManagerService).

Responsibilities:
  - Embedding generation (OpenAI or Gemini)
  - Vector similarity calculation (Cosine similarity)
  - Chat/Answer generation (Gemini with OpenAI fallback)
  - Failure/Timeout handling and response normalization
"""
import logging
import json
import math
import asyncio
from typing import Optional, Dict, Any, List

import core.config as cfg

# Module-wide logger; every function in this file reports through the
# "chatbot.ai_service" logger name.
logger = logging.getLogger("chatbot.ai_service")

# ── Clients Initialization ───────────────────────────────────────────────────
# Shared provider handles used by the chat/streaming functions in this file.
# Both start as None and are only assigned by init_clients() when the matching
# API key is configured and the SDK object is created without raising.
# A falsy value is treated throughout this module as "provider unavailable".
gemini_model = None
openai_client = None

def init_clients():
    """
    Populate the module-level ``gemini_model`` and ``openai_client`` handles.

    Each provider is set up independently and only when its API key in ``cfg``
    is set. Any exception raised while importing or constructing an SDK object
    is logged as a warning and swallowed, leaving that provider's handle at
    whatever value it had before the failure.
    """
    global gemini_model, openai_client

    # --- Gemini -------------------------------------------------------------
    gemini_key = cfg.GEMINI_API_KEY
    if gemini_key:
        try:
            # Imported lazily so the SDK is only required when a key is set.
            import google.generativeai as genai

            genai.configure(api_key=gemini_key)
            gemini_model = genai.GenerativeModel(model_name=cfg.GEMINI_CHAT_MODEL)
            logger.info(f"Gemini client initialized. Model: {cfg.GEMINI_CHAT_MODEL}")
        except Exception as exc:
            logger.warning(f"Gemini client initialization failed: {exc}")

    # --- OpenAI -------------------------------------------------------------
    openai_key = cfg.OPENAI_API_KEY
    # Keys beginning with "sk-proj-your-" are skipped; presumably this is the
    # placeholder from a sample config rather than a real key.
    if openai_key and not openai_key.startswith("sk-proj-your-"):
        try:
            from openai import AsyncOpenAI

            openai_client = AsyncOpenAI(api_key=openai_key)
            logger.info(f"OpenAI client initialized. Model: {cfg.OPENAI_MODEL}")
        except Exception as exc:
            logger.warning(f"OpenAI client initialization failed: {exc}")

# Runs once when this module is imported, so the provider handles are ready
# before any of the functions below are called.
init_clients()


# ── Embedding Generation ──────────────────────────────────────────────────────

async def generate_embedding(text: str, provider: str = "openai") -> List[float]:
    """
    Generate an embedding vector for ``text`` with the requested provider.

    Args:
        text: Text to embed. Empty/falsy input short-circuits to ``[]`` before
            any configuration or network access.
        provider: ``"gemini"`` (case-insensitive) selects Gemini; any other
            value is routed to OpenAI.

    Returns:
        The embedding as a list of floats. When neither API key is configured,
        a zero vector is returned instead: 768 entries for Gemini, 1536 for
        everything that would have been routed to OpenAI.

    Raises:
        Exception: The provider SDK's error, when the OpenAI call fails or
            when Gemini fails and no OpenAI key is configured for fallback.
    """
    if not text:
        return []

    wants_gemini = provider.lower() == "gemini"

    # Safe fallback if API keys are missing. The dummy size follows the same
    # routing rule as the real call below, so an unrecognised provider name
    # gets the OpenAI-sized vector it would otherwise have received.
    if not cfg.OPENAI_API_KEY and not cfg.GEMINI_API_KEY:
        logger.warning("No API keys configured. Returning dummy embedding.")
        return [0.0] * (768 if wants_gemini else 1536)

    if not wants_gemini:
        return await _generate_openai_embedding_direct(text)

    try:
        import google.generativeai as genai

        # embed_content is synchronous; run it in the default executor so the
        # event loop stays responsive. get_running_loop() is the supported way
        # to obtain the loop from inside a coroutine.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None,
            lambda: genai.embed_content(
                model=f"models/{cfg.GEMINI_EMBEDDING_MODEL}",
                content=text,
                task_type="retrieval_document",
            ),
        )
        return result["embedding"]
    except Exception as e:
        logger.error(f"Gemini embedding failed: {e}")
        # Fallback to OpenAI if possible.
        # NOTE(review): the fallback vector comes from a different model and
        # may differ in length from Gemini vectors — confirm callers cope.
        if cfg.OPENAI_API_KEY:
            logger.info("Attempting OpenAI embedding fallback.")
            return await _generate_openai_embedding_direct(text)
        # Bare raise keeps the original traceback intact.
        raise


async def _generate_openai_embedding_direct(text: str) -> List[float]:
    """
    Embed ``text`` with OpenAI's "text-embedding-3-small" model.

    Uses the shared module-level ``openai_client`` when it is set; otherwise a
    new ``AsyncOpenAI`` client is built from ``cfg.OPENAI_API_KEY`` for this
    call. Any failure is logged and then re-raised to the caller.
    """
    try:
        from openai import AsyncOpenAI

        if openai_client:
            active_client = openai_client
        else:
            active_client = AsyncOpenAI(api_key=cfg.OPENAI_API_KEY)

        # The API takes a batch of inputs; a single-item batch is sent and the
        # first (only) result is returned.
        reply = await active_client.embeddings.create(
            input=[text],
            model="text-embedding-3-small",
        )
        first_item = reply.data[0]
        return first_item.embedding
    except Exception as exc:
        logger.error(f"OpenAI embedding failed: {exc}")
        raise exc


# ── Cosine Similarity ─────────────────────────────────────────────────────────

def calculate_similarity(vec1: List[float], vec2: List[float]) -> float:
    """
    Return the cosine similarity of two vectors.

    Args:
        vec1: First vector.
        vec2: Second vector.

    Returns:
        A value in ``[-1.0, 1.0]``. ``0.0`` is returned when either vector is
        empty or has zero magnitude. When the lengths differ, only the
        overlapping leading components of both vectors are compared.
    """
    if not vec1 or not vec2:
        return 0.0

    # zip() stops at the shorter input, so pairing once guarantees the dot
    # product and both norms are computed over the same overlapping prefix.
    pairs = list(zip(vec1, vec2))

    dot_product = sum(a * b for a, b in pairs)
    norm_a = math.sqrt(sum(a * a for a, _ in pairs))
    norm_b = math.sqrt(sum(b * b for _, b in pairs))
    if norm_a == 0.0 or norm_b == 0.0:
        return 0.0

    cosine = dot_product / (norm_a * norm_b)

    # Floating-point rounding can push the quotient a hair outside [-1, 1]
    # (e.g. 1.0000000000000002 for identical vectors). Clamp with explicit
    # comparisons so a NaN result still propagates instead of being masked.
    if cosine > 1.0:
        return 1.0
    if cosine < -1.0:
        return -1.0
    return cosine


# ── System Prompt Builder ──────────────────────────────────────────────────────

def _build_system_prompt(custom_prompt: Optional[str], context: str) -> str:
    """
    Compose the system prompt: instructions followed by a ``[CONTEXT]`` section.

    A non-empty ``custom_prompt`` replaces the built-in support-assistant
    instructions; ``None`` or an empty string selects the built-in text.
    ``context`` is appended verbatim after the ``[CONTEXT]`` marker.
    """
    if custom_prompt:
        instructions = custom_prompt
    else:
        instructions = (
            "You are a helpful, courteous AI support assistant for this website.\n"
            "Answer the user's question accurately and concisely using ONLY the provided [CONTEXT] below.\n"
            "If the answer cannot be determined from the [CONTEXT], politely say you don't have that information "
            "and suggest contacting support.\n"
            "Do NOT invent facts or URLs."
        )

    prompt = f"{instructions}\n\n[CONTEXT]\n{context}"
    return prompt


# ── Cloud AI Inference (Gemini & OpenAI Fallback) ──────────────────────────────

async def _call_gemini(query: str, context: str, custom_prompt: Optional[str]) -> Optional[Dict[str, Any]]:
    """
    Ask Gemini for a single (non-streamed) answer.

    Args:
        query: The user's question.
        context: Text placed in the prompt's ``[CONTEXT]`` section.
        custom_prompt: Optional replacement for the built-in instructions.

    Returns:
        ``{"response": str, "tokens_used": int, "provider": "gemini"}`` on
        success, or ``None`` when Gemini is not initialised, the call times
        out, or any other error occurs. Errors are logged, never raised.
    """
    if not gemini_model:
        return None

    timeout_s = 10.0
    try:
        system_prompt = _build_system_prompt(custom_prompt, context)
        # The system instructions and the user turn are sent as one flat prompt.
        full_prompt = f"{system_prompt}\n\nUser: {query}\nAssistant:"

        # generate_content is synchronous, so it runs in the default executor.
        # Per asyncio semantics, a timeout only abandons the await; the worker
        # thread is not interrupted and finishes the SDK call on its own.
        loop = asyncio.get_running_loop()
        response = await asyncio.wait_for(
            loop.run_in_executor(None, lambda: gemini_model.generate_content(full_prompt)),
            timeout=timeout_s,
        )
        text = response.text.strip()
        usage = getattr(response, "usage_metadata", None)
        # Without usage metadata, fall back to a rough estimate of two tokens
        # per whitespace-separated word of the answer.
        token_count = usage.total_token_count if usage else len(text.split()) * 2
        return {"response": text, "tokens_used": token_count, "provider": "gemini"}
    except asyncio.TimeoutError:
        # str(TimeoutError()) is empty, so the generic handler below would log
        # a message with no cause; report the timeout explicitly instead.
        logger.error(f"Gemini chat call timed out after {timeout_s:g}s")
        return None
    except Exception as e:
        logger.error(f"Gemini chat call failed: {e}")
        return None


async def _call_openai(query: str, context: str, custom_prompt: Optional[str]) -> Optional[Dict[str, Any]]:
    """
    Ask OpenAI for a single (non-streamed) chat completion.

    Args:
        query: The user's question, sent as the user message.
        context: Text placed in the system prompt's ``[CONTEXT]`` section.
        custom_prompt: Optional replacement for the built-in instructions.

    Returns:
        ``{"response": str, "tokens_used": int, "provider": "openai"}`` on
        success, or ``None`` when the client is not initialised, the call
        times out, the reply has no text content, or any other error occurs.
        Errors are logged, never raised.
    """
    if not openai_client:
        return None

    timeout_s = 10.0
    try:
        system_prompt = _build_system_prompt(custom_prompt, context)
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ]
        completion = await asyncio.wait_for(
            openai_client.chat.completions.create(
                model=cfg.OPENAI_MODEL,
                messages=messages,
                temperature=0.3,
                max_tokens=500,
            ),
            timeout=timeout_s,
        )

        content = completion.choices[0].message.content
        if content is None:
            # A reply can carry no text; treat it as a failed call with a
            # clear log line rather than relying on an AttributeError.
            logger.error("OpenAI chat call failed: response contained no text content")
            return None

        text = content.strip()
        tokens = completion.usage.total_tokens if completion.usage else 0
        return {"response": text, "tokens_used": tokens, "provider": "openai"}
    except asyncio.TimeoutError:
        # str(TimeoutError()) is empty, so the generic handler below would log
        # a message with no cause; report the timeout explicitly instead.
        logger.error(f"OpenAI chat call timed out after {timeout_s:g}s")
        return None
    except Exception as e:
        logger.error(f"OpenAI chat call failed: {e}")
        return None


async def generate_chatbot_response(
    query: str,
    context: str,
    custom_prompt: Optional[str] = None
) -> Dict[str, Any]:
    """
    Produce one chatbot answer, trying providers in priority order.

    Order: Gemini first (when initialised), then OpenAI (when initialised and
    Gemini was unavailable or returned nothing), then a canned demo message.

    Returns:
        A normalized dict: {"response": str, "tokens_used": int, "provider": str}
        where provider is "gemini", "openai" or "demo".
    """
    gemini_available = bool(gemini_model)

    answer = None
    if gemini_available:
        answer = await _call_gemini(query, context, custom_prompt)

    if answer is None and openai_client:
        # Only announce a fallback when Gemini was actually tried first.
        if gemini_available:
            logger.info("Gemini call failed; falling back to OpenAI.")
        answer = await _call_openai(query, context, custom_prompt)

    if not answer:
        logger.warning("No live AI provider succeeded. Returning demo response.")
        answer = {
            "response": (
                "I'm sorry, I cannot connect to the live AI backup systems right now. "
                "However, I can still answer any questions directly documented in our FAQ knowledge base!"
            ),
            "tokens_used": 0,
            "provider": "demo",
        }

    return answer


# ── Streaming Interface ────────────────────────────────────────────────────────

async def stream_chatbot_response(
    query: str,
    context: str,
    custom_prompt: Optional[str] = None
):
    """
    Stream a chatbot answer as newline-delimited JSON events.

    Provider order is Gemini -> OpenAI -> typed demo text. A fallback is only
    attempted while no answer token has been sent yet: if a provider fails
    mid-stream, the partial answer is left as-is instead of having a second
    answer (or the demo text) appended to it.

    Args:
        query: The user's question.
        context: Text placed in the prompt's ``[CONTEXT]`` section.
        custom_prompt: Optional replacement for the built-in instructions.

    Yields:
        ``str`` lines, each a JSON object followed by ``"\\n"``. Event types,
        in order: three ``status`` events, zero or more ``token`` events, one
        ``status`` event, one ``confidence`` event, one ``complete`` event.
    """
    def _event(payload: Dict[str, Any]) -> str:
        # One JSON document per line.
        return json.dumps(payload) + "\n"

    system_prompt = _build_system_prompt(custom_prompt, context)

    # Progress messages for the client UI; this function itself performs no
    # analysis or database search between them.
    yield _event({"type": "status", "message": "Analyzing question..."})
    await asyncio.sleep(0.2)
    yield _event({"type": "status", "message": "Searching database..."})
    await asyncio.sleep(0.2)
    yield _event({"type": "status", "message": "Generating AI response..."})

    tokens_sent = False   # at least one "token" event has reached the client
    answered = False      # a provider stream ran to completion without error

    if gemini_model:
        full_prompt = f"{system_prompt}\n\nUser: {query}\nAssistant:"
        try:
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                lambda: gemini_model.generate_content(full_prompt, stream=True)
            )
            # The streamed response is a synchronous iterator whose next()
            # may block on the network, so each chunk is fetched in the
            # executor. A sentinel default is used because StopIteration
            # cannot be propagated through a future.
            chunk_iter = iter(response)
            exhausted = object()
            while True:
                chunk = await loop.run_in_executor(None, next, chunk_iter, exhausted)
                if chunk is exhausted:
                    break
                piece = chunk.text
                if piece:
                    tokens_sent = True
                    yield _event({"type": "token", "content": piece})
            answered = True
        except Exception as e:
            logger.error(f"Gemini streaming failed: {e}")

    # Fall back to OpenAI only when it is configured and nothing has been
    # shown to the user yet.
    if not answered and not tokens_sent and openai_client:
        try:
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": query}
            ]
            stream = await openai_client.chat.completions.create(
                model=cfg.OPENAI_MODEL,
                messages=messages,
                temperature=0.3,
                max_tokens=500,
                stream=True
            )
            async for chunk in stream:
                # Skip chunks that carry no choices instead of failing with
                # an IndexError partway through the answer.
                if not chunk.choices:
                    continue
                content = chunk.choices[0].delta.content
                if content:
                    tokens_sent = True
                    yield _event({"type": "token", "content": content})
            answered = True
        except Exception as e:
            logger.error(f"OpenAI streaming failed: {e}")

    if not answered and not tokens_sent:
        # No provider produced anything: emit a canned message word by word
        # so the client still sees a typed response.
        demo_text = "I am running in offline demo mode. Please check system configurations."
        for word in demo_text.split(" "):
            await asyncio.sleep(0.05)
            yield _event({"type": "token", "content": word + " "})

    yield _event({"type": "status", "message": "Verification complete."})
    await asyncio.sleep(0.2)

    # NOTE(review): the confidence badge is a fixed value, not derived from
    # the answer; it is emitted unchanged even for the demo fallback.
    yield _event({"type": "confidence", "badge": "SAFE TO USE", "score": 90, "color": "emerald"})
    yield _event({"type": "complete"})