from __future__ import annotations

import json

from ..typing import AsyncResult, Messages
from ..providers.response import Reasoning
from ..requests import StreamSession
from .base_provider import AsyncGeneratorProvider, ProviderModelMixin


class GradientNetwork(AsyncGeneratorProvider, ProviderModelMixin):
    """
    Provider for chat.gradient.network

    Supports streaming text generation with Qwen and GPT OSS models.
    """
    label = "Gradient Network"
    url = "https://chat.gradient.network"
    api_endpoint = "https://chat.gradient.network/api/generate"
    working = True
    needs_auth = False
    supports_stream = True
    supports_system_message = True
    supports_message_history = True

    default_model = "Qwen3 235B"
    models = [
        default_model,
        "GPT OSS 120B",
    ]
    model_aliases = {
        "qwen-3-235b": "Qwen3 235B",
        "qwen3-235b": "Qwen3 235B",
        "gpt-oss-120b": "GPT OSS 120B",
    }

    @classmethod
    async def create_async_generator(
        cls,
        model: str,
        messages: Messages,
        proxy: str = None,
        temperature: float = None,
        max_tokens: int = None,
        enable_thinking: bool = False,
        **kwargs
    ) -> AsyncResult:
        """
        Create an async generator for streaming chat responses.

        Args:
            model: The model name to use
            messages: List of message dictionaries
            proxy: Optional proxy URL
            temperature: Optional temperature parameter
            max_tokens: Optional max tokens parameter
            enable_thinking: Enable the thinking/analysis channel
                (maps to enableThinking in the API payload)
            **kwargs: Additional arguments

        Yields:
            str: Content chunks from the response
            Reasoning: Reasoning content when enable_thinking is True
        """
        model = cls.get_model(model)
        headers = {
            "Accept": "application/x-ndjson",
            "Content-Type": "application/json",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
            "Origin": cls.url,
            "Referer": f"{cls.url}/",
        }
        payload = {
            "model": model,
            "messages": messages,
        }
        if temperature is not None:
            payload["temperature"] = temperature
        if max_tokens is not None:
            payload["max_tokens"] = max_tokens
        if enable_thinking:
            payload["enableThinking"] = enable_thinking

        async with StreamSession(headers=headers, proxy=proxy) as session:
            async with session.post(
                cls.api_endpoint,
                json=payload,
            ) as response:
                response.raise_for_status()
                # The endpoint streams NDJSON: one JSON object per line.
                async for line in response.iter_lines():
                    if not line:
                        continue
                    try:
                        data = json.loads(line)
                        msg_type = data.get("type")
                        if msg_type == "reply":
                            # Reply chunks carry content and/or reasoningContent
                            reply_data = data.get("data", {})
                            content = reply_data.get("content")
                            reasoning_content = reply_data.get("reasoningContent")
                            if reasoning_content:
                                yield Reasoning(reasoning_content)
                            if content:
                                yield content
                        # Skip clusterInfo and blockUpdate GPU visualization messages
                    except json.JSONDecodeError:
                        # Skip non-JSON lines (may be partial data or empty)
                        continue
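
# Example usage (a minimal sketch, assuming this module lives inside a
# g4f-style package so the relative imports above resolve, and that the
# upstream API is reachable; the message format follows the standard
# {"role": ..., "content": ...} convention used elsewhere in the project):
#
#     import asyncio
#
#     async def main():
#         messages = [{"role": "user", "content": "Hello!"}]
#         async for chunk in GradientNetwork.create_async_generator(
#             "qwen3-235b",          # resolved to "Qwen3 235B" via model_aliases
#             messages,
#             enable_thinking=True,  # also yields Reasoning chunks
#         ):
#             if isinstance(chunk, Reasoning):
#                 print(f"[thinking] {chunk}")
#             else:
#                 print(chunk, end="", flush=True)
#
#     asyncio.run(main())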