From 7771cf3d4350fb0a2179b566dba4e6bc7f965a24 Mon Sep 17 00:00:00 2001
From: Ammar
Date: Wed, 26 Nov 2025 15:02:51 +0200
Subject: [PATCH] Improve Yupp provider account handling, request timeouts, and byte retrieval from URLs (#3249)

* Add image caching to Yupp provider

  Introduces an image cache to avoid redundant uploads in the Yupp provider.
  Refactors media attachment handling into a new prepare_files method,
  improving efficiency and code organization. Updates .gitignore to exclude
  the .idea directory.

* Refactor Yupp stream handling and chunk processing

  Improves stream segmentation in the Yupp provider by introducing buffers
  for the target, variant, quick, thinking, and extra streams. Refactors
  chunk processing to better handle image-gen, quick responses, and variant
  outputs, and adds more robust stream ID extraction and routing logic.
  Yields a consolidated JsonResponse with all stream segments for
  downstream use.

* Handle ClientResponseError in Yupp provider

  Adds specific handling for aiohttp's ClientResponseError in the Yupp
  provider. Marks the account as invalid on a 500 Internal Server Error;
  otherwise increments the error count and raises ProviderException.

* Update Yupp.py

  Fix 429 'Too Many Requests' handling.

* Update Yupp.py

* Improve Yupp provider account handling and request timeout

  Refactored account loading to preserve account history and error counts
  when updating tokens. Enhanced request logic to support custom timeouts
  using aiohttp's ClientTimeout, allowing for more flexible timeout
  configuration.

* Update __init__.py

* Handle multi-line <think> and <yapp> blocks in Yupp

  Added logic to capture and process multi-line <think> and <yapp> blocks
  referenced by special IDs. Introduced block storage and extraction
  functions, enabling reasoning and image-gen content to be handled via
  references in the response stream.

* Update LMArena.py

  Fix the 'Model Not Found' error.

* Refactor to use StreamSession in Qwen and Yupp providers

  Replaced aiohttp.ClientSession with StreamSession in Qwen.py and Yupp.py
  for improved session handling. Updated exception and timeout references
  in Yupp.py to use aiohttp types. Improved default argument handling in
  the StreamSession initializer.

* Update Yupp.py

* Add status parameter to get_generated_image method

  Introduces a 'status' parameter to the get_generated_image method to
  allow passing the image generation status. Updates method calls and
  response objects to include the status in their metadata for improved
  tracking of image generation progress.

* Update OpenaiChat.py

* Refactor Qwen image upload, caching logic, and token handling

  Reworked the image upload flow in the Qwen provider to use direct file
  uploads with OSS headers, added caching for uploaded images, and improved
  file type detection. Updated prepare_files to handle uploads via the
  session and cache results, and added a utility for generating OSS
  headers. Includes minor import and typing adjustments and token support.

* Refactor Qwen and Yupp providers for improved async handling

  Updated the Qwen provider to handle the timeout via kwargs and improved
  type annotations. Refactored the Yupp provider for better code
  organization, formatting, and async account rotation logic. Enhanced
  readability and maintainability by reordering imports, adding whitespace,
  and clarifying function implementations.

* Add image caching to OpenaiChat provider

  Introduces an image cache mechanism to OpenaiChat for uploaded images,
  reducing redundant uploads and improving efficiency. Also refactors code
  for clarity, updates type hints, and makes minor formatting improvements
  throughout the file.
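The caching scheme this patch adds to Qwen, Yupp, and OpenaiChat is the same in all three providers: hash the raw bytes with MD5 and reuse the stored upload record on a hit. A minimal sketch of the pattern (illustrative names only; `do_upload` stands in for each provider's own upload call):

```python
import hashlib
from typing import Dict

ImagesCache: Dict[str, dict] = {}

async def upload_once(data_bytes: bytes, do_upload) -> dict:
    # Key the cache on the content hash, so identical bytes are uploaded once.
    image_hash = hashlib.md5(data_bytes).hexdigest()
    cached = ImagesCache.get(image_hash)
    if cached:
        return cached                       # skip the network round-trip
    record = await do_upload(data_bytes)    # provider-specific upload call
    ImagesCache[image_hash] = record
    return record
```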
---
 g4f/Provider/Qwen.py                  | 220 +++++++++++---
 g4f/Provider/Yupp.py                  | 264 ++++++++++++-----
 g4f/Provider/needs_auth/LMArena.py    |   2 +-
 g4f/Provider/needs_auth/OpenaiChat.py | 405 +++++++++++++++-----------
 g4f/image/__init__.py                 |   9 +
 g4f/requests/aiohttp.py               |  12 +-
 6 files changed, 623 insertions(+), 289 deletions(-)

diff --git a/g4f/Provider/Qwen.py b/g4f/Provider/Qwen.py
index 67514332..049504e1 100644
--- a/g4f/Provider/Qwen.py
+++ b/g4f/Provider/Qwen.py
@@ -1,22 +1,29 @@
 from __future__ import annotations

 import asyncio
+import datetime
+import hashlib
+import hmac
 import json
-import mimetypes
 import re
 import uuid
 from time import time
-from typing import Literal, Optional
+from typing import Literal, Optional, Dict
+from urllib.parse import quote

 import aiohttp
-from ..errors import RateLimitError, ResponseError
-from ..typing import AsyncResult, Messages, MediaListType
-from ..providers.response import JsonConversation, Reasoning, Usage, ImageResponse, FinishReason
-from ..requests import sse_stream
-from ..tools.media import merge_media
+
+from g4f.image import to_bytes, detect_file_type
+from g4f.requests import raise_for_status
 from .base_provider import AsyncGeneratorProvider, ProviderModelMixin
 from .helper import get_last_user_message
 from .. import debug
+from ..errors import RateLimitError, ResponseError
+from ..providers.response import JsonConversation, Reasoning, Usage, ImageResponse, FinishReason
+from ..requests import sse_stream
+from ..requests.aiohttp import StreamSession
+from ..tools.media import merge_media
+from ..typing import AsyncResult, Messages, MediaListType

 try:
     import curl_cffi
@@ -25,6 +32,56 @@ try:
 except ImportError:
     has_curl_cffi = False

+# Global variables to manage Qwen Image Cache
+ImagesCache: Dict[str, dict] = {}
+
+
+def get_oss_headers(method: str, date_str: str, sts_data: dict, content_type: str) -> dict[str, str]:
+    bucket_name = sts_data.get('bucketname', 'qwen-webui-prod')
+    file_path = sts_data.get('file_path', '')
+    access_key_id = sts_data.get('access_key_id')
+    access_key_secret = sts_data.get('access_key_secret')
+    security_token = sts_data.get('security_token')
+    headers = {
+        'Content-Type': content_type,
+        'x-oss-content-sha256': 'UNSIGNED-PAYLOAD',
+        'x-oss-date': date_str,
+        'x-oss-security-token': security_token,
+        'x-oss-user-agent': 'aliyun-sdk-js/6.23.0 Chrome 132.0.0.0 on Windows 10 64-bit'
+    }
+    headers_lower = {k.lower(): v for k, v in headers.items()}
+
+    canonical_headers_list = []
+    signed_headers_list = []
+    required_headers = ['content-md5', 'content-type', 'x-oss-content-sha256', 'x-oss-date',
+                        'x-oss-security-token', 'x-oss-user-agent']
+    for header_name in sorted(required_headers):
+        if header_name in headers_lower:
+            canonical_headers_list.append(f"{header_name}:{headers_lower[header_name]}")
+            signed_headers_list.append(header_name)
+
+    canonical_headers = '\n'.join(canonical_headers_list) + '\n'
+    canonical_uri = f"/{bucket_name}/{quote(file_path, safe='/')}"
+
+    canonical_request = f"{method}\n{canonical_uri}\n\n{canonical_headers}\n\nUNSIGNED-PAYLOAD"
+
+    date_parts = date_str.split('T')
+    date_scope = f"{date_parts[0]}/ap-southeast-1/oss/aliyun_v4_request"
+    string_to_sign = f"OSS4-HMAC-SHA256\n{date_str}\n{date_scope}\n{hashlib.sha256(canonical_request.encode()).hexdigest()}"
+
+    def sign(key, msg):
+        return hmac.new(key, msg.encode() if isinstance(msg, str) else msg, hashlib.sha256).digest()
+
+    date_key = sign(f"aliyun_v4{access_key_secret}".encode(), date_parts[0])
+    region_key = sign(date_key, "ap-southeast-1")
+    service_key = sign(region_key, "oss")
+    signing_key = sign(service_key, "aliyun_v4_request")
+    signature = hmac.new(signing_key, string_to_sign.encode(), hashlib.sha256).hexdigest()
+
+    headers['authorization'] = f"OSS4-HMAC-SHA256 Credential={access_key_id}/{date_scope},Signature={signature}"
+    return headers
+
+
 text_models = [
     'qwen3-max-preview', 'qwen-plus-2025-09-11', 'qwen3-235b-a22b', 'qwen3-coder-plus', 'qwen3-30b-a3b',
     'qwen3-coder-30b-a3b-instruct', 'qwen-max-latest', 'qwen-plus-2025-01-25', 'qwq-32b', 'qwen-turbo-2025-02-11',
@@ -60,19 +117,19 @@ class Qwen(AsyncGeneratorProvider, ProviderModelMixin):
     active_by_default = True
     supports_stream = True
     supports_message_history = False
-
+    image_cache = True
     _models_loaded = True
     image_models = image_models
     text_models = text_models
     vision_models = vision_models
-    models = models
+    models: list[str] = models
     default_model = "qwen3-235b-a22b"

     _midtoken: str = None
     _midtoken_uses: int = 0

     @classmethod
-    def get_models(cls) -> list[str]:
+    def get_models(cls, **kwargs) -> list[str]:
         if not cls._models_loaded and has_curl_cffi:
             response = curl_cffi.get(f"{cls.url}/api/models")
             if response.ok:
@@ -97,34 +154,106 @@ class Qwen(AsyncGeneratorProvider, ProviderModelMixin):
         return cls.models

     @classmethod
-    async def prepare_files(cls, media, chat_type="")->list:
+    async def prepare_files(cls, media, session: aiohttp.ClientSession, headers=None) -> list:
+        if headers is None:
+            headers = {}
         files = []
-        for _file, file_name in media:
-            file_type, _ = mimetypes.guess_type(file_name)
-            file_class: Literal["default", "vision", "video", "audio", "document"] = "default"
-            _type: Literal["file", "image", "video", "audio"] = "file"
-            showType: Literal["file", "image", "video", "audio"] = "file"
+        for index, (_file, file_name) in enumerate(media):

-            if isinstance(_file, str) and _file.startswith('http'):
-                if chat_type == "image_edit" or (file_type and file_type.startswith("image")):
-                    file_class = "vision"
-                    _type = "image"
-                if not file_type:
-                    # Try to infer from file extension, fallback to generic
-                    ext = file_name.split('.')[-1].lower() if '.' in file_name else ''
-                    file_type = mimetypes.types_map.get(f'.{ext}', 'application/octet-stream')
-                    showType = "image"
+            data_bytes = to_bytes(_file)
+            # Check Cache
+            hasher = hashlib.md5()
+            hasher.update(data_bytes)
+            image_hash = hasher.hexdigest()
+            file = ImagesCache.get(image_hash)
+            if cls.image_cache and file:
+                debug.log("Using cached image")
+                files.append(file)
+                continue

-            files.append(
-                {
-                    "type": _type,
+            extension, file_type = detect_file_type(data_bytes)
+            file_name = file_name or f"file-{len(data_bytes)}{extension}"
+            file_size = len(data_bytes)
+
+            # Get File Url
+            async with session.post(
+                    f'{cls.url}/api/v2/files/getstsToken',
+                    json={"filename": file_name,
+                          "filesize": file_size, "filetype": file_type},
+                    headers=headers
+
+            ) as r:
+                await raise_for_status(r, "Create file failed")
+                res_data = await r.json()
+                data = res_data.get("data")
+
+                if res_data["success"] is False:
+                    raise RateLimitError(f"{data['code']}:{data['details']}")
+                file_url = data.get("file_url")
+                file_id = data.get("file_id")
+
+            # Put File into Url
+            str_date = datetime.datetime.now(datetime.UTC).strftime('%Y%m%dT%H%M%SZ')
+            headers = get_oss_headers('PUT', str_date, data, file_type)
+            async with session.put(
+                    file_url.split("?")[0],
+                    data=data_bytes,
+                    headers=headers
+            ) as response:
+                await raise_for_status(response)
+
+            file_class: Literal["default", "vision", "video", "audio", "document"]
+            _type: Literal["file", "image", "video", "audio"]
+            show_type: Literal["file", "image", "video", "audio"]
+            if "image" in file_type:
+                _type = "image"
+                show_type = "image"
+                file_class = "vision"
+            elif "video" in file_type:
+                _type = "video"
+                show_type = "video"
+                file_class = "video"
+            elif "audio" in file_type:
+                _type = "audio"
+                show_type = "audio"
+                file_class = "audio"
+            else:
+                _type = "file"
+                show_type = "file"
+                file_class = "document"
+
+            file = {
+                "type": _type,
+                "file": {
+                    "created_at": int(time() * 1000),
+                    "data": {},
+                    "filename": file_name,
+                    "hash": None,
+                    "id": file_id,
+                    "meta": {
                         "name": file_name,
-                    "file_type": file_type,
-                    "showType": showType,
-                    "file_class": file_class,
-                    "url": _file
-                }
-            )
+                        "size": file_size,
+                        "content_type": file_type
+                    },
+                    "update_at": int(time() * 1000),
+                },
+                "id": file_id,
+                "url": file_url,
+                "name": file_name,
+                "collection_name": "",
+                "progress": 0,
+                "status": "uploaded",
+                "greenNet": "success",
+                "size": file_size,
+                "error": "",
+                "itemId": str(uuid.uuid4()),
+                "file_type": file_type,
+                "showType": show_type,
+                "file_class": file_class,
+                "uploadTaskId": str(uuid.uuid4())
+            }
+            ImagesCache[image_hash] = file
+            files.append(file)
         return files

     @classmethod
@@ -135,7 +264,6 @@ class Qwen(AsyncGeneratorProvider, ProviderModelMixin):
         media: MediaListType = None,
         conversation: JsonConversation = None,
         proxy: str = None,
-        timeout: int = 120,
         stream: bool = True,
         enable_thinking: bool = True,
         chat_type: Literal[
@@ -157,7 +285,7 @@ class Qwen(AsyncGeneratorProvider, ProviderModelMixin):
         """

         model_name = cls.get_model(model)
-
+        token = kwargs.get("token")
         headers = {
             'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
             'Accept': '*/*',
@@ -169,13 +297,24 @@ class Qwen(AsyncGeneratorProvider, ProviderModelMixin):
             'Sec-Fetch-Mode': 'cors',
             'Sec-Fetch-Site': 'same-origin',
             'Connection': 'keep-alive',
-            'Authorization': 'Bearer',
+            'Authorization': f'Bearer {token}' if token else "Bearer",
             'Source': 'web'
         }

         prompt = get_last_user_message(messages)
-
-        async with aiohttp.ClientSession(headers=headers) as session:
+        _timeout = kwargs.get("timeout")
+        if isinstance(_timeout, aiohttp.ClientTimeout):
+            timeout = _timeout
+        else:
+            total = float(_timeout) if isinstance(_timeout, (int, float)) else 5 * 60
+            timeout = aiohttp.ClientTimeout(total=total)
+        async with StreamSession(headers=headers) as session:
+            try:
+                async with session.get('https://chat.qwen.ai/api/v1/auths/', proxy=proxy) as user_info_res:
+                    user_info_res.raise_for_status()
+                    debug.log(await user_info_res.json())
+            except:
+                ...
             for attempt in range(5):
                 try:
                     if not cls._midtoken:
@@ -221,7 +360,8 @@ class Qwen(AsyncGeneratorProvider, ProviderModelMixin):
                     files = []
                     media = list(merge_media(media, messages))
                     if media:
-                        files = await cls.prepare_files(media, chat_type=chat_type)
+                        files = await cls.prepare_files(media, session=session,
+                                                        headers=req_headers)

                     msg_payload = {
                         "stream": stream,
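Reviewer note: the OSS V4 signing flow above is easiest to follow end to end. A minimal sketch of how `get_oss_headers` is meant to be driven, assuming an STS payload shaped like the one `getstsToken` returns (all credential values below are placeholders):

```python
import datetime
import aiohttp
from g4f.Provider.Qwen import get_oss_headers  # added by this patch

sts_data = {
    "bucketname": "qwen-webui-prod",
    "file_path": "user/uploads/example.png",
    "access_key_id": "STS.placeholder",
    "access_key_secret": "placeholder-secret",
    "security_token": "placeholder-token",
    "file_url": "https://qwen-webui-prod.oss-ap-southeast-1.aliyuncs.com/user/uploads/example.png?presigned",
}

async def upload(data_bytes: bytes) -> None:
    # x-oss-date must be compact UTC ISO form, matching the V4 signing scope.
    date_str = datetime.datetime.now(datetime.UTC).strftime("%Y%m%dT%H%M%SZ")
    headers = get_oss_headers("PUT", date_str, sts_data, "image/png")
    async with aiohttp.ClientSession() as session:
        # The query string is dropped: authorization travels in headers instead.
        async with session.put(sts_data["file_url"].split("?")[0],
                               data=data_bytes, headers=headers) as resp:
            resp.raise_for_status()
```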
diff --git a/g4f/Provider/Yupp.py b/g4f/Provider/Yupp.py
index d5aa4d9a..c4b1f203 100644
--- a/g4f/Provider/Yupp.py
+++ b/g4f/Provider/Yupp.py
@@ -1,25 +1,26 @@
+import asyncio
 import hashlib
 import json
+import os
+import re
 import time
 import uuid
-import re
-import os
-import asyncio

-import aiohttp
-from aiohttp import ClientResponseError
+import aiohttp

-from ..typing import AsyncResult, Messages, Optional, Dict, Any, List
+from .helper import get_last_user_message
+from .yupp.models import YuppModelManager
+from ..cookies import get_cookies
+from ..debug import log
+from ..errors import RateLimitError, ProviderException, MissingAuthError
+from ..image import is_accepted_format, to_bytes
 from ..providers.base_provider import AsyncGeneratorProvider, ProviderModelMixin
 from ..providers.response import Reasoning, PlainTextResponse, PreviewResponse, JsonConversation, ImageResponse, \
     ProviderInfo, FinishReason, JsonResponse
-from ..errors import RateLimitError, ProviderException, MissingAuthError
-from ..cookies import get_cookies
+from ..requests.aiohttp import StreamSession
 from ..tools.auth import AuthManager
 from ..tools.media import merge_media
-from ..image import is_accepted_format, to_bytes
-from .yupp.models import YuppModelManager
-from .helper import get_last_user_message
-from ..debug import log
+from ..typing import AsyncResult, Messages, Optional, Dict, Any, List

 # Global variables to manage Yupp accounts
 YUPP_ACCOUNT = Dict[str, Any]
@@ -27,22 +28,24 @@ YUPP_ACCOUNTS: List[YUPP_ACCOUNT] = []
 account_rotation_lock = asyncio.Lock()

 # Global variables to manage Yupp Image Cache
-ImagesCache:Dict[str, dict] = {}
+ImagesCache: Dict[str, dict] = {}
+

 class YuppAccount:
     """Yupp account representation"""
+
     def __init__(self, token: str, is_valid: bool = True, error_count: int = 0, last_used: float = 0):
         self.token = token
         self.is_valid = is_valid
         self.error_count = error_count
         self.last_used = last_used

+
 def load_yupp_accounts(tokens_str: str):
     """Load Yupp accounts from token string"""
     global YUPP_ACCOUNTS
     if not tokens_str:
         return
-
     tokens = [token.strip() for token in tokens_str.split(',') if token.strip()]
     YUPP_ACCOUNTS = [
         {
@@ -54,6 +57,7 @@ def load_yupp_accounts(tokens_str: str):
         for token in tokens
     ]

+
 def create_headers() -> Dict[str, str]:
     """Create headers for requests"""
     return {
@@ -66,6 +70,7 @@ def create_headers() -> Dict[str, str]:
         "Sec-Fetch-Site": "same-origin",
     }

+
 async def get_best_yupp_account() -> Optional[YUPP_ACCOUNT]:
     """Get the best available Yupp account using smart selection algorithm"""
     max_error_count = int(os.getenv("MAX_ERROR_COUNT", "3"))
@@ -77,10 +82,10 @@ async def get_best_yupp_account() -> Optional[YUPP_ACCOUNT]:
         acc for acc in YUPP_ACCOUNTS
         if acc["is_valid"]
-        and (
-            acc["error_count"] < max_error_count
-            or now - acc["last_used"] > error_cooldown
-        )
+           and (
+                   acc["error_count"] < max_error_count
+                   or now - acc["last_used"] > error_cooldown
+           )
     ]

     if not valid_accounts:
@@ -89,8 +94,8 @@ async def get_best_yupp_account() -> Optional[YUPP_ACCOUNT]:
     # Reset error count for accounts in cooldown
     for acc in valid_accounts:
         if (
-            acc["error_count"] >= max_error_count
-            and now - acc["last_used"] > error_cooldown
+                acc["error_count"] >= max_error_count
+                and now - acc["last_used"] > error_cooldown
         ):
             acc["error_count"] = 0

@@ -100,6 +105,7 @@ async def get_best_yupp_account() -> Optional[YUPP_ACCOUNT]:
         account["last_used"] = now
         return account

+
 async def claim_yupp_reward(session: aiohttp.ClientSession, account: YUPP_ACCOUNT, reward_id: str):
     """Claim Yupp reward asynchronously"""
     try:
@@ -109,7 +115,7 @@ async def claim_yupp_reward(session: aiohttp.ClientSession, account: YUPP_ACCOUN
         headers = {
             "Content-Type": "application/json",
             "Cookie": f"__Secure-yupp.session-token={account['token']}",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36 Edg/137.0.0.0",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36 Edg/137.0.0.0",
         }

         async with session.post(url, json=payload, headers=headers) as response:
@@ -122,6 +128,7 @@ async def claim_yupp_reward(session: aiohttp.ClientSession, account: YUPP_ACCOUN
         log_debug(f"Failed to claim reward {reward_id}. Error: {e}")
         return None

+
 async def make_chat_private(session: aiohttp.ClientSession, account: YUPP_ACCOUNT, chat_id: str) -> bool:
     """Set a Yupp chat's sharing status to PRIVATE"""
     try:
@@ -138,7 +145,7 @@ async def make_chat_private(session: aiohttp.ClientSession, account: YUPP_ACCOUN
         headers = {
             "Content-Type": "application/json",
             "Cookie": f"__Secure-yupp.session-token={account['token']}",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36 Edg/137.0.0.0",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36 Edg/137.0.0.0",
         }

@@ -146,8 +153,8 @@ async def make_chat_private(session: aiohttp.ClientSession, account: YUPP_ACCOUN
             response.raise_for_status()
             data = await response.json()
             if (
-                isinstance(data, list) and len(data) > 0
-                and "json" in data[0].get("result", {}).get("data", {})
+                    isinstance(data, list) and len(data) > 0
+                    and "json" in data[0].get("result", {}).get("data", {})
             ):
                 log_debug(f"Chat {chat_id} is now PRIVATE ✅")
                 return True
@@ -159,6 +166,7 @@ async def make_chat_private(session: aiohttp.ClientSession, account: YUPP_ACCOUN
         log_debug(f"Failed to make chat {chat_id} private: {e}")
         return False

+
 def log_debug(message: str):
     """Debug logging"""
     if os.getenv("DEBUG_MODE", "false").lower() == "true":
@@ -166,11 +174,12 @@ def log_debug(message: str):
     else:
         log(f"[Yupp] {message}")

+
 def format_messages_for_yupp(messages: Messages) -> str:
     """Format multi-turn conversation for Yupp single-turn format"""
     if not messages:
         return ""
-
+
     if len(messages) == 1 and isinstance(messages[0].get("content"), str):
         return messages[0].get("content", "").strip()
@@ -202,6 +211,7 @@ def format_messages_for_yupp(messages: Messages) -> str:

     return result

+
 class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
     """
     Yupp.ai Provider for g4f
@@ -214,7 +224,7 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
     active_by_default = True
     supports_stream = True
     image_cache = True
-
+
     @classmethod
     def get_models(cls, api_key: str = None, **kwargs) -> List[str]:
         if not cls.models:
@@ -230,11 +240,12 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
             cls.models_tags = {model.get("name"): manager.processor.generate_tags(model) for model in models}
             cls.models = [model.get("name") for model in models]
             cls.image_models = [model.get("name") for model in models if model.get("isImageGeneration")]
-            cls.vision_models = [model.get("name") for model in models if "image/*" in model.get("supportedAttachmentMimeTypes", [])]
+            cls.vision_models = [model.get("name") for model in models if
+                                 "image/*" in model.get("supportedAttachmentMimeTypes", [])]
         return cls.models

     @classmethod
-    async def prepare_files(cls, media, session:aiohttp.ClientSession, account:YUPP_ACCOUNT)->list:
+    async def prepare_files(cls, media, session: aiohttp.ClientSession, account: YUPP_ACCOUNT) -> list:
         files = []
         if not media:
             return files
@@ -291,11 +302,11 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):

     @classmethod
     async def create_async_generator(
-        cls,
-        model: str,
-        messages: Messages,
-        proxy: str = None,
-        **kwargs,
+            cls,
+            model: str,
+            messages: Messages,
+            proxy: str = None,
+            **kwargs,
     ) -> AsyncResult:
         """
         Create async completion using Yupp.ai API with account rotation
@@ -314,15 +325,16 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
         conversation = kwargs.get("conversation")
         url_uuid = conversation.url_uuid if conversation else None
         is_new_conversation = url_uuid is None
-
+
         prompt = kwargs.get("prompt")
         if prompt is None:
             if is_new_conversation:
                 prompt = format_messages_for_yupp(messages)
             else:
                 prompt = get_last_user_message(messages, prompt)
-
-        log_debug(f"Use url_uuid: {url_uuid}, Formatted prompt length: {len(prompt)}, Is new conversation: {is_new_conversation}")
+
+        log_debug(
+            f"Use url_uuid: {url_uuid}, Formatted prompt length: {len(prompt)}, Is new conversation: {is_new_conversation}")

         # Try all accounts with rotation
         max_attempts = len(YUPP_ACCOUNTS)
@@ -332,10 +344,9 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
                 raise ProviderException("No valid Yupp accounts available")

             try:
-                async with aiohttp.ClientSession() as session:
+                async with StreamSession() as session:
                     turn_id = str(uuid.uuid4())
-
                     # Handle media attachments
                     media = kwargs.get("media")
                     if media:
@@ -390,16 +401,23 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):

                     log_debug(f"Sending request to: {url}")
                     log_debug(f"Payload structure: {type(payload)}, length: {len(str(payload))}")
-
+                    _timeout = kwargs.get("timeout")
+                    if isinstance(_timeout, aiohttp.ClientTimeout):
+                        timeout = _timeout
+                    else:
+                        total = float(_timeout) if isinstance(_timeout, (int, float)) else 5 * 60
+                        timeout = aiohttp.ClientTimeout(total=total)
                     # Send request
-                    async with session.post(url, json=payload, headers=headers, proxy=proxy) as response:
+                    async with session.post(url, json=payload, headers=headers, proxy=proxy,
+                                            timeout=timeout) as response:
                         response.raise_for_status()

                         # Make chat private in background
                         asyncio.create_task(make_chat_private(session, account, url_uuid))

                         # Process stream
-                        async for chunk in cls._process_stream_response(response.content, account, session, prompt, model):
+                        async for chunk in cls._process_stream_response(response.content, account, session, prompt,
+                                                                        model):
                             yield chunk
                         return
@@ -417,7 +435,7 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
                 else:
                     account["error_count"] += 1
                     continue
-            except ClientResponseError as e:
+            except aiohttp.ClientResponseError as e:
                 log_debug(f"Account ...{account['token'][-4:]} failed: {str(e)}")
                 # No Available Yupp credits
                 if e.status == 500 and 'Internal Server Error' in e.message:
@@ -439,12 +457,12 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):

     @classmethod
     async def _process_stream_response(
-        cls,
-        response_content,
-        account: YUPP_ACCOUNT,
-        session: aiohttp.ClientSession,
-        prompt: str,
-        model_id: str
+            cls,
+            response_content,
+            account: YUPP_ACCOUNT,
+            session: aiohttp.ClientSession,
+            prompt: str,
+            model_id: str
     ) -> AsyncResult:
         """Process Yupp stream response asynchronously"""

@@ -461,15 +479,33 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
             "target": [],
             "variant": [],
             "quick": [],
-            "thinking": [] ,
+            "thinking": [],
             "extra": []
         }
         # Holds leftStream / rightStream definitions to determine target/variant
         select_stream = [None, None]
-
+        # State for capturing a multi-line <think> + <yapp> block (fa-style)
+        capturing_ref_id: Optional[str] = None
+        capturing_lines: List[bytes] = []
+
+        # Storage for special referenced blocks like $fa
+        think_blocks: Dict[str, str] = {}
+        image_blocks: Dict[str, str] = {}
+
         def extract_ref_id(ref):
             """Extract ID from reference string, e.g., from '$@123' extract '123'"""
             return ref[2:] if ref and isinstance(ref, str) and ref.startswith("$@") else None
+
+        def extract_ref_name(ref: str) -> Optional[str]:
+            """Extract simple ref name from '$fa' → 'fa'"""
+            if not isinstance(ref, str):
+                return None
+            if ref.startswith("$@"):
+                return ref[2:]
+            if ref.startswith("$") and len(ref) > 1:
+                return ref[1:]
+            return None
+
         def is_valid_content(content: str) -> bool:
             """Check if content is valid"""
             if not content or content in [None, "", "$undefined"]:
@@ -515,7 +551,27 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
                 if for_target:
                     normal_content += content
                     yield content
-
+
+        def finalize_capture_block(ref_id: str, lines: List[bytes]):
+            """Parse a captured <think> + <yapp> block for a given ref ID."""
+            text = b"".join(lines).decode("utf-8", errors="ignore")
+
+            # Extract <think>...</think>
+            think_start = text.find("<think>")
+            think_end = text.find("</think>")
+            if think_start != -1 and think_end != -1 and think_end > think_start:
+                inner = text[think_start + len("<think>"):think_end].strip()
+                if inner:
+                    think_blocks[ref_id] = inner
+
+            # Extract <yapp class="image-gen">...</yapp>
+            yapp_start = text.find('<yapp class="image-gen">')
+            if yapp_start != -1:
+                yapp_end = text.find("</yapp>", yapp_start)
+                if yapp_end != -1:
+                    yapp_block = text[yapp_start:yapp_end + len("</yapp>")]
+                    image_blocks[ref_id] = yapp_block
+
         try:
             line_count = 0
             quick_response_id = None
@@ -531,17 +587,55 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
             right_message_id = None
             nudge_new_chat_id = None
             nudge_new_chat = False
-
             async for line in response_content:
                 line_count += 1
-
+                # If we are currently capturing a think/image block for some ref ID
+                if capturing_ref_id is not None:
+                    capturing_lines.append(line)
+
+                    # Check if this line closes the block; after that, block is complete
+                    if b"</think>" in line:  # or b':{"curr"' in line:
+                        # We may have trailing "2:{...}" after </think> on the same line
+                        # Get id using re
+                        idx = line.find(b"</think>")
+                        suffix = line[idx + len(b"</think>"):]
+
+                        # Finalize captured block for this ref ID
+                        finalize_capture_block(capturing_ref_id, capturing_lines)
+                        capturing_ref_id = None
+                        capturing_lines = []
+
+                        # If there is trailing content (e.g. '2:{"curr":"$fa"...}')
+                        if suffix.strip():
+                            # Process suffix as a new "line" in the same iteration
+                            line = suffix
+                        else:
+                            # Nothing more on this line
+                            continue
+                    else:
+                        # Still inside captured block; skip normal processing
+                        continue
+
+                # Detect start of a <think> block assigned to a ref like 'fa:...'
+                if b"<think>" in line:
+                    m = line_pattern.match(line)
+                    if m:
+                        capturing_ref_id = m.group(1).decode()
+                        capturing_lines = [line]
+                        # Skip normal parsing; the rest of the block will be captured until </think>
+                        continue
+
                 match = line_pattern.match(line)
                 if not match:
                     continue

                 chunk_id, chunk_data = match.groups()
                 chunk_id = chunk_id.decode()
-
+
+                if nudge_new_chat_id and chunk_id == nudge_new_chat_id:
+                    nudge_new_chat = chunk_data.decode()
+                    continue
+
                 try:
                     data = json.loads(chunk_data) if chunk_data != b"{}" else {}
                 except json.JSONDecodeError:
@@ -550,7 +644,7 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
                 if chunk_id == reward_id and isinstance(data, dict) and "unclaimedRewardInfo" in data:
                     reward_info = data
                     log_debug(f"Found reward info")
-
+
                 # Process initial setup
                 elif chunk_id == "1":
                     yield PlainTextResponse(line.decode(errors="ignore"))
@@ -558,7 +652,8 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
                     left_stream = data.get("leftStream", {})
                     right_stream = data.get("rightStream", {})
                     if data.get("quickResponse", {}) != "$undefined":
-                        quick_response_id = extract_ref_id(data.get("quickResponse", {}).get("stream", {}).get("next"))
+                        quick_response_id = extract_ref_id(
+                            data.get("quickResponse", {}).get("stream", {}).get("next"))
                     if data.get("turnId", {}) != "$undefined":
                         turn_id = extract_ref_id(data.get("turnId", {}).get("next"))
@@ -592,7 +687,7 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
                             provider_info["variantUrl"] = selection.get("externalUrl")
                         log_debug(f"Found variant stream ID: {variant_stream_id}")
                         yield ProviderInfo.from_dict(provider_info)
-
+
                 # Process target stream content
                 elif target_stream_id and chunk_id == target_stream_id:
                     yield PlainTextResponse(line.decode(errors="ignore"))
@@ -600,15 +695,41 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
                         target_stream_id = extract_ref_id(data.get("next"))
                     content = data.get("curr", "")
                     if content:
-                        async for chunk in process_content_chunk(
-                            content,
-                            chunk_id,
-                            line_count,
-                            for_target=True
-                        ):
-                            stream["target"].append(chunk)
-                            is_started = True
-                            yield chunk
+                        # Handle special "$fa" / "$<id>" reference
+                        ref_name = extract_ref_name(content)
+                        if ref_name and (ref_name in think_blocks or ref_name in image_blocks):
+                            # Thinking block
+                            if ref_name in think_blocks:
+                                t_text = think_blocks[ref_name]
+                                if t_text:
+                                    reasoning = Reasoning(t_text)
+                                    # thinking_content += t_text
+                                    stream["thinking"].append(reasoning)
+                                    # yield reasoning
+
+                            # Image-gen block
+                            if ref_name in image_blocks:
+                                img_block_text = image_blocks[ref_name]
+                                async for chunk in process_content_chunk(
+                                        img_block_text,
+                                        ref_name,
+                                        line_count,
+                                        for_target=True
+                                ):
+                                    stream["target"].append(chunk)
+                                    is_started = True
+                                    yield chunk
+                        else:
+                            # Normal textual chunk
+                            async for chunk in process_content_chunk(
+                                    content,
+                                    chunk_id,
+                                    line_count,
+                                    for_target=True
+                            ):
+                                stream["target"].append(chunk)
+                                is_started = True
+                                yield chunk

                 # Variant stream (comparison)
                 elif variant_stream_id and chunk_id == variant_stream_id:
                     yield PlainTextResponse("[Variant] " + line.decode(errors="ignore"))
@@ -651,8 +772,6 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
                     ...
                 elif chunk_id == left_message_id:
                     ...
-                elif chunk_id == nudge_new_chat_id:
-                    nudge_new_chat = data
                 # Miscellaneous extra content
                 elif isinstance(data, dict) and "curr" in data:
                     content = data.get("curr", "")
@@ -664,24 +783,23 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
                         for_target=False
                     ):
                         stream["extra"].append(chunk)
-                        if isinstance(chunk,str) and "<yapp
[... diff content lost to an HTML-stripping artifact: the remainder of the Yupp.py changes, the g4f/Provider/needs_auth/LMArena.py 'Model Not Found' fix, and the beginning of the g4f/Provider/needs_auth/OpenaiChat.py diff ...]
-    ) -> list[ImageRequest]:
+            cls,
+            session: StreamSession,
+            auth_result: AuthResult,
+            media: MediaListType,
+    ) -> List[ImageRequest]:
         """
         Upload an image to the service and get the download URL
@@ -143,11 +151,20 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
         Returns:
             An ImageRequest object that contains the download URL, file name, and other data
         """
-        async def upload_file(file, image_name=None):
+
+        async def upload_file(file, image_name=None) -> ImageRequest:
             debug.log(f"Uploading file: {image_name}")
             file_data = {}
             data_bytes = to_bytes(file)
+            # Check Cache
+            hasher = hashlib.md5()
+            hasher.update(data_bytes)
+            image_hash = hasher.hexdigest()
+            cache_file = ImagesCache.get(image_hash)
+            if cls.image_cache and cache_file:
+                debug.log("Using cached image")
+                return ImageRequest(cache_file)
             extension, mime_type = detect_file_type(data_bytes)
             if "image" in mime_type:
                 # Convert the image to a PIL Image object
@@ -181,30 +198,31 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
             # Put the image bytes to the upload URL and check the status
             await asyncio.sleep(1)
             async with session.put(
-                file_data["upload_url"],
-                data=data_bytes,
-                headers={
-                    **UPLOAD_HEADERS,
-                    "Content-Type": file_data["mime_type"],
-                    "x-ms-blob-type": "BlockBlob",
-                    "x-ms-version": "2020-04-08",
-                    "Origin": "https://chatgpt.com",
-                }
+                    file_data["upload_url"],
+                    data=data_bytes,
+                    headers={
+                        **UPLOAD_HEADERS,
+                        "Content-Type": file_data["mime_type"],
+                        "x-ms-blob-type": "BlockBlob",
+                        "x-ms-version": "2020-04-08",
+                        "Origin": "https://chatgpt.com",
+                    }
             ) as response:
                 await raise_for_status(response)
             # Post the file ID to the service and get the download URL
             async with session.post(
-                f"{cls.url}/backend-api/files/{file_data['file_id']}/uploaded",
-                json={},
-                headers=auth_result.headers
+                    f"{cls.url}/backend-api/files/{file_data['file_id']}/uploaded",
+                    json={},
+                    headers=auth_result.headers
             ) as response:
                 cls._update_request_args(auth_result, session)
                 await raise_for_status(response, "Get download url failed")
                 uploaded_data = await response.json()
                 file_data["download_url"] = uploaded_data["download_url"]
+                ImagesCache[image_hash] = file_data.copy()
             return ImageRequest(file_data)
-        medias = []
+
+        medias: List["ImageRequest"] = []
         for item in media:
             item = item if isinstance(item, tuple) else (item,)
             __uploaded_media = await upload_file(*item)
@@ -242,7 +260,8 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                 "id": str(uuid.uuid4()),
                 "author": {"role": message["role"]},
                 "content": {"content_type": "text", "parts": [to_string(message["content"])]},
-                "metadata": {"serialization_metadata": {"custom_symbol_offsets": []}, **({"system_hints": system_hints} if system_hints else {})},
+                "metadata": {"serialization_metadata": {"custom_symbol_offsets": []},
+                             **({"system_hints": system_hints} if system_hints else {})},
                 "create_time": time.time(),
             } for message in messages]
         # Check if there is an image response
@@ -256,11 +275,11 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                         "size_bytes": image_request.get("file_size"),
                         "width": image_request.get("width"),
                     }
-                    for image_request in image_requests
+                    for image_request in image_requests  # Add For Images Only
+                    if image_request.get("use_case") == "multimodal"
                 ],
-                messages[-1]["content"]["parts"][0]]
+                    messages[-1]["content"]["parts"][0]]
             }
             # Add the metadata object with the attachments
             messages[-1]["metadata"] = {
@@ -278,12 +297,14 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                         else {}
                     ),
                 }
-                for image_request in image_requests]
+                    for image_request in image_requests]
             }
         return messages

     @classmethod
-    async def get_generated_image(cls, session: StreamSession, auth_result: AuthResult, element: Union[dict, str], prompt: str = None, conversation_id: str = None) -> ImagePreview|ImageResponse|None:
+    async def get_generated_image(cls, session: StreamSession, auth_result: AuthResult, element: Union[dict, str],
+                                  prompt: str = None, conversation_id: str = None,
+                                  status: Optional[str] = None) -> ImagePreview | ImageResponse | None:
         download_urls = []
         is_sediment = False
         if prompt is None:
@@ -292,7 +313,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
             except KeyError:
                 pass
         if "asset_pointer" in element:
-            element = element["asset_pointer"]
+            element = element["asset_pointer"]
         if isinstance(element, str) and element.startswith("file-service://"):
             element = element.split("file-service://", 1)[-1]
         elif isinstance(element, str) and element.startswith("sediment://"):
@@ -303,7 +324,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
         if is_sediment:
             url = f"{cls.url}/backend-api/conversation/{conversation_id}/attachment/{element}/download"
         else:
-            url =f"{cls.url}/backend-api/files/{element}/download"
+            url = f"{cls.url}/backend-api/files/{element}/download"
         try:
             async with session.get(url, headers=auth_result.headers) as response:
                 cls._update_request_args(auth_result, session)
@@ -319,27 +340,31 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                 debug.error("OpenaiChat: Download image failed")
                 debug.error(e)
         if download_urls:
-            return ImagePreview(download_urls, prompt, {"headers": auth_result.headers}) if is_sediment else ImageResponse(download_urls, prompt, {"headers": auth_result.headers})
+            # status = None, finished_successfully
+            if is_sediment and status is None:
+                return ImagePreview(download_urls, prompt, {"status": status, "headers": auth_result.headers})
+            else:
+                return ImageResponse(download_urls, prompt, {"status": status, "headers": auth_result.headers})

     @classmethod
     async def create_authed(
-        cls,
-        model: str,
-        messages: Messages,
-        auth_result: AuthResult,
-        proxy: str = None,
-        timeout: int = 360,
-        auto_continue: bool = False,
-        action: Optional[str] = None,
-        conversation: Conversation = None,
-        media: MediaListType = None,
-        return_conversation: bool = True,
-        web_search: bool = False,
-        prompt: str = None,
-        conversation_mode: Optional[dict] = None,
-        temporary: Optional[bool] = None,
-        conversation_id: Optional[str] = None,
-        **kwargs
+            cls,
+            model: str,
+            messages: Messages,
+            auth_result: AuthResult,
+            proxy: str = None,
+            timeout: int = 360,
+            auto_continue: bool = False,
+            action: Optional[str] = None,
+            conversation: Conversation = None,
+            media: MediaListType = None,
+            return_conversation: bool = True,
+            web_search: bool = False,
+            prompt: str = None,
+            conversation_mode: Optional[dict] = None,
+            temporary: Optional[bool] = None,
+            conversation_id: Optional[str] = None,
+            **kwargs
     ) -> AsyncResult:
         """
         Create an asynchronous generator for the conversation.
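Reviewer note: the new `status` parameter decides whether the caller gets a streaming preview or a final image. A minimal illustration of the decision logic the hunk above implements (the `status` value is taken from a '/message/status' patch in the SSE stream; values observed in this diff are None while streaming and "finished_successfully" at the end):

```python
def pick_response_type(is_sediment: bool, status: str | None) -> str:
    # Mirrors the branch added to get_generated_image().
    if is_sediment and status is None:
        return "ImagePreview"      # generation still in progress
    return "ImageResponse"         # final image, safe to display/cache

assert pick_response_type(True, None) == "ImagePreview"
assert pick_response_type(True, "finished_successfully") == "ImageResponse"
assert pick_response_type(False, None) == "ImageResponse"
```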
@@ -367,12 +392,12 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
         if action is None:
             action = "next"
         async with StreamSession(
-            proxy=proxy,
-            impersonate="chrome",
-            timeout=timeout
+                proxy=proxy,
+                impersonate="chrome",
+                timeout=timeout
         ) as session:
             image_requests = None
-            media = merge_media(media, messages)
+            media = merge_media(media, messages)
             if not cls.needs_auth and not media:
                 if cls._headers is None:
                     cls._create_request_args(cls._cookies)
@@ -436,18 +461,19 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                 if temporary:
                     data["history_and_training_disabled"] = True
                 async with session.post(
-                    prepare_url,
-                    json=data,
-                    headers=cls._headers
+                        prepare_url,
+                        json=data,
+                        headers=cls._headers
                 ) as response:
                     await raise_for_status(response)
                     conduit_token = (await response.json())["conduit_token"]
             async with session.post(
-                f"{cls.url}/backend-anon/sentinel/chat-requirements"
-                if cls._api_key is None else
-                f"{cls.url}/backend-api/sentinel/chat-requirements",
-                json={"p": None if not getattr(auth_result, "proof_token", None) else get_requirements_token(getattr(auth_result, "proof_token", None))},
-                headers=cls._headers
+                    f"{cls.url}/backend-anon/sentinel/chat-requirements"
+                    if cls._api_key is None else
+                    f"{cls.url}/backend-api/sentinel/chat-requirements",
+                    json={"p": None if not getattr(auth_result, "proof_token", None) else get_requirements_token(
+                        getattr(auth_result, "proof_token", None))},
+                    headers=cls._headers
             ) as response:
                 if response.status in (401, 403):
                     raise MissingAuthError(f"Response status: {response.status}")
@@ -456,10 +482,10 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                     await raise_for_status(response)
                 chat_requirements = await response.json()
                 need_turnstile = chat_requirements.get("turnstile", {}).get("required", False)
-                need_arkose = chat_requirements.get("arkose", {}).get("required", False)
-                chat_token = chat_requirements.get("token")
+                need_arkose = chat_requirements.get("arkose", {}).get("required", False)
+                chat_token = chat_requirements.get("token")

-            # if need_arkose and cls.request_config.arkose_token is None:
+            # if need_arkose and cls.request_config.arkose_token is None:
             #    await get_request_config(proxy)
             #    cls._create_request_args(auth_result.cookies, auth_result.headers)
             #    cls._set_api_key(auth_result.access_token)
@@ -476,23 +502,25 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                     proof_token=proof_token
                 )
             # [debug.log(text) for text in (
-            #f"Arkose: {'False' if not need_arkose else auth_result.arkose_token[:12]+'...'}",
-            #f"Proofofwork: {'False' if proofofwork is None else proofofwork[:12]+'...'}",
-            #f"AccessToken: {'False' if cls._api_key is None else cls._api_key[:12]+'...'}",
+            #    f"Arkose: {'False' if not need_arkose else auth_result.arkose_token[:12]+'...'}",
+            #    f"Proofofwork: {'False' if proofofwork is None else proofofwork[:12]+'...'}",
+            #    f"AccessToken: {'False' if cls._api_key is None else cls._api_key[:12]+'...'}",
             # )]
             data = {
                 "action": "next",
                 "parent_message_id": conversation.message_id,
                 "model": model,
-                "timezone_offset_min":-120,
-                "timezone":"Europe/Berlin",
-                "conversation_mode":{"kind":"primary_assistant"},
-                "enable_message_followups":True,
+                "timezone_offset_min": -120,
+                "timezone": "Europe/Berlin",
+                "conversation_mode": {"kind": "primary_assistant"},
+                "enable_message_followups": True,
                 "system_hints": ["search"] if web_search else None,
-                "supports_buffering":True,
-                "supported_encodings":["v1"],
-                "client_contextual_info":{"is_dark_mode":False,"time_since_loaded":random.randint(20, 500),"page_height":578,"page_width":1850,"pixel_ratio":1,"screen_height":1080,"screen_width":1920},
-                "paragen_cot_summary_display_override":"allow"
+                "supports_buffering": True,
+                "supported_encodings": ["v1"],
+                "client_contextual_info": {"is_dark_mode": False, "time_since_loaded": random.randint(20, 500),
+                                           "page_height": 578, "page_width": 1850, "pixel_ratio": 1,
+                                           "screen_height": 1080, "screen_width": 1920},
+                "paragen_cot_summary_display_override": "allow"
             }
             if temporary:
                 data["history_and_training_disabled"] = True
@@ -512,7 +540,8 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                         new_messages = []
                     else:
                         new_messages.append(message)
-                data["messages"] = cls.create_messages(new_messages, image_requests, ["search"] if web_search else None)
+                data["messages"] = cls.create_messages(new_messages, image_requests,
+                                                       ["search"] if web_search else None)
             yield JsonRequest.from_dict(data)
             headers = {
                 **cls._headers,
@@ -521,18 +550,18 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                 "openai-sentinel-chat-requirements-token": chat_token,
                 **({} if conduit_token is None else {"x-conduit-token": conduit_token})
             }
-            #if cls.request_config.arkose_token:
+            # if cls.request_config.arkose_token:
             #    headers["openai-sentinel-arkose-token"] = cls.request_config.arkose_token
             if proofofwork is not None:
                 headers["openai-sentinel-proof-token"] = proofofwork
             if need_turnstile and getattr(auth_result, "turnstile_token", None) is not None:
                 headers['openai-sentinel-turnstile-token'] = auth_result.turnstile_token
             async with session.post(
-                backend_anon_url
-                if cls._api_key is None else
-                backend_url,
-                json=data,
-                headers=headers
+                    backend_anon_url
+                    if cls._api_key is None else
+                    backend_url,
+                    json=data,
+                    headers=headers
             ) as response:
                 cls._update_request_args(auth_result, session)
                 if response.status in (401, 403, 429, 500):
@@ -548,10 +577,12 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                         if match.group(0) in matches:
                             continue
                         matches.append(match.group(0))
-                        generated_image = await cls.get_generated_image(session, auth_result, match.group(0), prompt)
+                        generated_image = await cls.get_generated_image(session, auth_result, match.group(0),
+                                                                        prompt)
                         if generated_image is not None:
                             yield generated_image
-                    async for chunk in cls.iter_messages_line(session, auth_result, line, conversation, sources, references):
+                    async for chunk in cls.iter_messages_line(session, auth_result, line, conversation, sources,
+                                                              references):
                         if isinstance(chunk, str):
                             chunk = chunk.replace("\ue203", "").replace("\ue204", "").replace("\ue206", "")
                             buffer += chunk
@@ -561,9 +592,9 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                             def citation_replacer(match: re.Match[str]):
                                 ref_type = match.group(1)
                                 ref_index = int(match.group(2))
-                                if ((ref_type == "image" and is_image_embedding) or
-                                    is_video_embedding or
-                                    ref_type == "forecast"):
+                                if ((ref_type == "image" and is_image_embedding) or
+                                        is_video_embedding or
+                                        ref_type == "forecast"):
                                     reference = references.get_reference({
                                         "ref_index": ref_index,
@@ -571,7 +602,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                                     })
                                     if not reference:
                                         return ""
-
+
                                     if ref_type == "forecast":
                                         if reference.get("alt"):
                                             return reference.get("alt")
@@ -580,11 +611,13 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                                     if is_image_embedding and reference.get("content_url", ""):
                                         return f"![{reference.get('title', '')}]({reference.get('content_url')})"
-
+
                                     if is_video_embedding:
-                                        if reference.get("url", "") and reference.get("thumbnail_url", ""):
+                                        if reference.get("url", "") and reference.get("thumbnail_url",
+                                                                                      ""):
                                             return f"[![{reference.get('title', '')}]({reference['thumbnail_url']})]({reference['url']})"
-                                        video_match = re.match(r"video\n(.*?)\nturn[0-9]+", match.group(0))
+                                        video_match = re.match(r"video\n(.*?)\nturn[0-9]+",
+                                                               match.group(0))
                                         if video_match:
                                             return video_match.group(1)
                                         return ""
@@ -595,9 +628,9 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                                 })
                                 if source_index is not None and len(sources.list) > source_index:
                                     link = sources.list[source_index]["url"]
-                                    return f"[[{source_index+1}]]({link})"
+                                    return f"[[{source_index + 1}]]({link})"
                                 return f""
-
+
                             def products_replacer(match: re.Match[str]):
                                 try:
                                     products_data = json.loads(match.group(1))
@@ -612,25 +645,30 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                                     return ""
                                 sequence_content = match.group(1)
-                                sequence_content = sequence_content.replace("\ue200", "").replace("\ue202", "\n").replace("\ue201", "")
+                                sequence_content = sequence_content.replace("\ue200", "").replace("\ue202",
+                                                                                                  "\n").replace(
+                                    "\ue201", "")
                                 sequence_content = sequence_content.replace("navlist\n", "#### ")
-
+
                                 # Handle search, news, view and image citations
                                 is_image_embedding = sequence_content.startswith("i\nturn")
                                 is_video_embedding = sequence_content.startswith("video\n")
                                 sequence_content = re.sub(
-                                    r'(?:cite\nturn[0-9]+|forecast\nturn[0-9]+|video\n.*?\nturn[0-9]+|i?\n?turn[0-9]+)(search|news|view|image|forecast)(\d+)',
-                                    citation_replacer,
+                                    r'(?:cite\nturn[0-9]+|forecast\nturn[0-9]+|video\n.*?\nturn[0-9]+|i?\n?turn[0-9]+)(search|news|view|image|forecast)(\d+)',
+                                    citation_replacer,
                                     sequence_content
                                 )
-                                sequence_content = re.sub(r'products\n(.*)', products_replacer, sequence_content)
-                                sequence_content = re.sub(r'product_entity\n\[".*","(.*)"\]', lambda x: x.group(1), sequence_content)
+                                sequence_content = re.sub(r'products\n(.*)', products_replacer,
+                                                          sequence_content)
+                                sequence_content = re.sub(r'product_entity\n\[".*","(.*)"\]',
+                                                          lambda x: x.group(1), sequence_content)
                                 return sequence_content
-
+
                             # process only completed sequences and do not touch start of next not completed sequence
-                            buffer = re.sub(r'\ue200(.*?)\ue201', sequence_replacer, buffer, flags=re.DOTALL)
-
-                            if buffer.find(u"\ue200") != -1: # still have uncompleted sequence
+                            buffer = re.sub(r'\ue200(.*?)\ue201', sequence_replacer, buffer,
+                                            flags=re.DOTALL)
+
+                            if buffer.find(u"\ue200") != -1:  # still have uncompleted sequence
                                 continue
                             else:
                                 # do not yield to consume rest part of special sequence
@@ -647,7 +685,8 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
             if sources.list:
                 yield sources
             if conversation.generated_images:
-                yield ImageResponse(conversation.generated_images.urls, conversation.prompt, {"headers": auth_result.headers})
+                yield ImageResponse(conversation.generated_images.urls, conversation.prompt,
+                                    {"headers": auth_result.headers})
                 conversation.generated_images = None
                 conversation.prompt = None
             if return_conversation:
@@ -667,7 +706,9 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
             yield FinishReason(conversation.finish_reason)

     @classmethod
-    async def iter_messages_line(cls, session: StreamSession, auth_result: AuthResult, line: bytes, fields: Conversation, sources: OpenAISources, references: ContentReferences) -> AsyncIterator:
+    async def iter_messages_line(cls, session: StreamSession, auth_result: AuthResult, line: bytes,
+                                 fields: Conversation, sources: OpenAISources,
+                                 references: ContentReferences) -> AsyncIterator:
         if not line.startswith(b"data: "):
             return
         elif line.startswith(b"data: [DONE]"):
@@ -706,9 +747,14 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
             elif m.get("p") == "/message/metadata/image_gen_title":
                 fields.prompt = m.get("v")
             elif m.get("p") == "/message/content/parts/0/asset_pointer":
-                generated_images = fields.generated_images = await cls.get_generated_image(session, auth_result, m.get("v"), fields.prompt, fields.conversation_id)
+                status = next(filter(lambda x: x.get("p") == '/message/status', v), {}).get('v', None)
+                generated_images = fields.generated_images = await cls.get_generated_image(session, auth_result,
+                                                                                           m.get("v"),
+                                                                                           fields.prompt,
+                                                                                           fields.conversation_id,
+                                                                                           status)
                 if generated_images is not None:
-                    if buffer:
+                    if buffer:
                         yield buffer
                     yield generated_images
             elif m.get("p") == "/message/metadata/search_result_groups":
@@ -735,41 +781,48 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                 if match and m.get("o") == "append" and isinstance(m.get("v"), dict):
                     idx = int(match.group(1))
                     references.merge_reference(idx, m.get("v"))
-            elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/fallback_items$", m.get("p")) and isinstance(m.get("v"), list):
+            elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/fallback_items$",
+                                         m.get("p")) and isinstance(m.get("v"), list):
                 for link in m.get("v", []) or []:
                     sources.add_source(link)
-            elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/items$", m.get("p")) and isinstance(m.get("v"), list):
+            elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/items$",
+                                         m.get("p")) and isinstance(m.get("v"), list):
                 for link in m.get("v", []) or []:
                     sources.add_source(link)
-            elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/refs$", m.get("p")) and isinstance(m.get("v"), list):
+            elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/refs$",
+                                         m.get("p")) and isinstance(m.get("v"), list):
                 match = re.match(r"^/message/metadata/content_references/(\d+)/refs$", m.get("p"))
                 if match:
                     idx = int(match.group(1))
                     references.update_reference(idx, m.get("o"), "refs", m.get("v"))
-            elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/alt$", m.get("p")) and isinstance(m.get("v"), list):
+            elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/alt$",
+                                         m.get("p")) and isinstance(m.get("v"), list):
                 match = re.match(r"^/message/metadata/content_references/(\d+)/alt$", m.get("p"))
                 if match:
                     idx = int(match.group(1))
                     references.update_reference(idx, m.get("o"), "alt", m.get("v"))
-            elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/prompt_text$", m.get("p")) and isinstance(m.get("v"), list):
+            elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/prompt_text$",
+                                         m.get("p")) and isinstance(m.get("v"), list):
                 match = re.match(r"^/message/metadata/content_references/(\d+)/prompt_text$", m.get("p"))
                 if match:
                     idx = int(match.group(1))
                     references.update_reference(idx, m.get("o"), "prompt_text", m.get("v"))
-            elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/refs/\d+$", m.get("p")) and isinstance(m.get("v"), dict):
+            elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/refs/\d+$",
+                                         m.get("p")) and isinstance(m.get("v"), dict):
                 match = re.match(r"^/message/metadata/content_references/(\d+)/refs/(\d+)$", m.get("p"))
                 if match:
                     reference_idx = int(match.group(1))
                     ref_idx = int(match.group(2))
                     references.update_reference(reference_idx, m.get("o"), "refs", m.get("v"), ref_idx)
-            elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/images$", m.get("p")) and isinstance(m.get("v"), list):
+            elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/images$",
+                                         m.get("p")) and isinstance(m.get("v"), list):
                 match = re.match(r"^/message/metadata/content_references/(\d+)/images$", m.get("p"))
                 if match:
                     idx = int(match.group(1))
                     references.update_reference(idx, m.get("o"), "images", m.get("v"))
             elif m.get("p") == "/message/metadata/finished_text":
                 fields.is_thinking = False
-                if buffer:
+                if buffer:
                     yield buffer
                 yield Reasoning(status=m.get("v"))
             elif m.get("p") == "/message/metadata" and fields.recipient == "all":
@@ -785,10 +838,11 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
             fields.recipient = m.get("recipient", fields.recipient)
             if fields.recipient == "all":
                 c = m.get("content", {})
-                if c.get("content_type") == "text" and m.get("author", {}).get("role") == "tool" and "initial_text" in m.get("metadata", {}):
+                if c.get("content_type") == "text" and m.get("author", {}).get(
+                        "role") == "tool" and "initial_text" in m.get("metadata", {}):
                     fields.is_thinking = True
                     yield Reasoning(status=m.get("metadata", {}).get("initial_text"))
-                #if c.get("content_type") == "multimodal_text":
+                # if c.get("content_type") == "multimodal_text":
                 #    for part in c.get("parts"):
                 #        if isinstance(part, dict) and part.get("content_type") == "image_asset_pointer":
                 #            yield await cls.get_generated_image(session, auth_result, part, fields.prompt, fields.conversation_id)
@@ -803,13 +857,13 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
     @classmethod
     async def synthesize(cls, params: dict) -> AsyncIterator[bytes]:
         async with StreamSession(
-            impersonate="chrome",
-            timeout=0
+                impersonate="chrome",
+                timeout=0
         ) as session:
             async with session.get(
-                f"{cls.url}/backend-api/synthesize",
-                params=params,
-                headers=cls._headers
+                    f"{cls.url}/backend-api/synthesize",
+                    params=params,
+                    headers=cls._headers
             ) as response:
                 await raise_for_status(response)
                 async for chunk in response.iter_content():
@@ -817,15 +871,15 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
     @classmethod
     async def login(
-        cls,
-        proxy: str = None,
-        api_key: str = None,
-        proof_token: str = None,
-        cookies: Cookies = None,
-        headers: dict = None,
-        **kwargs
+            cls,
+            proxy: str = None,
+            api_key: str = None,
+            proof_token: str = None,
+            cookies: Cookies = None,
+            headers: dict = None,
+            **kwargs
     ) -> AsyncIterator:
-        if cls._expires is not None and (cls._expires - 60*10) < time.time():
+        if cls._expires is not None and (cls._expires - 60 * 10) < time.time():
             cls._headers = cls._api_key = None
         if cls._headers is None or headers is not None:
             cls._headers = {} if headers is None else headers
@@ -858,6 +912,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
     async def nodriver_auth(cls, proxy: str = None):
         async with get_nodriver_session(proxy=proxy) as browser:
             page = await browser.get(cls.url)
+
             def on_request(event: nodriver.cdp.network.RequestWillBeSent, page=None):
                 if event.request.url == start_url or event.request.url.startswith(conversation_url):
                     if cls.request_config.headers is None:
@@ -866,9 +921,10 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                             cls.request_config.headers[key.lower()] = value
                 elif event.request.url in (backend_url, backend_anon_url):
                     if "OpenAI-Sentinel-Proof-Token" in event.request.headers:
-                        cls.request_config.proof_token = json.loads(base64.b64decode(
-                            event.request.headers["OpenAI-Sentinel-Proof-Token"].split("gAAAAAB", 1)[-1].split("~")[0].encode()
-                        ).decode())
+                        cls.request_config.proof_token = json.loads(base64.b64decode(
+                            event.request.headers["OpenAI-Sentinel-Proof-Token"].split("gAAAAAB", 1)[-1].split("~")[
+                                0].encode()
+                        ).decode())
                     if "OpenAI-Sentinel-Turnstile-Token" in event.request.headers:
                         cls.request_config.turnstile_token = event.request.headers["OpenAI-Sentinel-Turnstile-Token"]
                     if "Authorization" in event.request.headers:
@@ -881,6 +937,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                         arkBody=event.request.post_data,
                         userAgent=event.request.headers.get("User-Agent")
                     )
+
             await page.send(nodriver.cdp.network.enable())
             page.add_handler(nodriver.cdp.network.RequestWillBeSent, on_request)
             await page.reload()
@@ -912,7 +969,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                 if cls._api_key is not None or not cls.needs_auth:
                     break
                 await asyncio.sleep(1)
-            debug.log(f"OpenaiChat: Access token: {'False' if cls._api_key is None else cls._api_key[:12]+'...'}")
+            debug.log(f"OpenaiChat: Access token: {'False' if cls._api_key is None else cls._api_key[:12] + '...'}")
             while True:
                 if cls.request_config.proof_token:
                     break
@@ -970,11 +1027,14 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
         if cls._cookies:
             cls._headers["cookie"] = format_cookies(cls._cookies)

+
 class Conversation(JsonConversation):
     """
     Class to encapsulate response fields.
     """
-    def __init__(self, conversation_id: str = None, message_id: str = None, user_id: str = None, finish_reason: str = None, parent_message_id: str = None, is_thinking: bool = False):
+
+    def __init__(self, conversation_id: str = None, message_id: str = None, user_id: str = None,
+                 finish_reason: str = None, parent_message_id: str = None, is_thinking: bool = False):
         self.conversation_id = conversation_id
         self.message_id = message_id
         self.finish_reason = finish_reason
@@ -987,8 +1047,9 @@ class Conversation(JsonConversation):
         self.prompt = None
         self.generated_images: ImagePreview = None

+
 def get_cookies(
-    urls: Optional[Iterator[str]] = None
+        urls: Optional[Iterator[str]] = None
 ) -> Generator[Dict, Dict, Dict[str, str]]:
     params = {}
     if urls is not None:
@@ -1000,6 +1061,7 @@ def get_cookies(
     json = yield cmd_dict
     return {c["name"]: c["value"] for c in json['cookies']} if 'cookies' in json else {}

+
 class OpenAISources(ResponseType):
     list: List[Dict[str, str]]

@@ -1025,7 +1087,7 @@ class OpenAISources(ResponseType):
         if existing_source and idx is not None:
             self.list[idx] = source
             return
-
+
         existing_source, idx = self.find_by_url(source["url"])
         if existing_source and idx is not None:
             self.list[idx] = source
@@ -1038,53 +1100,54 @@ class OpenAISources(ResponseType):
         if not self.list:
             return ""
         return "\n\n\n\n" + ("\n>\n".join([
-            f"> [{idx+1}] {format_link(link['url'], link.get('title', ''))}"
+            f"> [{idx + 1}] {format_link(link['url'], link.get('title', ''))}"
            for idx, link in enumerate(self.list)
         ]))
-
-    def get_ref_info(self, source: Dict[str, str]) -> dict[str, str|int] | None:
+
+    def get_ref_info(self, source: Dict[str, str]) -> dict[str, str | int] | None:
         ref_index = source.get("ref_id", {}).get("ref_index", None)
         ref_type = source.get("ref_id", {}).get("ref_type", None)
         if isinstance(ref_index, int):
             return {
-                "ref_index": ref_index,
+                "ref_index": ref_index,
                 "ref_type": ref_type,
             }
-
+
         for ref_info in source.get('refs') or []:
             ref_index = ref_info.get("ref_index", None)
             ref_type = ref_info.get("ref_type", None)
             if isinstance(ref_index, int):
                 return {
-                    "ref_index": ref_index,
+                    "ref_index": ref_index,
                     "ref_type": ref_type,
                 }
-
+
         return None

-    def find_by_ref_info(self, ref_info: dict[str, str|int]):
+    def find_by_ref_info(self, ref_info: dict[str, str | int]):
         for idx, source in enumerate(self.list):
             source_ref_info = self.get_ref_info(source)
-            if (source_ref_info and
-                source_ref_info["ref_index"] == ref_info["ref_index"] and
-                source_ref_info["ref_type"] == ref_info["ref_type"]):
-                return source, idx
+            if (source_ref_info and
+                    source_ref_info["ref_index"] == ref_info["ref_index"] and
+                    source_ref_info["ref_type"] == ref_info["ref_type"]):
+                return source, idx
         return None, None
-
+
     def find_by_url(self, url: str):
         for idx, source in enumerate(self.list):
             if source["url"] == url:
                 return source, idx
-        return None, None
+        return None, None

-    def get_index(self, ref_info: dict[str, str|int]) -> int | None:
+    def get_index(self, ref_info: dict[str, str | int]) -> int | None:
         _, index = self.find_by_ref_info(ref_info)
         if index is not None:
-            return index
+            return index
         return None

+
 class ContentReferences:
     def __init__(self) -> None:
         self.list: List[Dict[str, Any]] = []
@@ -1098,16 +1161,16 @@ class ContentReferences:
         self.list[idx] = {**self.list[idx], **reference_part}

-    def update_reference(self, idx: int, operation: str, field: str, value: Any, ref_idx = None) -> None:
+    def update_reference(self, idx: int, operation: str, field: str, value: Any, ref_idx=None) -> None:
         while len(self.list) <= idx:
             self.list.append({})
-
+
         if operation == "append" or operation == "add":
             if not isinstance(self.list[idx].get(field, None), list):
                 self.list[idx][field] = []
             if isinstance(value, list):
                 self.list[idx][field].extend(value)
-            else:
+            else:
                 self.list[idx][field].append(value)

         if operation == "replace" and ref_idx is not None:
@@ -1123,10 +1186,10 @@ class ContentReferences:
             self.list[idx][field] = value

     def get_ref_info(
-        self,
-        source: Dict[str, str],
-        target_ref_info: Dict[str, Union[str, int]]
-    ) -> dict[str, str|int] | None:
+            self,
+            source: Dict[str, str],
+            target_ref_info: Dict[str, Union[str, int]]
+    ) -> dict[str, str | int] | None:
         for idx, ref_info in enumerate(source.get("refs", [])) or []:
             if not isinstance(ref_info, dict):
                 continue
@@ -1134,11 +1197,11 @@ class ContentReferences:
             ref_index = ref_info.get("ref_index", None)
             ref_type = ref_info.get("ref_type", None)
             if isinstance(ref_index, int) and isinstance(ref_type, str):
-                if (not target_ref_info or
-                    (target_ref_info["ref_index"] == ref_index and
-                     target_ref_info["ref_type"] == ref_type)):
+                if (not target_ref_info or
+                        (target_ref_info["ref_index"] == ref_index and
+                         target_ref_info["ref_type"] == ref_type)):
                     return {
-                        "ref_index": ref_index,
+                        "ref_index": ref_index,
                         "ref_type": ref_type,
                         "idx": idx
                     }
@@ -1149,9 +1212,9 @@ class ContentReferences:
         for reference in self.list:
             reference_ref_info = self.get_ref_info(reference, ref_info)
-            if (not reference_ref_info or
-                reference_ref_info["ref_index"] != ref_info["ref_index"] or
-                reference_ref_info["ref_type"] != ref_info["ref_type"]):
+            if (not reference_ref_info or
+                    reference_ref_info["ref_index"] != ref_info["ref_index"] or
+                    reference_ref_info["ref_type"] != ref_info["ref_type"]):
                 continue

             if ref_info["ref_type"] != "image":
[]: ref_index = ref_info.get("ref_index", None) ref_type = ref_info.get("ref_type", None) if isinstance(ref_index, int): return { - "ref_index": ref_index, + "ref_index": ref_index, "ref_type": ref_type, } - + return None - def find_by_ref_info(self, ref_info: dict[str, str|int]): + def find_by_ref_info(self, ref_info: dict[str, str | int]): for idx, source in enumerate(self.list): source_ref_info = self.get_ref_info(source) - if (source_ref_info and - source_ref_info["ref_index"] == ref_info["ref_index"] and - source_ref_info["ref_type"] == ref_info["ref_type"]): - return source, idx + if (source_ref_info and + source_ref_info["ref_index"] == ref_info["ref_index"] and + source_ref_info["ref_type"] == ref_info["ref_type"]): + return source, idx return None, None - + def find_by_url(self, url: str): for idx, source in enumerate(self.list): if source["url"] == url: return source, idx - return None, None + return None, None - def get_index(self, ref_info: dict[str, str|int]) -> int | None: + def get_index(self, ref_info: dict[str, str | int]) -> int | None: _, index = self.find_by_ref_info(ref_info) if index is not None: - return index + return index return None + class ContentReferences: def __init__(self) -> None: self.list: List[Dict[str, Any]] = [] @@ -1098,16 +1161,16 @@ class ContentReferences: self.list[idx] = {**self.list[idx], **reference_part} - def update_reference(self, idx: int, operation: str, field: str, value: Any, ref_idx = None) -> None: + def update_reference(self, idx: int, operation: str, field: str, value: Any, ref_idx=None) -> None: while len(self.list) <= idx: self.list.append({}) - + if operation == "append" or operation == "add": if not isinstance(self.list[idx].get(field, None), list): self.list[idx][field] = [] if isinstance(value, list): self.list[idx][field].extend(value) - else: + else: self.list[idx][field].append(value) if operation == "replace" and ref_idx is not None: @@ -1123,10 +1186,10 @@ class ContentReferences: self.list[idx][field] = value def get_ref_info( - self, - source: Dict[str, str], - target_ref_info: Dict[str, Union[str, int]] - ) -> dict[str, str|int] | None: + self, + source: Dict[str, str], + target_ref_info: Dict[str, Union[str, int]] + ) -> dict[str, str | int] | None: for idx, ref_info in enumerate(source.get("refs", [])) or []: if not isinstance(ref_info, dict): continue @@ -1134,11 +1197,11 @@ class ContentReferences: ref_index = ref_info.get("ref_index", None) ref_type = ref_info.get("ref_type", None) if isinstance(ref_index, int) and isinstance(ref_type, str): - if (not target_ref_info or - (target_ref_info["ref_index"] == ref_index and - target_ref_info["ref_type"] == ref_type)): + if (not target_ref_info or + (target_ref_info["ref_index"] == ref_index and + target_ref_info["ref_type"] == ref_type)): return { - "ref_index": ref_index, + "ref_index": ref_index, "ref_type": ref_type, "idx": idx } @@ -1149,9 +1212,9 @@ class ContentReferences: for reference in self.list: reference_ref_info = self.get_ref_info(reference, ref_info) - if (not reference_ref_info or - reference_ref_info["ref_index"] != ref_info["ref_index"] or - reference_ref_info["ref_type"] != ref_info["ref_type"]): + if (not reference_ref_info or + reference_ref_info["ref_index"] != ref_info["ref_index"] or + reference_ref_info["ref_type"] != ref_info["ref_type"]): continue if ref_info["ref_type"] != "image": diff --git a/g4f/image/__init__.py b/g4f/image/__init__.py index c09f484e..7c918b34 100644 --- a/g4f/image/__init__.py +++ b/g4f/image/__init__.py @@ -9,6 +9,8 @@ 
from pathlib import Path from typing import Optional from urllib.parse import urlparse +import requests + try: from PIL import Image, ImageOps has_requirements = True @@ -383,6 +385,13 @@ def to_bytes(image: ImageType) -> bytes: return Path(path).read_bytes() else: raise FileNotFoundError(f"File not found: {path}") + else: + resp = requests.get(image, headers={ + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36 Edg/137.0.0.0", + }) + if resp.ok and is_accepted_format(resp.content): + return resp.content + raise ValueError("Invalid image url. Expected bytes, str, or PIL Image.") else: raise ValueError("Invalid image format. Expected bytes, str, or PIL Image.") elif isinstance(image, Image.Image): diff --git a/g4f/requests/aiohttp.py b/g4f/requests/aiohttp.py index 644ae026..d3d33271 100644 --- a/g4f/requests/aiohttp.py +++ b/g4f/requests/aiohttp.py @@ -31,17 +31,21 @@ class StreamResponse(ClientResponse): except json.JSONDecodeError: continue -class StreamSession(): +class StreamSession: def __init__( self, - headers: dict = {}, + headers=None, timeout: int = None, connector: BaseConnector = None, proxy: str = None, - proxies: dict = {}, + proxies=None, impersonate = None, **kwargs ): + if proxies is None: + proxies = {} + if headers is None: + headers = {} if impersonate: headers = { **DEFAULT_HEADERS, @@ -49,7 +53,7 @@ class StreamSession(): } connect = None if isinstance(timeout, tuple): - connect, timeout = timeout; + connect, timeout = timeout if timeout is not None: timeout = ClientTimeout(timeout, connect) if proxy is None:
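A side note on the StreamSession change above: the old `headers: dict = {}` / `proxies: dict = {}` defaults are the classic Python mutable-default pitfall, which the patch fixes with the `None` sentinel idiom. A minimal standalone illustration (not g4f code):

```python
def bad(item, bucket=[]):          # one shared list for every call
    bucket.append(item)
    return bucket

def good(item, bucket=None):       # fresh list per call, as the patch does
    if bucket is None:
        bucket = []
    bucket.append(item)
    return bucket

assert bad(1) == [1] and bad(2) == [1, 2]    # state leaks between calls
assert good(1) == [1] and good(2) == [2]     # no leak
```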