diff --git a/g4f/Provider/Qwen.py b/g4f/Provider/Qwen.py
index 67514332..049504e1 100644
--- a/g4f/Provider/Qwen.py
+++ b/g4f/Provider/Qwen.py
@@ -1,22 +1,29 @@
 from __future__ import annotations

 import asyncio
+import datetime
+import hashlib
+import hmac
 import json
-import mimetypes
 import re
 import uuid
 from time import time
-from typing import Literal, Optional
+from typing import Literal, Optional, Dict
+from urllib.parse import quote

 import aiohttp
-from ..errors import RateLimitError, ResponseError
-from ..typing import AsyncResult, Messages, MediaListType
-from ..providers.response import JsonConversation, Reasoning, Usage, ImageResponse, FinishReason
-from ..requests import sse_stream
-from ..tools.media import merge_media
+
+from g4f.image import to_bytes, detect_file_type
+from g4f.requests import raise_for_status
 from .base_provider import AsyncGeneratorProvider, ProviderModelMixin
 from .helper import get_last_user_message
 from .. import debug
+from ..errors import RateLimitError, ResponseError
+from ..providers.response import JsonConversation, Reasoning, Usage, ImageResponse, FinishReason
+from ..requests import sse_stream
+from ..requests.aiohttp import StreamSession
+from ..tools.media import merge_media
+from ..typing import AsyncResult, Messages, MediaListType

 try:
     import curl_cffi
@@ -25,6 +32,56 @@ try:
 except ImportError:
     has_curl_cffi = False

+# Global variables to manage Qwen Image Cache
+ImagesCache: Dict[str, dict] = {}
+
+
+def get_oss_headers(method: str, date_str: str, sts_data: dict, content_type: str) -> dict[str, str]:
+    bucket_name = sts_data.get('bucketname', 'qwen-webui-prod')
+    file_path = sts_data.get('file_path', '')
+    access_key_id = sts_data.get('access_key_id')
+    access_key_secret = sts_data.get('access_key_secret')
+    security_token = sts_data.get('security_token')
+    headers = {
+        'Content-Type': content_type,
+        'x-oss-content-sha256': 'UNSIGNED-PAYLOAD',
+        'x-oss-date': date_str,
+        'x-oss-security-token': security_token,
+        'x-oss-user-agent': 'aliyun-sdk-js/6.23.0 Chrome 132.0.0.0 on Windows 10 64-bit'
+    }
+    headers_lower = {k.lower(): v for k, v in headers.items()}
+
+    canonical_headers_list = []
+    signed_headers_list = []
+    required_headers = ['content-md5', 'content-type', 'x-oss-content-sha256', 'x-oss-date', 'x-oss-security-token',
+                        'x-oss-user-agent']
+    for header_name in sorted(required_headers):
+        if header_name in headers_lower:
+            canonical_headers_list.append(f"{header_name}:{headers_lower[header_name]}")
+            signed_headers_list.append(header_name)
+
+    canonical_headers = '\n'.join(canonical_headers_list) + '\n'
+    canonical_uri = f"/{bucket_name}/{quote(file_path, safe='/')}"
+
+    canonical_request = f"{method}\n{canonical_uri}\n\n{canonical_headers}\n\nUNSIGNED-PAYLOAD"
+
+    date_parts = date_str.split('T')
+    date_scope = f"{date_parts[0]}/ap-southeast-1/oss/aliyun_v4_request"
+    string_to_sign = f"OSS4-HMAC-SHA256\n{date_str}\n{date_scope}\n{hashlib.sha256(canonical_request.encode()).hexdigest()}"
+
+    def sign(key, msg):
+        return hmac.new(key, msg.encode() if isinstance(msg, str) else msg, hashlib.sha256).digest()
+
+    date_key = sign(f"aliyun_v4{access_key_secret}".encode(), date_parts[0])
+    region_key = sign(date_key, "ap-southeast-1")
+    service_key = sign(region_key, "oss")
+    signing_key = sign(service_key, "aliyun_v4_request")
+    signature = hmac.new(signing_key, string_to_sign.encode(), hashlib.sha256).hexdigest()
+
+    headers['authorization'] = f"OSS4-HMAC-SHA256 Credential={access_key_id}/{date_scope},Signature={signature}"
+    return headers
+
+
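The helper above implements Alibaba OSS V4 request signing. For orientation, it is consumed like this (a minimal sketch; the `sts_data` field names are exactly the ones `get_oss_headers` reads from the `getstsToken` response, but the values here are invented):

    import datetime

    sts_data = {  # invented example; real values come from /api/v2/files/getstsToken
        "bucketname": "qwen-webui-prod",
        "file_path": "user/uploads/example.png",
        "access_key_id": "STS.example",
        "access_key_secret": "example-secret",
        "security_token": "example-token",
    }
    date_str = datetime.datetime.now(datetime.UTC).strftime('%Y%m%dT%H%M%SZ')
    headers = get_oss_headers('PUT', date_str, sts_data, 'image/png')
    # headers['authorization'] now holds the OSS4-HMAC-SHA256 credential scope and signature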
 text_models = [
     'qwen3-max-preview', 'qwen-plus-2025-09-11', 'qwen3-235b-a22b', 'qwen3-coder-plus', 'qwen3-30b-a3b',
     'qwen3-coder-30b-a3b-instruct', 'qwen-max-latest', 'qwen-plus-2025-01-25', 'qwq-32b', 'qwen-turbo-2025-02-11',
@@ -60,19 +117,19 @@ class Qwen(AsyncGeneratorProvider, ProviderModelMixin):
     active_by_default = True
     supports_stream = True
     supports_message_history = False
-
+    image_cache = True
     _models_loaded = True
     image_models = image_models
     text_models = text_models
     vision_models = vision_models
-    models = models
+    models: list[str] = models
     default_model = "qwen3-235b-a22b"

     _midtoken: str = None
     _midtoken_uses: int = 0

     @classmethod
-    def get_models(cls) -> list[str]:
+    def get_models(cls, **kwargs) -> list[str]:
         if not cls._models_loaded and has_curl_cffi:
             response = curl_cffi.get(f"{cls.url}/api/models")
             if response.ok:
@@ -97,34 +154,106 @@ class Qwen(AsyncGeneratorProvider, ProviderModelMixin):
         return cls.models

     @classmethod
-    async def prepare_files(cls, media, chat_type="")->list:
+    async def prepare_files(cls, media, session: aiohttp.ClientSession, headers=None) -> list:
+        if headers is None:
+            headers = {}
         files = []
-        for _file, file_name in media:
-            file_type, _ = mimetypes.guess_type(file_name)
-            file_class: Literal["default", "vision", "video", "audio", "document"] = "default"
-            _type: Literal["file", "image", "video", "audio"] = "file"
-            showType: Literal["file", "image", "video", "audio"] = "file"
+        for index, (_file, file_name) in enumerate(media):
-            if isinstance(_file, str) and _file.startswith('http'):
-                if chat_type == "image_edit" or (file_type and file_type.startswith("image")):
-                    file_class = "vision"
-                    _type = "image"
-            if not file_type:
-                # Try to infer from file extension, fallback to generic
-                ext = file_name.split('.')[-1].lower() if '.' in file_name else ''
-                file_type = mimetypes.types_map.get(f'.{ext}', 'application/octet-stream')
-                showType = "image"
+            data_bytes = to_bytes(_file)
+            # Check Cache
+            hasher = hashlib.md5()
+            hasher.update(data_bytes)
+            image_hash = hasher.hexdigest()
+            file = ImagesCache.get(image_hash)
+            if cls.image_cache and file:
+                debug.log("Using cached image")
+                files.append(file)
+                continue

-            files.append(
-                {
-                    "type": _type,
+            extension, file_type = detect_file_type(data_bytes)
+            file_name = file_name or f"file-{len(data_bytes)}{extension}"
+            file_size = len(data_bytes)
+
+            # Get File Url
+            async with session.post(
+                    f'{cls.url}/api/v2/files/getstsToken',
+                    json={"filename": file_name,
+                          "filesize": file_size, "filetype": file_type},
+                    headers=headers
+            ) as r:
+                await raise_for_status(r, "Create file failed")
+                res_data = await r.json()
+                data = res_data.get("data")
+
+                if res_data["success"] is False:
+                    raise RateLimitError(f"{data['code']}:{data['details']}")
+                file_url = data.get("file_url")
+                file_id = data.get("file_id")
+
+            # Put File into Url
+            str_date = datetime.datetime.now(datetime.UTC).strftime('%Y%m%dT%H%M%SZ')
+            oss_headers = get_oss_headers('PUT', str_date, data, file_type)
+            async with session.put(
+                    file_url.split("?")[0],
+                    data=data_bytes,
+                    headers=oss_headers
+            ) as response:
+                await raise_for_status(response)
+
+            file_class: Literal["default", "vision", "video", "audio", "document"]
+            _type: Literal["file", "image", "video", "audio"]
+            show_type: Literal["file", "image", "video", "audio"]
+            if "image" in file_type:
+                _type = "image"
+                show_type = "image"
+                file_class = "vision"
+            elif "video" in file_type:
+                _type = "video"
+                show_type = "video"
+                file_class = "video"
+            elif "audio" in file_type:
+                _type = "audio"
+                show_type = "audio"
+                file_class = "audio"
+            else:
+                _type = "file"
+                show_type = "file"
+                file_class = "document"
+
+            file = {
+                "type": _type,
+                "file": {
+                    "created_at": int(time() * 1000),
+                    "data": {},
+                    "filename": file_name,
+                    "hash": None,
+                    "id": file_id,
+                    "meta": {
                         "name": file_name,
-                    "file_type": file_type,
-                    "showType": showType,
-                    "file_class": file_class,
-                    "url": _file
-                }
-            )
+                        "size": file_size,
+                        "content_type": file_type
+                    },
+                    "update_at": int(time() * 1000),
+                },
+                "id": file_id,
+                "url": file_url,
+                "name": file_name,
+                "collection_name": "",
+                "progress": 0,
+                "status": "uploaded",
+                "greenNet": "success",
+                "size": file_size,
+                "error": "",
+                "itemId": str(uuid.uuid4()),
+                "file_type": file_type,
+                "showType": show_type,
+                "file_class": file_class,
+                "uploadTaskId": str(uuid.uuid4())
+            }
+            ImagesCache[image_hash] = file
+            files.append(file)
         return files

     @classmethod
@@ -135,7 +264,6 @@ class Qwen(AsyncGeneratorProvider, ProviderModelMixin):
         media: MediaListType = None,
         conversation: JsonConversation = None,
         proxy: str = None,
-        timeout: int = 120,
         stream: bool = True,
         enable_thinking: bool = True,
         chat_type: Literal[
@@ -157,7 +285,7 @@ class Qwen(AsyncGeneratorProvider, ProviderModelMixin):
         """
         model_name = cls.get_model(model)
-
+        token = kwargs.get("token")
         headers = {
             'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
             'Accept': '*/*',
@@ -169,13 +297,24 @@ class Qwen(AsyncGeneratorProvider, ProviderModelMixin):
             'Sec-Fetch-Mode': 'cors',
             'Sec-Fetch-Site': 'same-origin',
             'Connection': 'keep-alive',
-            'Authorization': 'Bearer',
+            'Authorization': f'Bearer {token}' if token else "Bearer",
             'Source': 'web'
         }

         prompt = get_last_user_message(messages)
-
-        async with aiohttp.ClientSession(headers=headers) as session:
+        _timeout = kwargs.get("timeout")
+        if isinstance(_timeout, aiohttp.ClientTimeout):
+            timeout = _timeout
+        else:
+            total = float(_timeout) if isinstance(_timeout, (int, float)) else 5 * 60
+            timeout = aiohttp.ClientTimeout(total=total)
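This kwargs-to-ClientTimeout normalization is duplicated verbatim in Yupp.create_async_generator below; if a third copy appears it may be worth extracting into a shared helper, e.g. (hypothetical `normalize_timeout`, not part of this patch):

    import aiohttp

    def normalize_timeout(value, default_total: float = 5 * 60) -> aiohttp.ClientTimeout:
        # Accept an existing ClientTimeout, a number of seconds, or None
        if isinstance(value, aiohttp.ClientTimeout):
            return value
        total = float(value) if isinstance(value, (int, float)) else default_total
        return aiohttp.ClientTimeout(total=total)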
+        async with StreamSession(headers=headers) as session:
+            try:
+                async with session.get('https://chat.qwen.ai/api/v1/auths/', proxy=proxy) as user_info_res:
+                    user_info_res.raise_for_status()
+                    debug.log(await user_info_res.json())
+            except Exception:
+                ...
             for attempt in range(5):
                 try:
                     if not cls._midtoken:
@@ -221,7 +360,8 @@ class Qwen(AsyncGeneratorProvider, ProviderModelMixin):
                     files = []
                     media = list(merge_media(media, messages))
                     if media:
-                        files = await cls.prepare_files(media, chat_type=chat_type)
+                        files = await cls.prepare_files(media, session=session,
+                                                        headers=req_headers)

                     msg_payload = {
                         "stream": stream,
diff --git a/g4f/Provider/Yupp.py b/g4f/Provider/Yupp.py
index d5aa4d9a..c4b1f203 100644
--- a/g4f/Provider/Yupp.py
+++ b/g4f/Provider/Yupp.py
@@ -1,25 +1,26 @@
+import asyncio
 import hashlib
 import json
+import os
+import re
 import time
 import uuid
-import re
-import os
-import asyncio
-import aiohttp
-from aiohttp import ClientResponseError
-from ..typing import AsyncResult, Messages, Optional, Dict, Any, List
+
+import aiohttp
+
+from .helper import get_last_user_message
+from .yupp.models import YuppModelManager
+from ..cookies import get_cookies
+from ..debug import log
+from ..errors import RateLimitError, ProviderException, MissingAuthError
+from ..image import is_accepted_format, to_bytes
 from ..providers.base_provider import AsyncGeneratorProvider, ProviderModelMixin
 from ..providers.response import Reasoning, PlainTextResponse, PreviewResponse, JsonConversation, ImageResponse, \
     ProviderInfo, FinishReason, JsonResponse
-from ..errors import RateLimitError, ProviderException, MissingAuthError
-from ..cookies import get_cookies
+from ..requests.aiohttp import StreamSession
 from ..tools.auth import AuthManager
 from ..tools.media import merge_media
-from ..image import is_accepted_format, to_bytes
-from .yupp.models import YuppModelManager
-from .helper import get_last_user_message
-from ..debug import log
+from ..typing import AsyncResult, Messages, Optional, Dict, Any, List

 # Global variables to manage Yupp accounts
 YUPP_ACCOUNT = Dict[str, Any]
@@ -27,22 +28,24 @@ YUPP_ACCOUNTS: List[YUPP_ACCOUNT] = []
 account_rotation_lock = asyncio.Lock()

 # Global variables to manage Yupp Image Cache
-ImagesCache:Dict[str, dict] = {}
+ImagesCache: Dict[str, dict] = {}
+

 class YuppAccount:
     """Yupp account representation"""
+
     def __init__(self, token: str, is_valid: bool = True, error_count: int = 0, last_used: float = 0):
         self.token = token
         self.is_valid = is_valid
         self.error_count = error_count
         self.last_used = last_used

+
 def load_yupp_accounts(tokens_str: str):
     """Load Yupp accounts from token string"""
     global YUPP_ACCOUNTS
     if not tokens_str:
         return
-
     tokens = [token.strip() for token in tokens_str.split(',') if token.strip()]
     YUPP_ACCOUNTS = [
         {
@@ -54,6 +57,7 @@ def load_yupp_accounts(tokens_str: str):
         for token in tokens
     ]

+
 def create_headers() -> Dict[str, str]:
     """Create headers for requests"""
     return {
@@ -66,6 +70,7 @@ def create_headers() -> Dict[str, str]:
         "Sec-Fetch-Site": "same-origin",
     }

+
 async def get_best_yupp_account() -> Optional[YUPP_ACCOUNT]:
     """Get the best available Yupp account using smart selection algorithm"""
     max_error_count = int(os.getenv("MAX_ERROR_COUNT", "3"))
@@ -77,10 +82,10 @@ async def get_best_yupp_account() -> Optional[YUPP_ACCOUNT]:
         acc for acc in YUPP_ACCOUNTS
         if acc["is_valid"]
-        and (
-            acc["error_count"] < max_error_count
-            or now - acc["last_used"] > error_cooldown
-        )
+           and (
+                   acc["error_count"] < max_error_count
+                   or now - acc["last_used"] > error_cooldown
+           )
     ]

     if not valid_accounts:
@@ -89,8 +94,8 @@ async def get_best_yupp_account() -> Optional[YUPP_ACCOUNT]:
     # Reset error count for accounts in cooldown
     for acc in valid_accounts:
         if (
-            acc["error_count"] >= max_error_count
-            and now - acc["last_used"] > error_cooldown
+                acc["error_count"] >= max_error_count
+                and now - acc["last_used"] > error_cooldown
         ):
             acc["error_count"] = 0

@@ -100,6 +105,7 @@ async def get_best_yupp_account() -> Optional[YUPP_ACCOUNT]:
     account["last_used"] = now
     return account

+
 async def claim_yupp_reward(session: aiohttp.ClientSession, account: YUPP_ACCOUNT, reward_id: str):
     """Claim Yupp reward asynchronously"""
     try:
@@ -109,7 +115,7 @@ async def claim_yupp_reward(session: aiohttp.ClientSession, account: YUPP_ACCOUN
         headers = {
             "Content-Type": "application/json",
             "Cookie": f"__Secure-yupp.session-token={account['token']}",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36 Edg/137.0.0.0",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36 Edg/137.0.0.0",
         }

         async with session.post(url, json=payload, headers=headers) as response:
@@ -122,6 +128,7 @@ async def claim_yupp_reward(session: aiohttp.ClientSession, account: YUPP_ACCOUN
         log_debug(f"Failed to claim reward {reward_id}. Error: {e}")
         return None

+
 async def make_chat_private(session: aiohttp.ClientSession, account: YUPP_ACCOUNT, chat_id: str) -> bool:
     """Set a Yupp chat's sharing status to PRIVATE"""
     try:
@@ -138,7 +145,7 @@ async def make_chat_private(session: aiohttp.ClientSession, account: YUPP_ACCOUN
         headers = {
             "Content-Type": "application/json",
             "Cookie": f"__Secure-yupp.session-token={account['token']}",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36 Edg/137.0.0.0",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36 Edg/137.0.0.0",
         }

@@ -146,8 +153,8 @@ async def make_chat_private(session: aiohttp.ClientSession, account: YUPP_ACCOUN
             response.raise_for_status()
             data = await response.json()
             if (
-                isinstance(data, list) and len(data) > 0
-                and "json" in data[0].get("result", {}).get("data", {})
+                    isinstance(data, list) and len(data) > 0
+                    and "json" in data[0].get("result", {}).get("data", {})
             ):
                 log_debug(f"Chat {chat_id} is now PRIVATE ✅")
                 return True
@@ -159,6 +166,7 @@ async def make_chat_private(session: aiohttp.ClientSession, account: YUPP_ACCOUN
         log_debug(f"Failed to make chat {chat_id} private: {e}")
         return False

+
 def log_debug(message: str):
     """Debug logging"""
     if os.getenv("DEBUG_MODE", "false").lower() == "true":
@@ -166,11 +174,12 @@ def log_debug(message: str):
     else:
         log(f"[Yupp] {message}")

+
 def format_messages_for_yupp(messages: Messages) -> str:
     """Format multi-turn conversation for Yupp single-turn format"""
     if not messages:
         return ""
-
+
     if len(messages) == 1 and isinstance(messages[0].get("content"), str):
         return messages[0].get("content", "").strip()

@@ -202,6 +211,7 @@ def format_messages_for_yupp(messages: Messages) -> str:

     return result

+
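The account pool is plain module state: load_yupp_accounts fills YUPP_ACCOUNTS and get_best_yupp_account hands out the least-recently-used valid entry. A minimal driver (the YUPP_TOKENS variable name is invented for this sketch):

    import asyncio
    import os

    load_yupp_accounts(os.getenv("YUPP_TOKENS", "token-a,token-b"))

    async def main() -> None:
        account = await get_best_yupp_account()
        if account is not None:
            # Only the dict fields defined above exist: token, is_valid, error_count, last_used
            print(f"...{account['token'][-4:]} errors={account['error_count']}")

    asyncio.run(main())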
 class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
     """
     Yupp.ai Provider for g4f
@@ -214,7 +224,7 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
     active_by_default = True
     supports_stream = True
     image_cache = True
-
+
     @classmethod
     def get_models(cls, api_key: str = None, **kwargs) -> List[str]:
         if not cls.models:
@@ -230,11 +240,12 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
             cls.models_tags = {model.get("name"): manager.processor.generate_tags(model) for model in models}
             cls.models = [model.get("name") for model in models]
             cls.image_models = [model.get("name") for model in models if model.get("isImageGeneration")]
-            cls.vision_models = [model.get("name") for model in models if "image/*" in model.get("supportedAttachmentMimeTypes", [])]
+            cls.vision_models = [model.get("name") for model in models if
+                                 "image/*" in model.get("supportedAttachmentMimeTypes", [])]
         return cls.models

     @classmethod
-    async def prepare_files(cls, media, session:aiohttp.ClientSession, account:YUPP_ACCOUNT)->list:
+    async def prepare_files(cls, media, session: aiohttp.ClientSession, account: YUPP_ACCOUNT) -> list:
         files = []
         if not media:
             return files
@@ -291,11 +302,11 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):

     @classmethod
     async def create_async_generator(
-        cls,
-        model: str,
-        messages: Messages,
-        proxy: str = None,
-        **kwargs,
+            cls,
+            model: str,
+            messages: Messages,
+            proxy: str = None,
+            **kwargs,
     ) -> AsyncResult:
         """
         Create async completion using Yupp.ai API with account rotation
@@ -314,15 +325,16 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
         conversation = kwargs.get("conversation")
         url_uuid = conversation.url_uuid if conversation else None
         is_new_conversation = url_uuid is None
-
+
         prompt = kwargs.get("prompt")
         if prompt is None:
             if is_new_conversation:
                 prompt = format_messages_for_yupp(messages)
             else:
                 prompt = get_last_user_message(messages, prompt)
-
-        log_debug(f"Use url_uuid: {url_uuid}, Formatted prompt length: {len(prompt)}, Is new conversation: {is_new_conversation}")
+
+        log_debug(
+            f"Use url_uuid: {url_uuid}, Formatted prompt length: {len(prompt)}, Is new conversation: {is_new_conversation}")

         # Try all accounts with rotation
         max_attempts = len(YUPP_ACCOUNTS)
@@ -332,10 +344,9 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
                 raise ProviderException("No valid Yupp accounts available")

             try:
-                async with aiohttp.ClientSession() as session:
+                async with StreamSession() as session:
                     turn_id = str(uuid.uuid4())

-
                     # Handle media attachments
                     media = kwargs.get("media")
                     if media:
@@ -390,16 +401,23 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):

                     log_debug(f"Sending request to: {url}")
                     log_debug(f"Payload structure: {type(payload)}, length: {len(str(payload))}")
-
+                    _timeout = kwargs.get("timeout")
+                    if isinstance(_timeout, aiohttp.ClientTimeout):
+                        timeout = _timeout
+                    else:
+                        total = float(_timeout) if isinstance(_timeout, (int, float)) else 5 * 60
+                        timeout = aiohttp.ClientTimeout(total=total)
                     # Send request
-                    async with session.post(url, json=payload, headers=headers, proxy=proxy) as response:
+                    async with session.post(url, json=payload, headers=headers, proxy=proxy,
+                                            timeout=timeout) as response:
                         response.raise_for_status()

                         # Make chat private in background
                         asyncio.create_task(make_chat_private(session, account, url_uuid))

                         # Process stream
-                        async for chunk in cls._process_stream_response(response.content, account, session, prompt, model):
+                        async for chunk in cls._process_stream_response(response.content, account, session, prompt,
+                                                                        model):
                             yield chunk
                     return
@@ -417,7 +435,7 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
                 else:
                     account["error_count"] += 1
                     continue
-            except ClientResponseError as e:
+            except aiohttp.ClientResponseError as e:
                 log_debug(f"Account ...{account['token'][-4:]} failed: {str(e)}")
                 # No Available Yupp credits
                 if e.status == 500 and 'Internal Server Error' in e.message:
@@ -439,12 +457,12 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):

     @classmethod
     async def _process_stream_response(
-        cls,
-        response_content,
-        account: YUPP_ACCOUNT,
-        session: aiohttp.ClientSession,
-        prompt: str,
-        model_id: str
+            cls,
+            response_content,
+            account: YUPP_ACCOUNT,
+            session: aiohttp.ClientSession,
+            prompt: str,
+            model_id: str
     ) -> AsyncResult:
         """Process Yupp stream response asynchronously"""
@@ -461,15 +479,33 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
             "target": [],
             "variant": [],
             "quick": [],
-            "thinking": [] ,
+            "thinking": [],
             "extra": []
         }
         # Holds leftStream / rightStream definitions to determine target/variant
         select_stream = [None, None]
-
+        # State for capturing a multi-line <think> + <yapp> block (fa-style)
+        capturing_ref_id: Optional[str] = None
+        capturing_lines: List[bytes] = []
+
+        # Storage for special referenced blocks like $fa
+        think_blocks: Dict[str, str] = {}
+        image_blocks: Dict[str, str] = {}
+
         def extract_ref_id(ref):
             """Extract ID from reference string, e.g., from '$@123' extract '123'"""
             return ref[2:] if ref and isinstance(ref, str) and ref.startswith("$@") else None
+
+        def extract_ref_name(ref: str) -> Optional[str]:
+            """Extract simple ref name from '$fa' → 'fa'"""
+            if not isinstance(ref, str):
+                return None
+            if ref.startswith("$@"):
+                return ref[2:]
+            if ref.startswith("$") and len(ref) > 1:
+                return ref[1:]
+            return None
+
         def is_valid_content(content: str) -> bool:
             """Check if content is valid"""
             if not content or content in [None, "", "$undefined"]:
@@ -515,7 +551,27 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
                     if for_target:
                         normal_content += content
                     yield content
-
+
+        def finalize_capture_block(ref_id: str, lines: List[bytes]):
+            """Parse a captured <think> + <yapp> block for a given ref ID."""
+            text = b"".join(lines).decode("utf-8", errors="ignore")
+
+            # Extract <think>...</think>
+            think_start = text.find("<think>")
+            think_end = text.find("</think>")
+            if think_start != -1 and think_end != -1 and think_end > think_start:
+                inner = text[think_start + len("<think>"):think_end].strip()
+                if inner:
+                    think_blocks[ref_id] = inner
+
+            # Extract <yapp>...</yapp>
+            yapp_start = text.find('<yapp')
+            if yapp_start != -1:
+                yapp_end = text.find("</yapp>", yapp_start)
+                if yapp_end != -1:
+                    yapp_block = text[yapp_start:yapp_end + len("</yapp>")]
+                    image_blocks[ref_id] = yapp_block
+
         try:
             line_count = 0
             quick_response_id = None
@@ -531,17 +587,55 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin):
             right_message_id = None
             nudge_new_chat_id = None
             nudge_new_chat = False
-
             async for line in response_content:
                 line_count += 1
-
+                # If we are currently capturing a think/image block for some ref ID
+                if capturing_ref_id is not None:
+                    capturing_lines.append(line)
+
+                    # Check if this line closes the block; after that, block is complete
+                    if b"</think>" in line:  # or b':{"curr"' in line:
+                        # We may have trailing "2:{...}" after </think> on the same line
+                        # Get id using re
+                        idx = line.find(b"</think>")
+                        suffix = line[idx + len(b"</think>"):]
+
+                        # Finalize captured block for this ref ID
+                        finalize_capture_block(capturing_ref_id, capturing_lines)
+                        capturing_ref_id = None
+                        capturing_lines = []
+
+                        # If there is trailing content (e.g. '2:{"curr":"$fa"...}')
'2:{"curr":"$fa"...}') + if suffix.strip(): + # Process suffix as a new "line" in the same iteration + line = suffix + else: + # Nothing more on this line + continue + else: + # Still inside captured block; skip normal processing + continue + + # Detect start of a block assigned to a ref like 'fa:...' + if b"" in line: + m = line_pattern.match(line) + if m: + capturing_ref_id = m.group(1).decode() + capturing_lines = [line] + # Skip normal parsing; the rest of the block will be captured until + continue + match = line_pattern.match(line) if not match: continue chunk_id, chunk_data = match.groups() chunk_id = chunk_id.decode() - + + if nudge_new_chat_id and chunk_id == nudge_new_chat_id: + nudge_new_chat = chunk_data.decode() + continue + try: data = json.loads(chunk_data) if chunk_data != b"{}" else {} except json.JSONDecodeError: @@ -550,7 +644,7 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin): if chunk_id == reward_id and isinstance(data, dict) and "unclaimedRewardInfo" in data: reward_info = data log_debug(f"Found reward info") - + # Process initial setup elif chunk_id == "1": yield PlainTextResponse(line.decode(errors="ignore")) @@ -558,7 +652,8 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin): left_stream = data.get("leftStream", {}) right_stream = data.get("rightStream", {}) if data.get("quickResponse", {}) != "$undefined": - quick_response_id = extract_ref_id(data.get("quickResponse", {}).get("stream", {}).get("next")) + quick_response_id = extract_ref_id( + data.get("quickResponse", {}).get("stream", {}).get("next")) if data.get("turnId", {}) != "$undefined": turn_id = extract_ref_id(data.get("turnId", {}).get("next")) @@ -592,7 +687,7 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin): provider_info["variantUrl"] = selection.get("externalUrl") log_debug(f"Found variant stream ID: {variant_stream_id}") yield ProviderInfo.from_dict(provider_info) - + # Process target stream content elif target_stream_id and chunk_id == target_stream_id: yield PlainTextResponse(line.decode(errors="ignore")) @@ -600,15 +695,41 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin): target_stream_id = extract_ref_id(data.get("next")) content = data.get("curr", "") if content: - async for chunk in process_content_chunk( - content, - chunk_id, - line_count, - for_target=True - ): - stream["target"].append(chunk) - is_started = True - yield chunk + # Handle special "$fa" / "$" reference + ref_name = extract_ref_name(content) + if ref_name and (ref_name in think_blocks or ref_name in image_blocks): + # Thinking block + if ref_name in think_blocks: + t_text = think_blocks[ref_name] + if t_text: + reasoning = Reasoning(t_text) + # thinking_content += t_text + stream["thinking"].append(reasoning) + # yield reasoning + + # Image-gen block + if ref_name in image_blocks: + img_block_text = image_blocks[ref_name] + async for chunk in process_content_chunk( + img_block_text, + ref_name, + line_count, + for_target=True + ): + stream["target"].append(chunk) + is_started = True + yield chunk + else: + # Normal textual chunk + async for chunk in process_content_chunk( + content, + chunk_id, + line_count, + for_target=True + ): + stream["target"].append(chunk) + is_started = True + yield chunk # Variant stream (comparison) elif variant_stream_id and chunk_id == variant_stream_id: yield PlainTextResponse("[Variant] " + line.decode(errors="ignore")) @@ -651,8 +772,6 @@ class Yupp(AsyncGeneratorProvider, ProviderModelMixin): ... elif chunk_id == left_message_id: ... 
diff --git a/g4f/Provider/needs_auth/OpenaiChat.py b/g4f/Provider/needs_auth/OpenaiChat.py
--- a/g4f/Provider/needs_auth/OpenaiChat.py
+++ b/g4f/Provider/needs_auth/OpenaiChat.py
@@ ... @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
     @classmethod
-    async def upload_images(cls, session: StreamSession, auth_result: AuthResult, media: MediaListType) -> list[ImageRequest]:
+    async def upload_images(
+            cls,
+            session: StreamSession,
+            auth_result: AuthResult,
+            media: MediaListType,
+    ) -> List[ImageRequest]:
         """
         Upload an image to the service and get the download URL
@@ -143,11 +151,20 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
         Returns:
             An ImageRequest object that contains the download URL, file name, and other data
         """
+
         async def upload_file(file, image_name=None) -> ImageRequest:
             debug.log(f"Uploading file: {image_name}")
             file_data = {}
             data_bytes = to_bytes(file)
+            # Check Cache
+            hasher = hashlib.md5()
+            hasher.update(data_bytes)
+            image_hash = hasher.hexdigest()
+            cache_file = ImagesCache.get(image_hash)
+            if cls.image_cache and cache_file:
+                debug.log("Using cached image")
+                return ImageRequest(cache_file)
             extension, mime_type = detect_file_type(data_bytes)
             if "image" in mime_type:
                 # Convert the image to a PIL Image object
@@ -181,30 +198,31 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
             # Put the image bytes to the upload URL and check the status
             await asyncio.sleep(1)
             async with session.put(
-                file_data["upload_url"],
-                data=data_bytes,
-                headers={
-                    **UPLOAD_HEADERS,
-                    "Content-Type": file_data["mime_type"],
-                    "x-ms-blob-type": "BlockBlob",
-                    "x-ms-version": "2020-04-08",
-                    "Origin": "https://chatgpt.com",
-                }
+                    file_data["upload_url"],
+                    data=data_bytes,
+                    headers={
+                        **UPLOAD_HEADERS,
+                        "Content-Type": file_data["mime_type"],
+                        "x-ms-blob-type": "BlockBlob",
+                        "x-ms-version": "2020-04-08",
+                        "Origin": "https://chatgpt.com",
+                    }
             ) as response:
                 await raise_for_status(response)
             # Post the file ID to the service and get the download URL
             async with session.post(
-                f"{cls.url}/backend-api/files/{file_data['file_id']}/uploaded",
-                json={},
-                headers=auth_result.headers
+                    f"{cls.url}/backend-api/files/{file_data['file_id']}/uploaded",
+                    json={},
+                    headers=auth_result.headers
             ) as response:
                 cls._update_request_args(auth_result, session)
                 await raise_for_status(response, "Get download url failed")
                 uploaded_data = await response.json()
                 file_data["download_url"] = uploaded_data["download_url"]
+                ImagesCache[image_hash] = file_data.copy()
                 return ImageRequest(file_data)
-        medias = []
+
+        medias: List["ImageRequest"] = []
         for item in media:
             item = item if isinstance(item, tuple) else (item,)
             __uploaded_media = await upload_file(*item)
@@ -242,7 +260,8 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                 "id": str(uuid.uuid4()),
                 "author": {"role": message["role"]},
                 "content": {"content_type": "text", "parts": [to_string(message["content"])]},
-                "metadata": {"serialization_metadata": {"custom_symbol_offsets": []}, **({"system_hints": system_hints} if system_hints else {})},
+                "metadata": {"serialization_metadata": {"custom_symbol_offsets": []},
+                             **({"system_hints": system_hints} if system_hints else {})},
                 "create_time": time.time(),
             } for message in messages]
         # Check if there is an image response
@@ -256,11 +275,11 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                         "size_bytes": image_request.get("file_size"),
                         "width": image_request.get("width"),
                     }
-                    for image_request in image_requests
+                    for image_request in image_requests  # Add For Images Only
image_request.get("use_case") == "multimodal" ], - messages[-1]["content"]["parts"][0]] + messages[-1]["content"]["parts"][0]] } # Add the metadata object with the attachments messages[-1]["metadata"] = { @@ -278,12 +297,14 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin): else {} ), } - for image_request in image_requests] + for image_request in image_requests] } return messages @classmethod - async def get_generated_image(cls, session: StreamSession, auth_result: AuthResult, element: Union[dict, str], prompt: str = None, conversation_id: str = None) -> ImagePreview|ImageResponse|None: + async def get_generated_image(cls, session: StreamSession, auth_result: AuthResult, element: Union[dict, str], + prompt: str = None, conversation_id: str = None, + status: Optional[str] = None) -> ImagePreview | ImageResponse | None: download_urls = [] is_sediment = False if prompt is None: @@ -292,7 +313,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin): except KeyError: pass if "asset_pointer" in element: - element = element["asset_pointer"] + element = element["asset_pointer"] if isinstance(element, str) and element.startswith("file-service://"): element = element.split("file-service://", 1)[-1] elif isinstance(element, str) and element.startswith("sediment://"): @@ -303,7 +324,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin): if is_sediment: url = f"{cls.url}/backend-api/conversation/{conversation_id}/attachment/{element}/download" else: - url =f"{cls.url}/backend-api/files/{element}/download" + url = f"{cls.url}/backend-api/files/{element}/download" try: async with session.get(url, headers=auth_result.headers) as response: cls._update_request_args(auth_result, session) @@ -319,27 +340,31 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin): debug.error("OpenaiChat: Download image failed") debug.error(e) if download_urls: - return ImagePreview(download_urls, prompt, {"headers": auth_result.headers}) if is_sediment else ImageResponse(download_urls, prompt, {"headers": auth_result.headers}) + # status = None, finished_successfully + if is_sediment and status is None: + return ImagePreview(download_urls, prompt, {"status": status, "headers": auth_result.headers}) + else: + return ImageResponse(download_urls, prompt, {"status": status, "headers": auth_result.headers}) @classmethod async def create_authed( - cls, - model: str, - messages: Messages, - auth_result: AuthResult, - proxy: str = None, - timeout: int = 360, - auto_continue: bool = False, - action: Optional[str] = None, - conversation: Conversation = None, - media: MediaListType = None, - return_conversation: bool = True, - web_search: bool = False, - prompt: str = None, - conversation_mode: Optional[dict] = None, - temporary: Optional[bool] = None, - conversation_id: Optional[str] = None, - **kwargs + cls, + model: str, + messages: Messages, + auth_result: AuthResult, + proxy: str = None, + timeout: int = 360, + auto_continue: bool = False, + action: Optional[str] = None, + conversation: Conversation = None, + media: MediaListType = None, + return_conversation: bool = True, + web_search: bool = False, + prompt: str = None, + conversation_mode: Optional[dict] = None, + temporary: Optional[bool] = None, + conversation_id: Optional[str] = None, + **kwargs ) -> AsyncResult: """ Create an asynchronous generator for the conversation. 
@@ -367,12 +392,12 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
         if action is None:
             action = "next"
         async with StreamSession(
-            proxy=proxy,
-            impersonate="chrome",
-            timeout=timeout
+                proxy=proxy,
+                impersonate="chrome",
+                timeout=timeout
         ) as session:
             image_requests = None
-            media = merge_media(media, messages)
+            media = merge_media(media, messages)
             if not cls.needs_auth and not media:
                 if cls._headers is None:
                     cls._create_request_args(cls._cookies)
@@ -436,18 +461,19 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
             if temporary:
                 data["history_and_training_disabled"] = True
             async with session.post(
-                prepare_url,
-                json=data,
-                headers=cls._headers
+                    prepare_url,
+                    json=data,
+                    headers=cls._headers
             ) as response:
                 await raise_for_status(response)
                 conduit_token = (await response.json())["conduit_token"]
             async with session.post(
-                f"{cls.url}/backend-anon/sentinel/chat-requirements"
-                if cls._api_key is None else
-                f"{cls.url}/backend-api/sentinel/chat-requirements",
-                json={"p": None if not getattr(auth_result, "proof_token", None) else get_requirements_token(getattr(auth_result, "proof_token", None))},
-                headers=cls._headers
+                    f"{cls.url}/backend-anon/sentinel/chat-requirements"
+                    if cls._api_key is None else
+                    f"{cls.url}/backend-api/sentinel/chat-requirements",
+                    json={"p": None if not getattr(auth_result, "proof_token", None) else get_requirements_token(
+                        getattr(auth_result, "proof_token", None))},
+                    headers=cls._headers
             ) as response:
                 if response.status in (401, 403):
                     raise MissingAuthError(f"Response status: {response.status}")
@@ -456,10 +482,10 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                     await raise_for_status(response)
                 chat_requirements = await response.json()
                 need_turnstile = chat_requirements.get("turnstile", {}).get("required", False)
-                need_arkose = chat_requirements.get("arkose", {}).get("required", False)
-                chat_token = chat_requirements.get("token")
+                need_arkose = chat_requirements.get("arkose", {}).get("required", False)
+                chat_token = chat_requirements.get("token")

-            # if need_arkose and cls.request_config.arkose_token is None:
+            # if need_arkose and cls.request_config.arkose_token is None:
             #     await get_request_config(proxy)
             #     cls._create_request_args(auth_result.cookies, auth_result.headers)
             #     cls._set_api_key(auth_result.access_token)
@@ -476,23 +502,25 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                     proof_token=proof_token
                 )
             # [debug.log(text) for text in (
-            #f"Arkose: {'False' if not need_arkose else auth_result.arkose_token[:12]+'...'}",
-            #f"Proofofwork: {'False' if proofofwork is None else proofofwork[:12]+'...'}",
-            #f"AccessToken: {'False' if cls._api_key is None else cls._api_key[:12]+'...'}",
+            # f"Arkose: {'False' if not need_arkose else auth_result.arkose_token[:12]+'...'}",
+            # f"Proofofwork: {'False' if proofofwork is None else proofofwork[:12]+'...'}",
+            # f"AccessToken: {'False' if cls._api_key is None else cls._api_key[:12]+'...'}",
             # )]
             data = {
                 "action": "next",
                 "parent_message_id": conversation.message_id,
                 "model": model,
-                "timezone_offset_min":-120,
-                "timezone":"Europe/Berlin",
-                "conversation_mode":{"kind":"primary_assistant"},
-                "enable_message_followups":True,
+                "timezone_offset_min": -120,
+                "timezone": "Europe/Berlin",
+                "conversation_mode": {"kind": "primary_assistant"},
+                "enable_message_followups": True,
                 "system_hints": ["search"] if web_search else None,
-                "supports_buffering":True,
-                "supported_encodings":["v1"],
"client_contextual_info":{"is_dark_mode":False,"time_since_loaded":random.randint(20, 500),"page_height":578,"page_width":1850,"pixel_ratio":1,"screen_height":1080,"screen_width":1920}, - "paragen_cot_summary_display_override":"allow" + "supports_buffering": True, + "supported_encodings": ["v1"], + "client_contextual_info": {"is_dark_mode": False, "time_since_loaded": random.randint(20, 500), + "page_height": 578, "page_width": 1850, "pixel_ratio": 1, + "screen_height": 1080, "screen_width": 1920}, + "paragen_cot_summary_display_override": "allow" } if temporary: data["history_and_training_disabled"] = True @@ -512,7 +540,8 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin): new_messages = [] else: new_messages.append(message) - data["messages"] = cls.create_messages(new_messages, image_requests, ["search"] if web_search else None) + data["messages"] = cls.create_messages(new_messages, image_requests, + ["search"] if web_search else None) yield JsonRequest.from_dict(data) headers = { **cls._headers, @@ -521,18 +550,18 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin): "openai-sentinel-chat-requirements-token": chat_token, **({} if conduit_token is None else {"x-conduit-token": conduit_token}) } - #if cls.request_config.arkose_token: + # if cls.request_config.arkose_token: # headers["openai-sentinel-arkose-token"] = cls.request_config.arkose_token if proofofwork is not None: headers["openai-sentinel-proof-token"] = proofofwork if need_turnstile and getattr(auth_result, "turnstile_token", None) is not None: headers['openai-sentinel-turnstile-token'] = auth_result.turnstile_token async with session.post( - backend_anon_url - if cls._api_key is None else - backend_url, - json=data, - headers=headers + backend_anon_url + if cls._api_key is None else + backend_url, + json=data, + headers=headers ) as response: cls._update_request_args(auth_result, session) if response.status in (401, 403, 429, 500): @@ -548,10 +577,12 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin): if match.group(0) in matches: continue matches.append(match.group(0)) - generated_image = await cls.get_generated_image(session, auth_result, match.group(0), prompt) + generated_image = await cls.get_generated_image(session, auth_result, match.group(0), + prompt) if generated_image is not None: yield generated_image - async for chunk in cls.iter_messages_line(session, auth_result, line, conversation, sources, references): + async for chunk in cls.iter_messages_line(session, auth_result, line, conversation, sources, + references): if isinstance(chunk, str): chunk = chunk.replace("\ue203", "").replace("\ue204", "").replace("\ue206", "") buffer += chunk @@ -561,9 +592,9 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin): def citation_replacer(match: re.Match[str]): ref_type = match.group(1) ref_index = int(match.group(2)) - if ((ref_type == "image" and is_image_embedding) or - is_video_embedding or - ref_type == "forecast"): + if ((ref_type == "image" and is_image_embedding) or + is_video_embedding or + ref_type == "forecast"): reference = references.get_reference({ "ref_index": ref_index, @@ -571,7 +602,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin): }) if not reference: return "" - + if ref_type == "forecast": if reference.get("alt"): return reference.get("alt") @@ -580,11 +611,13 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin): if is_image_embedding and reference.get("content_url", ""): return f"![{reference.get('title', '')}]({reference.get('content_url')})" - 
+
                                     if is_video_embedding:
-                                        if reference.get("url", "") and reference.get("thumbnail_url", ""):
+                                        if reference.get("url", "") and reference.get("thumbnail_url",
+                                                                                      ""):
                                             return f"[![{reference.get('title', '')}]({reference['thumbnail_url']})]({reference['url']})"
-                                        video_match = re.match(r"video\n(.*?)\nturn[0-9]+", match.group(0))
+                                        video_match = re.match(r"video\n(.*?)\nturn[0-9]+",
+                                                               match.group(0))
                                         if video_match:
                                             return video_match.group(1)
                                         return ""
@@ -595,9 +628,9 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                                 })
                                 if source_index is not None and len(sources.list) > source_index:
                                     link = sources.list[source_index]["url"]
-                                    return f"[[{source_index+1}]]({link})"
+                                    return f"[[{source_index + 1}]]({link})"
                                 return f""
-
+
                             def products_replacer(match: re.Match[str]):
                                 try:
                                     products_data = json.loads(match.group(1))
@@ -612,25 +645,30 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                                     return ""

                                 sequence_content = match.group(1)
-                                sequence_content = sequence_content.replace("\ue200", "").replace("\ue202", "\n").replace("\ue201", "")
+                                sequence_content = sequence_content.replace("\ue200", "").replace("\ue202",
+                                                                                                  "\n").replace(
+                                    "\ue201", "")
                                 sequence_content = sequence_content.replace("navlist\n", "#### ")
-
+
                                 # Handle search, news, view and image citations
                                 is_image_embedding = sequence_content.startswith("i\nturn")
                                 is_video_embedding = sequence_content.startswith("video\n")
                                 sequence_content = re.sub(
-                                    r'(?:cite\nturn[0-9]+|forecast\nturn[0-9]+|video\n.*?\nturn[0-9]+|i?\n?turn[0-9]+)(search|news|view|image|forecast)(\d+)',
-                                    citation_replacer,
+                                    r'(?:cite\nturn[0-9]+|forecast\nturn[0-9]+|video\n.*?\nturn[0-9]+|i?\n?turn[0-9]+)(search|news|view|image|forecast)(\d+)',
+                                    citation_replacer,
                                     sequence_content
                                 )
-                                sequence_content = re.sub(r'products\n(.*)', products_replacer, sequence_content)
-                                sequence_content = re.sub(r'product_entity\n\[".*","(.*)"\]', lambda x: x.group(1), sequence_content)
+                                sequence_content = re.sub(r'products\n(.*)', products_replacer,
+                                                          sequence_content)
+                                sequence_content = re.sub(r'product_entity\n\[".*","(.*)"\]',
+                                                          lambda x: x.group(1), sequence_content)
                                 return sequence_content
-
+
                             # process only completed sequences and do not touch start of next not completed sequence
-                            buffer = re.sub(r'\ue200(.*?)\ue201', sequence_replacer, buffer, flags=re.DOTALL)
-
-                            if buffer.find(u"\ue200") != -1: # still have uncompleted sequence
+                            buffer = re.sub(r'\ue200(.*?)\ue201', sequence_replacer, buffer,
+                                            flags=re.DOTALL)
+
+                            if buffer.find(u"\ue200") != -1:  # still have uncompleted sequence
                                 continue
                             else:
                                 # do not yield to consume rest part of special sequence
@@ -647,7 +685,8 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                 if sources.list:
                     yield sources
             if conversation.generated_images:
-                yield ImageResponse(conversation.generated_images.urls, conversation.prompt, {"headers": auth_result.headers})
+                yield ImageResponse(conversation.generated_images.urls, conversation.prompt,
+                                    {"headers": auth_result.headers})
                 conversation.generated_images = None
                 conversation.prompt = None
             if return_conversation:
@@ -667,7 +706,9 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                 yield FinishReason(conversation.finish_reason)

     @classmethod
-    async def iter_messages_line(cls, session: StreamSession, auth_result: AuthResult, line: bytes, fields: Conversation, sources: OpenAISources, references: ContentReferences) -> AsyncIterator:
+    async def iter_messages_line(cls, session: StreamSession, auth_result: AuthResult, line: bytes,
+                                 fields: Conversation, sources: OpenAISources,
+                                 references: ContentReferences) -> AsyncIterator:
         if not line.startswith(b"data: "):
             return
         elif line.startswith(b"data: [DONE]"):
@@ -706,9 +747,14 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                 elif m.get("p") == "/message/metadata/image_gen_title":
                     fields.prompt = m.get("v")
                 elif m.get("p") == "/message/content/parts/0/asset_pointer":
-                    generated_images = fields.generated_images = await cls.get_generated_image(session, auth_result, m.get("v"), fields.prompt, fields.conversation_id)
+                    status = next(filter(lambda x: x.get("p") == '/message/status', v), {}).get('v', None)
+                    generated_images = fields.generated_images = await cls.get_generated_image(session, auth_result,
+                                                                                               m.get("v"),
+                                                                                               fields.prompt,
+                                                                                               fields.conversation_id,
+                                                                                               status)
                     if generated_images is not None:
-                        if buffer:
+                        if buffer:
                             yield buffer
                         yield generated_images
                 elif m.get("p") == "/message/metadata/search_result_groups":
@@ -735,41 +781,48 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                     if match and m.get("o") == "append" and isinstance(m.get("v"), dict):
                         idx = int(match.group(1))
                         references.merge_reference(idx, m.get("v"))
-                elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/fallback_items$", m.get("p")) and isinstance(m.get("v"), list):
+                elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/fallback_items$",
+                                             m.get("p")) and isinstance(m.get("v"), list):
                     for link in m.get("v", []) or []:
                         sources.add_source(link)
-                elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/items$", m.get("p")) and isinstance(m.get("v"), list):
+                elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/items$",
+                                             m.get("p")) and isinstance(m.get("v"), list):
                     for link in m.get("v", []) or []:
                         sources.add_source(link)
-                elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/refs$", m.get("p")) and isinstance(m.get("v"), list):
+                elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/refs$",
+                                             m.get("p")) and isinstance(m.get("v"), list):
                     match = re.match(r"^/message/metadata/content_references/(\d+)/refs$", m.get("p"))
                     if match:
                         idx = int(match.group(1))
                         references.update_reference(idx, m.get("o"), "refs", m.get("v"))
-                elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/alt$", m.get("p")) and isinstance(m.get("v"), list):
+                elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/alt$",
+                                             m.get("p")) and isinstance(m.get("v"), list):
                     match = re.match(r"^/message/metadata/content_references/(\d+)/alt$", m.get("p"))
                     if match:
                         idx = int(match.group(1))
                         references.update_reference(idx, m.get("o"), "alt", m.get("v"))
-                elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/prompt_text$", m.get("p")) and isinstance(m.get("v"), list):
+                elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/prompt_text$",
+                                             m.get("p")) and isinstance(m.get("v"), list):
                     match = re.match(r"^/message/metadata/content_references/(\d+)/prompt_text$", m.get("p"))
                     if match:
                         idx = int(match.group(1))
                         references.update_reference(idx, m.get("o"), "prompt_text", m.get("v"))
-                elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/refs/\d+$", m.get("p")) and isinstance(m.get("v"), dict):
+                elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/refs/\d+$",
+                                             m.get("p")) and isinstance(m.get("v"), dict):
                     match = re.match(r"^/message/metadata/content_references/(\d+)/refs/(\d+)$", m.get("p"))
                     if match:
                         reference_idx = int(match.group(1))
                         ref_idx = int(match.group(2))
                         references.update_reference(reference_idx, m.get("o"), "refs", m.get("v"), ref_idx)
-                elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/images$", m.get("p")) and isinstance(m.get("v"), list):
+                elif m.get("p") and re.match(r"^/message/metadata/content_references/\d+/images$",
+                                             m.get("p")) and isinstance(m.get("v"), list):
                     match = re.match(r"^/message/metadata/content_references/(\d+)/images$", m.get("p"))
                     if match:
                         idx = int(match.group(1))
                         references.update_reference(idx, m.get("o"), "images", m.get("v"))
                 elif m.get("p") == "/message/metadata/finished_text":
                     fields.is_thinking = False
-                    if buffer:
+                    if buffer:
                         yield buffer
                     yield Reasoning(status=m.get("v"))
                 elif m.get("p") == "/message/metadata" and fields.recipient == "all":
@@ -785,10 +838,11 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                 fields.recipient = m.get("recipient", fields.recipient)
                 if fields.recipient == "all":
                     c = m.get("content", {})
-                    if c.get("content_type") == "text" and m.get("author", {}).get("role") == "tool" and "initial_text" in m.get("metadata", {}):
+                    if c.get("content_type") == "text" and m.get("author", {}).get(
+                            "role") == "tool" and "initial_text" in m.get("metadata", {}):
                         fields.is_thinking = True
                         yield Reasoning(status=m.get("metadata", {}).get("initial_text"))
-                    #if c.get("content_type") == "multimodal_text":
+                    # if c.get("content_type") == "multimodal_text":
                     #     for part in c.get("parts"):
                     #         if isinstance(part, dict) and part.get("content_type") == "image_asset_pointer":
                     #             yield await cls.get_generated_image(session, auth_result, part, fields.prompt, fields.conversation_id)
@@ -803,13 +857,13 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):

     @classmethod
     async def synthesize(cls, params: dict) -> AsyncIterator[bytes]:
         async with StreamSession(
-            impersonate="chrome",
-            timeout=0
+                impersonate="chrome",
+                timeout=0
         ) as session:
             async with session.get(
-                f"{cls.url}/backend-api/synthesize",
-                params=params,
-                headers=cls._headers
+                    f"{cls.url}/backend-api/synthesize",
+                    params=params,
+                    headers=cls._headers
             ) as response:
                 await raise_for_status(response)
                 async for chunk in response.iter_content():
@@ -817,15 +871,15 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):

     @classmethod
     async def login(
-        cls,
-        proxy: str = None,
-        api_key: str = None,
-        proof_token: str = None,
-        cookies: Cookies = None,
-        headers: dict = None,
-        **kwargs
+            cls,
+            proxy: str = None,
+            api_key: str = None,
+            proof_token: str = None,
+            cookies: Cookies = None,
+            headers: dict = None,
+            **kwargs
     ) -> AsyncIterator:
-        if cls._expires is not None and (cls._expires - 60*10) < time.time():
+        if cls._expires is not None and (cls._expires - 60 * 10) < time.time():
             cls._headers = cls._api_key = None
         if cls._headers is None or headers is not None:
             cls._headers = {} if headers is None else headers
@@ -858,6 +912,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
     async def nodriver_auth(cls, proxy: str = None):
         async with get_nodriver_session(proxy=proxy) as browser:
             page = await browser.get(cls.url)
+
             def on_request(event: nodriver.cdp.network.RequestWillBeSent, page=None):
                 if event.request.url == start_url or event.request.url.startswith(conversation_url):
                     if cls.request_config.headers is None:
                         cls.request_config.headers = {}
                     for key, value in event.request.headers.items():
                         cls.request_config.headers[key.lower()] = value
                 elif event.request.url in (backend_url, backend_anon_url):
"OpenAI-Sentinel-Proof-Token" in event.request.headers: - cls.request_config.proof_token = json.loads(base64.b64decode( - event.request.headers["OpenAI-Sentinel-Proof-Token"].split("gAAAAAB", 1)[-1].split("~")[0].encode() - ).decode()) + cls.request_config.proof_token = json.loads(base64.b64decode( + event.request.headers["OpenAI-Sentinel-Proof-Token"].split("gAAAAAB", 1)[-1].split("~")[ + 0].encode() + ).decode()) if "OpenAI-Sentinel-Turnstile-Token" in event.request.headers: cls.request_config.turnstile_token = event.request.headers["OpenAI-Sentinel-Turnstile-Token"] if "Authorization" in event.request.headers: @@ -881,6 +937,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin): arkBody=event.request.post_data, userAgent=event.request.headers.get("User-Agent") ) + await page.send(nodriver.cdp.network.enable()) page.add_handler(nodriver.cdp.network.RequestWillBeSent, on_request) await page.reload() @@ -912,7 +969,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin): if cls._api_key is not None or not cls.needs_auth: break await asyncio.sleep(1) - debug.log(f"OpenaiChat: Access token: {'False' if cls._api_key is None else cls._api_key[:12]+'...'}") + debug.log(f"OpenaiChat: Access token: {'False' if cls._api_key is None else cls._api_key[:12] + '...'}") while True: if cls.request_config.proof_token: break @@ -970,11 +1027,14 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin): if cls._cookies: cls._headers["cookie"] = format_cookies(cls._cookies) + class Conversation(JsonConversation): """ Class to encapsulate response fields. """ - def __init__(self, conversation_id: str = None, message_id: str = None, user_id: str = None, finish_reason: str = None, parent_message_id: str = None, is_thinking: bool = False): + + def __init__(self, conversation_id: str = None, message_id: str = None, user_id: str = None, + finish_reason: str = None, parent_message_id: str = None, is_thinking: bool = False): self.conversation_id = conversation_id self.message_id = message_id self.finish_reason = finish_reason @@ -987,8 +1047,9 @@ class Conversation(JsonConversation): self.prompt = None self.generated_images: ImagePreview = None + def get_cookies( - urls: Optional[Iterator[str]] = None + urls: Optional[Iterator[str]] = None ) -> Generator[Dict, Dict, Dict[str, str]]: params = {} if urls is not None: @@ -1000,6 +1061,7 @@ def get_cookies( json = yield cmd_dict return {c["name"]: c["value"] for c in json['cookies']} if 'cookies' in json else {} + class OpenAISources(ResponseType): list: List[Dict[str, str]] @@ -1025,7 +1087,7 @@ class OpenAISources(ResponseType): if existing_source and idx is not None: self.list[idx] = source return - + existing_source, idx = self.find_by_url(source["url"]) if existing_source and idx is not None: self.list[idx] = source @@ -1038,53 +1100,54 @@ class OpenAISources(ResponseType): if not self.list: return "" return "\n\n\n\n" + ("\n>\n".join([ - f"> [{idx+1}] {format_link(link['url'], link.get('title', ''))}" + f"> [{idx + 1}] {format_link(link['url'], link.get('title', ''))}" for idx, link in enumerate(self.list) ])) - - def get_ref_info(self, source: Dict[str, str]) -> dict[str, str|int] | None: + + def get_ref_info(self, source: Dict[str, str]) -> dict[str, str | int] | None: ref_index = source.get("ref_id", {}).get("ref_index", None) ref_type = source.get("ref_id", {}).get("ref_type", None) if isinstance(ref_index, int): return { - "ref_index": ref_index, + "ref_index": ref_index, "ref_type": ref_type, } - + for ref_info in source.get('refs') or 
             ref_index = ref_info.get("ref_index", None)
             ref_type = ref_info.get("ref_type", None)
             if isinstance(ref_index, int):
                 return {
-                    "ref_index": ref_index,
+                    "ref_index": ref_index,
                     "ref_type": ref_type,
                 }
-
+
         return None

-    def find_by_ref_info(self, ref_info: dict[str, str|int]):
+    def find_by_ref_info(self, ref_info: dict[str, str | int]):
         for idx, source in enumerate(self.list):
             source_ref_info = self.get_ref_info(source)
-            if (source_ref_info and
-                source_ref_info["ref_index"] == ref_info["ref_index"] and
-                source_ref_info["ref_type"] == ref_info["ref_type"]):
-                return source, idx
+            if (source_ref_info and
+                    source_ref_info["ref_index"] == ref_info["ref_index"] and
+                    source_ref_info["ref_type"] == ref_info["ref_type"]):
+                return source, idx
         return None, None
-
+
     def find_by_url(self, url: str):
         for idx, source in enumerate(self.list):
             if source["url"] == url:
                 return source, idx
-        return None, None
+        return None, None

-    def get_index(self, ref_info: dict[str, str|int]) -> int | None:
+    def get_index(self, ref_info: dict[str, str | int]) -> int | None:
         _, index = self.find_by_ref_info(ref_info)
         if index is not None:
-            return index
+            return index
         return None

+
 class ContentReferences:
     def __init__(self) -> None:
         self.list: List[Dict[str, Any]] = []
@@ -1098,16 +1161,16 @@ class ContentReferences:

         self.list[idx] = {**self.list[idx], **reference_part}

-    def update_reference(self, idx: int, operation: str, field: str, value: Any, ref_idx = None) -> None:
+    def update_reference(self, idx: int, operation: str, field: str, value: Any, ref_idx=None) -> None:
         while len(self.list) <= idx:
             self.list.append({})
-
+
         if operation == "append" or operation == "add":
             if not isinstance(self.list[idx].get(field, None), list):
                 self.list[idx][field] = []
             if isinstance(value, list):
                 self.list[idx][field].extend(value)
-            else:
+            else:
                 self.list[idx][field].append(value)

         if operation == "replace" and ref_idx is not None:
@@ -1123,10 +1186,10 @@ class ContentReferences:
             self.list[idx][field] = value

     def get_ref_info(
-        self,
-        source: Dict[str, str],
-        target_ref_info: Dict[str, Union[str, int]]
-    ) -> dict[str, str|int] | None:
+            self,
+            source: Dict[str, str],
+            target_ref_info: Dict[str, Union[str, int]]
+    ) -> dict[str, str | int] | None:
         for idx, ref_info in enumerate(source.get("refs", [])) or []:
             if not isinstance(ref_info, dict):
                 continue
@@ -1134,11 +1197,11 @@ class ContentReferences:
             ref_index = ref_info.get("ref_index", None)
             ref_type = ref_info.get("ref_type", None)
             if isinstance(ref_index, int) and isinstance(ref_type, str):
-                if (not target_ref_info or
-                    (target_ref_info["ref_index"] == ref_index and
-                    target_ref_info["ref_type"] == ref_type)):
+                if (not target_ref_info or
+                        (target_ref_info["ref_index"] == ref_index and
+                         target_ref_info["ref_type"] == ref_type)):
                     return {
-                        "ref_index": ref_index,
+                        "ref_index": ref_index,
                         "ref_type": ref_type,
                         "idx": idx
                     }
@@ -1149,9 +1212,9 @@ class ContentReferences:
         for reference in self.list:
             reference_ref_info = self.get_ref_info(reference, ref_info)

-            if (not reference_ref_info or
-                reference_ref_info["ref_index"] != ref_info["ref_index"] or
-                reference_ref_info["ref_type"] != ref_info["ref_type"]):
+            if (not reference_ref_info or
+                    reference_ref_info["ref_index"] != ref_info["ref_index"] or
+                    reference_ref_info["ref_type"] != ref_info["ref_type"]):
                 continue

             if ref_info["ref_type"] != "image":
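Qwen, Yupp, and OpenaiChat now share the same idiom: a module-level ImagesCache dict keyed by the MD5 of the raw bytes, consulted before any upload round-trip. Condensed (reuse and upload are placeholders for the provider-specific paths):

    import hashlib

    key = hashlib.md5(data_bytes).hexdigest()
    cached = ImagesCache.get(key)
    if cls.image_cache and cached:
        files.append(cached)            # reuse prior upload
    else:
        uploaded = upload(data_bytes)   # provider-specific STS/upload dance
        ImagesCache[key] = uploaded

The cache is process-local and unbounded; long-running deployments that upload many unique images may want an LRU bound.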
diff --git a/g4f/image/__init__.py b/g4f/image/__init__.py
index c09f484e..7c918b34 100644
--- a/g4f/image/__init__.py
+++ b/g4f/image/__init__.py
@@ -9,6 +9,8 @@
 from pathlib import Path
 from typing import Optional
 from urllib.parse import urlparse
+import requests
+
 try:
     from PIL import Image, ImageOps
     has_requirements = True
@@ -383,6 +385,13 @@ def to_bytes(image: ImageType) -> bytes:
                 return Path(path).read_bytes()
             else:
                 raise FileNotFoundError(f"File not found: {path}")
+        else:
+            resp = requests.get(image, headers={
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36 Edg/137.0.0.0",
+            })
+            if resp.ok and is_accepted_format(resp.content):
+                return resp.content
+            raise ValueError("Invalid image url. Expected bytes, str, or PIL Image.")
     else:
         raise ValueError("Invalid image format. Expected bytes, str, or PIL Image.")
 elif isinstance(image, Image.Image):
diff --git a/g4f/requests/aiohttp.py b/g4f/requests/aiohttp.py
index 644ae026..d3d33271 100644
--- a/g4f/requests/aiohttp.py
+++ b/g4f/requests/aiohttp.py
@@ -31,17 +31,21 @@ class StreamResponse(ClientResponse):
         except json.JSONDecodeError:
             continue

-class StreamSession():
+class StreamSession:
     def __init__(
         self,
-        headers: dict = {},
+        headers: dict = None,
         timeout: int = None,
         connector: BaseConnector = None,
         proxy: str = None,
-        proxies: dict = {},
+        proxies: dict = None,
         impersonate = None,
         **kwargs
     ):
+        if proxies is None:
+            proxies = {}
+        if headers is None:
+            headers = {}
         if impersonate:
             headers = {
                 **DEFAULT_HEADERS,
@@ -49,7 +53,7 @@ class StreamSession():
             }
         connect = None
         if isinstance(timeout, tuple):
-            connect, timeout = timeout;
+            connect, timeout = timeout
         if timeout is not None:
             timeout = ClientTimeout(timeout, connect)
         if proxy is None:
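With the to_bytes fallback above, providers can hand media around as plain URLs and still land in the byte-hashing cache path:

    from g4f.image import to_bytes

    data = to_bytes("https://example.com/picture.png")  # fetched via requests, gated by is_accepted_format
    assert isinstance(data, bytes)

The StreamSession default-argument cleanup follows the standard None-sentinel idiom: the old mutable defaults (headers={}, proxies={}) were shared across every instantiation, which is safe only as long as no caller mutates them in place.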