feat: introduce render_messages and enhance HAR/conversation handling

- **g4f/providers/helper.py**
  - Add `render_messages()` to normalise message contents that are lists of blocks.

- **g4f/Provider/Blackbox.py**
  - Import `get_har_files` and `render_messages`.
  - Replace manual walk of `get_cookies_dir()` with `get_har_files()` in `_find_session_in_har`.
  - Simplify session‑parsing loop and exception logging; drop permissions check.
  - Build `current_messages` with `render_messages(messages)` instead of raw list.

- **g4f/Provider/Cloudflare.py**
  - Swap `to_string` import for `render_messages`.
  - Add `"impersonate": "chrome"` to default `_args`.
  - Construct `data["messages"]` with `render_messages(messages)` and inline `"parts"`; remove `to_string()` calls.
  - Move `cache_file` write outside inner `try` to always save arguments.

- **g4f/Provider/Copilot.py**
  - Defer `yield conversation` until after `conversation` is created when `return_conversation` is requested.

- **g4f/Provider/openai/har_file.py**
  - Break out of `os.walk` after first directory in `get_har_files()` to avoid deep traversal.

- **g4f/api/__init__.py**
  - Use `config.conversation` directly and set `return_conversation` when present.

- **g4f/client/__init__.py**
  - Pass `conversation` to both `ChatCompletionChunk.model_construct()` and `ChatCompletion.model_construct()`.

- **g4f/client/stubs.py**
  - Import `field_serializer` (with stub fallback).
  - Add serializers for `conversation` (objects and dicts) and for `content` fields.
  - Extend model constructors to accept/propagate `conversation`.

- **g4f/cookies.py**
  - Insert ".huggingface.co" into `DOMAINS` list.
  - Stop recursive directory walk in `read_cookie_files()` with early `break`.

- **g4f/gui/client/background.html**
  - Reorder error‑handling branches; reset `errorImage` in `onload`.
  - Revise `skipRefresh` logic and random image URL building.

- **g4f/gui/server/backend_api.py**
  - Add `self.match_files` cache for repeated image searches.
  - Use `safe_search` for sanitised term matching and `min` comparison.
  - Limit walk to one directory level; support deterministic random selection via `random` query param.

- **Miscellaneous**
  - Update imports where `render_messages` replaces `to_string`.
  - Ensure all modified providers iterate messages through `render_messages` for consistent formatting.
This commit is contained in:
hlohaus 2025-04-17 07:14:34 +02:00
parent 819f4d85ae
commit 3ab36ebc64
11 changed files with 125 additions and 86 deletions

View file

@ -14,9 +14,10 @@ from datetime import datetime, timedelta
from ..typing import AsyncResult, Messages, MediaListType
from ..requests.raise_for_status import raise_for_status
from .base_provider import AsyncGeneratorProvider, ProviderModelMixin
from .openai.har_file import get_har_files
from ..image import to_data_uri
from ..cookies import get_cookies_dir
from .helper import format_image_prompt
from .helper import format_image_prompt, render_messages
from ..providers.response import JsonConversation, ImageResponse
from ..tools.media import merge_media
from ..errors import RateLimitError
@ -428,53 +429,46 @@ class Blackbox(AsyncGeneratorProvider, ProviderModelMixin):
Optional[dict]: Session data if found, None otherwise
"""
try:
har_dir = get_cookies_dir()
if not os.access(har_dir, os.R_OK):
return None
for root, _, files in os.walk(har_dir):
for file in files:
if file.endswith(".har"):
try:
with open(os.path.join(root, file), 'rb') as f:
har_data = json.load(f)
for entry in har_data['log']['entries']:
# Only look at blackbox API responses
if 'blackbox.ai/api' in entry['request']['url']:
# Look for a response that has the right structure
if 'response' in entry and 'content' in entry['response']:
content = entry['response']['content']
# Look for both regular and Google auth session formats
if ('text' in content and
isinstance(content['text'], str) and
'"user"' in content['text'] and
'"email"' in content['text'] and
'"expires"' in content['text']):
try:
# Remove any HTML or other non-JSON content
text = content['text'].strip()
if text.startswith('{') and text.endswith('}'):
# Replace escaped quotes
text = text.replace('\\"', '"')
har_session = json.loads(text)
# Check if this is a valid session object
if (isinstance(har_session, dict) and
'user' in har_session and
'email' in har_session['user'] and
'expires' in har_session):
debug.log(f"Blackbox: Found session in HAR file: {file}")
return har_session
except json.JSONDecodeError as e:
# Only print error for entries that truly look like session data
if ('"user"' in content['text'] and
'"email"' in content['text']):
debug.log(f"Blackbox: Error parsing likely session data: {e}")
except Exception as e:
debug.log(f"Blackbox: Error reading HAR file {file}: {e}")
for file in get_har_files():
try:
with open(file, 'rb') as f:
har_data = json.load(f)
for entry in har_data['log']['entries']:
# Only look at blackbox API responses
if 'blackbox.ai/api' in entry['request']['url']:
# Look for a response that has the right structure
if 'response' in entry and 'content' in entry['response']:
content = entry['response']['content']
# Look for both regular and Google auth session formats
if ('text' in content and
isinstance(content['text'], str) and
'"user"' in content['text'] and
'"email"' in content['text'] and
'"expires"' in content['text']):
try:
# Remove any HTML or other non-JSON content
text = content['text'].strip()
if text.startswith('{') and text.endswith('}'):
# Replace escaped quotes
text = text.replace('\\"', '"')
har_session = json.loads(text)
# Check if this is a valid session object
if (isinstance(har_session, dict) and
'user' in har_session and
'email' in har_session['user'] and
'expires' in har_session):
debug.log(f"Blackbox: Found session in HAR file: {file}")
return har_session
except json.JSONDecodeError as e:
# Only print error for entries that truly look like session data
if ('"user"' in content['text'] and
'"email"' in content['text']):
debug.log(f"Blackbox: Error parsing likely session data: {e}")
except Exception as e:
debug.log(f"Blackbox: Error reading HAR file {file}: {e}")
return None
except Exception as e:
debug.log(f"Blackbox: Error searching HAR files: {e}")
@ -573,7 +567,7 @@ class Blackbox(AsyncGeneratorProvider, ProviderModelMixin):
conversation.message_history = []
current_messages = []
for i, msg in enumerate(messages):
for i, msg in enumerate(render_messages(messages)):
msg_id = conversation.chat_id if i == 0 and msg["role"] == "user" else cls.generate_id()
current_msg = {
"id": msg_id,

View file

@ -9,7 +9,7 @@ from ..requests import Session, StreamSession, get_args_from_nodriver, raise_for
from ..requests import DEFAULT_HEADERS, has_nodriver, has_curl_cffi
from ..providers.response import FinishReason, Usage
from ..errors import ResponseStatusError, ModelNotFoundError
from .helper import to_string
from .helper import render_messages
class Cloudflare(AsyncGeneratorProvider, ProviderModelMixin, AuthFileMixin):
label = "Cloudflare AI"
@ -82,7 +82,7 @@ class Cloudflare(AsyncGeneratorProvider, ProviderModelMixin, AuthFileMixin):
elif has_nodriver:
cls._args = await get_args_from_nodriver(cls.url, proxy, timeout, cookies)
else:
cls._args = {"headers": DEFAULT_HEADERS, "cookies": {}}
cls._args = {"headers": DEFAULT_HEADERS, "cookies": {}, "impersonate": "chrome"}
try:
model = cls.get_model(model)
except ModelNotFoundError:
@ -90,8 +90,7 @@ class Cloudflare(AsyncGeneratorProvider, ProviderModelMixin, AuthFileMixin):
data = {
"messages": [{
**message,
"content": to_string(message["content"]),
"parts": [{"type":"text", "text": to_string(message["content"])}]} for message in messages],
"parts": [{"type":"text", "text": message["content"]}]} for message in render_messages(messages)],
"lora": None,
"model": model,
"max_tokens": max_tokens,
@ -120,5 +119,5 @@ class Cloudflare(AsyncGeneratorProvider, ProviderModelMixin, AuthFileMixin):
yield Usage(**finish.get("usage"))
yield FinishReason(finish.get("finishReason"))
with cache_file.open("w") as f:
json.dump(cls._args, f)
with cache_file.open("w") as f:
json.dump(cls._args, f)

View file

@ -116,8 +116,6 @@ class Copilot(AsyncGeneratorProvider, ProviderModelMixin):
response.raise_for_status()
conversation_id = response.json().get("id")
conversation = Conversation(conversation_id)
if return_conversation:
yield conversation
if prompt is None:
prompt = format_prompt_max_length(messages, 10000)
debug.log(f"Copilot: Created conversation: {conversation_id}")
@ -126,6 +124,8 @@ class Copilot(AsyncGeneratorProvider, ProviderModelMixin):
if prompt is None:
prompt = get_last_user_message(messages)
debug.log(f"Copilot: Use conversation: {conversation_id}")
if return_conversation:
yield conversation
uploaded_images = []
for media, _ in merge_media(media, messages):

View file

@ -49,6 +49,7 @@ def get_har_files():
for file in files:
if file.endswith(".har"):
harPath.append(os.path.join(root, file))
break
if not harPath:
raise NoValidHarFileError("No .har file found")
harPath.sort(key=lambda x: os.path.getmtime(x))

View file

@ -309,9 +309,9 @@ class Api:
if credentials is not None and credentials.credentials != "secret":
config.api_key = credentials.credentials
conversation = None
conversation = config.conversation
return_conversation = config.return_conversation
if conversation is not None:
if conversation:
conversation = JsonConversation(**conversation)
return_conversation = True
elif config.conversation_id is not None and config.provider is not None:

View file

@ -217,7 +217,7 @@ async def async_iter_response(
if stream:
chat_completion = ChatCompletionChunk.model_construct(
None, finish_reason, completion_id, int(time.time()), usage=usage
None, finish_reason, completion_id, int(time.time()), usage=usage, conversation=conversation
)
else:
if response_format is not None and "type" in response_format:
@ -228,7 +228,7 @@ async def async_iter_response(
**filter_none(
tool_calls=[ToolCallModel.model_construct(**tool_call) for tool_call in tool_calls]
) if tool_calls is not None else {},
conversation=None if conversation is None else conversation.get_dict()
conversation=conversation
)
if provider is not None:
chat_completion.provider = provider.name

View file

@ -10,7 +10,7 @@ from ..client.helper import filter_markdown
from .helper import filter_none
try:
from pydantic import BaseModel
from pydantic import BaseModel, field_serializer
except ImportError:
class BaseModel():
@classmethod
@ -19,6 +19,9 @@ except ImportError:
for key, value in data.items():
setattr(new, key, value)
return new
class field_serializer():
def __init__(self, field_name):
self.field_name = field_name
class BaseModel(BaseModel):
@classmethod
@ -72,6 +75,7 @@ class ChatCompletionChunk(BaseModel):
provider: Optional[str]
choices: List[ChatCompletionDeltaChoice]
usage: UsageModel
conversation: dict
@classmethod
def model_construct(
@ -80,7 +84,8 @@ class ChatCompletionChunk(BaseModel):
finish_reason: str,
completion_id: str = None,
created: int = None,
usage: UsageModel = None
usage: UsageModel = None,
conversation: dict = None
):
return super().model_construct(
id=f"chatcmpl-{completion_id}" if completion_id else None,
@ -92,9 +97,15 @@ class ChatCompletionChunk(BaseModel):
ChatCompletionDelta.model_construct(content),
finish_reason
)],
**filter_none(usage=usage)
**filter_none(usage=usage, conversation=conversation)
)
@field_serializer('conversation')
def serialize_conversation(self, conversation: dict):
if hasattr(conversation, "get_dict"):
return conversation.get_dict()
return conversation
class ChatCompletionMessage(BaseModel):
role: str
content: str
@ -104,6 +115,10 @@ class ChatCompletionMessage(BaseModel):
def model_construct(cls, content: str, tool_calls: list = None):
return super().model_construct(role="assistant", content=content, **filter_none(tool_calls=tool_calls))
@field_serializer('content')
def serialize_content(self, content: str):
return str(content)
def save(self, filepath: str, allowd_types = None):
if hasattr(self.content, "data"):
os.rename(self.content.data.replace("/media", images_dir), filepath)
@ -160,6 +175,12 @@ class ChatCompletion(BaseModel):
**filter_none(usage=usage, conversation=conversation)
)
@field_serializer('conversation')
def serialize_conversation(self, conversation: dict):
if hasattr(conversation, "get_dict"):
return conversation.get_dict()
return conversation
class ChatCompletionDelta(BaseModel):
role: str
content: str
@ -168,6 +189,10 @@ class ChatCompletionDelta(BaseModel):
def model_construct(cls, content: Optional[str]):
return super().model_construct(role="assistant", content=content)
@field_serializer('content')
def serialize_content(self, content: str):
return str(content)
class ChatCompletionDeltaChoice(BaseModel):
index: int
delta: ChatCompletionDelta

View file

@ -56,12 +56,11 @@ DOMAINS = [
".google.com",
"www.whiterabbitneo.com",
"huggingface.co",
".huggingface.co"
"chat.reka.ai",
"chatgpt.com",
".cerebras.ai",
"github.com",
"huggingface.co",
".huggingface.co"
]
if has_browser_cookie3 and os.environ.get('DBUS_SESSION_BUS_ADDRESS') == "/dev/null":
@ -152,6 +151,7 @@ def read_cookie_files(dirPath: str = None):
harFiles.append(os.path.join(root, file))
elif file.endswith(".json"):
cookieFiles.append(os.path.join(root, file))
break
CookiesConfig.cookies = {}
for path in harFiles:

View file

@ -169,15 +169,15 @@
if (errorVideo < 3 || !refreshOnHide) {
return;
}
if (skipRefresh > 0) {
skipRefresh -= 1;
return;
}
if (errorImage < 3) {
imageFeed.src = "/search/image+g4f?skip=" + skipImage;
skipImage++;
return;
}
if (skipRefresh > 0) {
skipRefresh -= 1;
return;
}
if (images.length > 0) {
imageFeed.classList.remove("hidden");
imageFeed.src = images.shift();
@ -194,10 +194,13 @@
imageFeed.onload = () => {
imageFeed.classList.remove("hidden");
gradient.classList.add("hidden");
errorImage = 0;
};
imageFeed.onclick = () => {
imageFeed.src = "/search/image?random=" + Math.random();
skipRefresh = 2;
if (skipRefresh < 4) {
skipRefresh += 1;
}
};
})();
</script>

View file

@ -341,28 +341,35 @@ class Backend_Api(Api):
return redirect(source_url)
raise
self.match_files = {}
@app.route('/search/<search>', methods=['GET'])
def find_media(search: str):
search = [secure_filename(chunk.lower()) for chunk in search.split("+")]
safe_search = [secure_filename(chunk.lower()) for chunk in search.split("+")]
if not os.access(images_dir, os.R_OK):
return jsonify({"error": {"message": "Not found"}}), 404
match_files = {}
for root, _, files in os.walk(images_dir):
for file in files:
mime_type = is_allowed_extension(file)
if mime_type is not None:
mime_type = secure_filename(mime_type)
for tag in search:
if tag in mime_type:
match_files[file] = match_files.get(file, 0) + 1
break
for tag in search:
if tag in file.lower():
match_files[file] = match_files.get(file, 0) + 1
match_files = [file for file, count in match_files.items() if count >= request.args.get("min", len(search))]
if search not in self.match_files:
self.match_files[search] = {}
for root, _, files in os.walk(images_dir):
for file in files:
mime_type = is_allowed_extension(file)
if mime_type is not None:
mime_type = secure_filename(mime_type)
for tag in safe_search:
if tag in mime_type:
self.match_files[search][file] = self.match_files[search].get(file, 0) + 1
break
for tag in safe_search:
if tag in file.lower():
self.match_files[search][file] = self.match_files[search].get(file, 0) + 1
break
match_files = [file for file, count in self.match_files[search].items() if count >= request.args.get("min", len(safe_search))]
if int(request.args.get("skip", 0)) >= len(match_files):
return jsonify({"error": {"message": "Not found"}}), 404
if (request.args.get("random", False)):
seed = request.args.get("random")
if seed not in ["true", "True", "1"]:
random.seed(seed)
return redirect(f"/media/{random.choice(match_files)}"), 302
return redirect(f"/media/{match_files[int(request.args.get('skip', 0))]}", 302)

View file

@ -24,6 +24,16 @@ def to_string(value) -> str:
return "".join([to_string(v) for v in value if v.get("type", "text") == "text"])
return str(value)
def render_messages(messages: Messages) -> Iterator:
    """Yield messages with list-style content normalised to plain strings.

    Messages whose ``content`` is a list of content blocks (e.g. OpenAI-style
    ``[{"type": "text", "text": ...}, ...]``) are rewritten so that ``content``
    becomes a single string via ``to_string``. All other messages are yielded
    unchanged.

    Args:
        messages: Sequence of chat messages (typically dicts with ``role``
            and ``content`` keys).

    Yields:
        The input messages, with block-list contents flattened to strings.
    """
    # NOTE: the original used enumerate() but never consumed the index.
    for message in messages:
        if isinstance(message, dict) and isinstance(message.get("content"), list):
            # Shallow-copy the message so the caller's dict is not mutated.
            yield {
                **message,
                "content": to_string(message["content"]),
            }
        else:
            yield message
def format_prompt(messages: Messages, add_special_tokens: bool = False, do_continue: bool = False, include_system: bool = True) -> str:
"""
Format a series of messages into a single string, optionally adding special tokens.