feat: introduce render_messages and enhance HAR/conversation handling

- **g4f/providers/helper.py**
  - Add `render_messages()` to normalise message contents that are lists of blocks.

- **g4f/Provider/Blackbox.py**
  - Import `get_har_files` and `render_messages`.
  - Replace manual walk of `get_cookies_dir()` with `get_har_files()` in `_find_session_in_har`.
  - Simplify session‑parsing loop and exception logging; drop permissions check.
  - Build `current_messages` with `render_messages(messages)` instead of raw list.

- **g4f/Provider/Cloudflare.py**
  - Swap `to_string` import for `render_messages`.
  - Add `"impersonate": "chrome"` to default `_args`.
  - Construct `data["messages"]` with `render_messages(messages)` and inline `"parts"`; remove `to_string()` calls.
  - Move `cache_file` write outside inner `try` to always save arguments.

- **g4f/Provider/Copilot.py**
  - Defer `yield conversation` until after `conversation` is created when `return_conversation` is requested.

- **g4f/Provider/openai/har_file.py**
  - Break out of `os.walk` after first directory in `get_har_files()` to avoid deep traversal.

- **g4f/api/__init__.py**
  - Use `config.conversation` directly and set `return_conversation` when present.

- **g4f/client/__init__.py**
  - Pass `conversation` to both `ChatCompletionChunk.model_construct()` and `ChatCompletion.model_construct()`.

- **g4f/client/stubs.py**
  - Import `field_serializer` (with stub fallback).
  - Add serializers for `conversation` (objects and dicts) and for `content` fields.
  - Extend model constructors to accept/propagate `conversation`.

- **g4f/cookies.py**
  - Insert ".huggingface.co" into `DOMAINS` list.
  - Stop recursive directory walk in `read_cookie_files()` with early `break`.

- **g4f/gui/client/background.html**
  - Reorder error‑handling branches; reset `errorImage` in `onload`.
  - Revise `skipRefresh` logic and random image URL building.

- **g4f/gui/server/backend_api.py**
  - Add `self.match_files` cache for repeated image searches.
  - Use `safe_search` for sanitised term matching and `min` comparison.
  - Limit walk to one directory level; support deterministic random selection via `random` query param.

- **Miscellaneous**
  - Update imports where `render_messages` replaces `to_string`.
  - Ensure all modified providers iterate messages through `render_messages` for consistent formatting.
This commit is contained in:
hlohaus 2025-04-17 07:14:34 +02:00
parent 819f4d85ae
commit 3ab36ebc64
11 changed files with 125 additions and 86 deletions

View file

@ -14,9 +14,10 @@ from datetime import datetime, timedelta
from ..typing import AsyncResult, Messages, MediaListType
from ..requests.raise_for_status import raise_for_status
from .base_provider import AsyncGeneratorProvider, ProviderModelMixin
from .openai.har_file import get_har_files
from ..image import to_data_uri
from ..cookies import get_cookies_dir
from .helper import format_image_prompt
from .helper import format_image_prompt, render_messages
from ..providers.response import JsonConversation, ImageResponse
from ..tools.media import merge_media
from ..errors import RateLimitError
@ -428,53 +429,46 @@ class Blackbox(AsyncGeneratorProvider, ProviderModelMixin):
Optional[dict]: Session data if found, None otherwise
"""
try:
har_dir = get_cookies_dir()
if not os.access(har_dir, os.R_OK):
return None
for root, _, files in os.walk(har_dir):
for file in files:
if file.endswith(".har"):
try:
with open(os.path.join(root, file), 'rb') as f:
har_data = json.load(f)
for entry in har_data['log']['entries']:
# Only look at blackbox API responses
if 'blackbox.ai/api' in entry['request']['url']:
# Look for a response that has the right structure
if 'response' in entry and 'content' in entry['response']:
content = entry['response']['content']
# Look for both regular and Google auth session formats
if ('text' in content and
isinstance(content['text'], str) and
'"user"' in content['text'] and
'"email"' in content['text'] and
'"expires"' in content['text']):
try:
# Remove any HTML or other non-JSON content
text = content['text'].strip()
if text.startswith('{') and text.endswith('}'):
# Replace escaped quotes
text = text.replace('\\"', '"')
har_session = json.loads(text)
# Check if this is a valid session object
if (isinstance(har_session, dict) and
'user' in har_session and
'email' in har_session['user'] and
'expires' in har_session):
debug.log(f"Blackbox: Found session in HAR file: {file}")
return har_session
except json.JSONDecodeError as e:
# Only print error for entries that truly look like session data
if ('"user"' in content['text'] and
'"email"' in content['text']):
debug.log(f"Blackbox: Error parsing likely session data: {e}")
except Exception as e:
debug.log(f"Blackbox: Error reading HAR file {file}: {e}")
for file in get_har_files():
try:
with open(file, 'rb') as f:
har_data = json.load(f)
for entry in har_data['log']['entries']:
# Only look at blackbox API responses
if 'blackbox.ai/api' in entry['request']['url']:
# Look for a response that has the right structure
if 'response' in entry and 'content' in entry['response']:
content = entry['response']['content']
# Look for both regular and Google auth session formats
if ('text' in content and
isinstance(content['text'], str) and
'"user"' in content['text'] and
'"email"' in content['text'] and
'"expires"' in content['text']):
try:
# Remove any HTML or other non-JSON content
text = content['text'].strip()
if text.startswith('{') and text.endswith('}'):
# Replace escaped quotes
text = text.replace('\\"', '"')
har_session = json.loads(text)
# Check if this is a valid session object
if (isinstance(har_session, dict) and
'user' in har_session and
'email' in har_session['user'] and
'expires' in har_session):
debug.log(f"Blackbox: Found session in HAR file: {file}")
return har_session
except json.JSONDecodeError as e:
# Only print error for entries that truly look like session data
if ('"user"' in content['text'] and
'"email"' in content['text']):
debug.log(f"Blackbox: Error parsing likely session data: {e}")
except Exception as e:
debug.log(f"Blackbox: Error reading HAR file {file}: {e}")
return None
except Exception as e:
debug.log(f"Blackbox: Error searching HAR files: {e}")
@ -573,7 +567,7 @@ class Blackbox(AsyncGeneratorProvider, ProviderModelMixin):
conversation.message_history = []
current_messages = []
for i, msg in enumerate(messages):
for i, msg in enumerate(render_messages(messages)):
msg_id = conversation.chat_id if i == 0 and msg["role"] == "user" else cls.generate_id()
current_msg = {
"id": msg_id,

View file

@ -9,7 +9,7 @@ from ..requests import Session, StreamSession, get_args_from_nodriver, raise_for
from ..requests import DEFAULT_HEADERS, has_nodriver, has_curl_cffi
from ..providers.response import FinishReason, Usage
from ..errors import ResponseStatusError, ModelNotFoundError
from .helper import to_string
from .helper import render_messages
class Cloudflare(AsyncGeneratorProvider, ProviderModelMixin, AuthFileMixin):
label = "Cloudflare AI"
@ -82,7 +82,7 @@ class Cloudflare(AsyncGeneratorProvider, ProviderModelMixin, AuthFileMixin):
elif has_nodriver:
cls._args = await get_args_from_nodriver(cls.url, proxy, timeout, cookies)
else:
cls._args = {"headers": DEFAULT_HEADERS, "cookies": {}}
cls._args = {"headers": DEFAULT_HEADERS, "cookies": {}, "impersonate": "chrome"}
try:
model = cls.get_model(model)
except ModelNotFoundError:
@ -90,8 +90,7 @@ class Cloudflare(AsyncGeneratorProvider, ProviderModelMixin, AuthFileMixin):
data = {
"messages": [{
**message,
"content": to_string(message["content"]),
"parts": [{"type":"text", "text": to_string(message["content"])}]} for message in messages],
"parts": [{"type":"text", "text": message["content"]}]} for message in render_messages(messages)],
"lora": None,
"model": model,
"max_tokens": max_tokens,
@ -120,5 +119,5 @@ class Cloudflare(AsyncGeneratorProvider, ProviderModelMixin, AuthFileMixin):
yield Usage(**finish.get("usage"))
yield FinishReason(finish.get("finishReason"))
with cache_file.open("w") as f:
json.dump(cls._args, f)
with cache_file.open("w") as f:
json.dump(cls._args, f)

View file

@ -116,8 +116,6 @@ class Copilot(AsyncGeneratorProvider, ProviderModelMixin):
response.raise_for_status()
conversation_id = response.json().get("id")
conversation = Conversation(conversation_id)
if return_conversation:
yield conversation
if prompt is None:
prompt = format_prompt_max_length(messages, 10000)
debug.log(f"Copilot: Created conversation: {conversation_id}")
@ -126,6 +124,8 @@ class Copilot(AsyncGeneratorProvider, ProviderModelMixin):
if prompt is None:
prompt = get_last_user_message(messages)
debug.log(f"Copilot: Use conversation: {conversation_id}")
if return_conversation:
yield conversation
uploaded_images = []
for media, _ in merge_media(media, messages):

View file

@ -49,6 +49,7 @@ def get_har_files():
for file in files:
if file.endswith(".har"):
harPath.append(os.path.join(root, file))
break
if not harPath:
raise NoValidHarFileError("No .har file found")
harPath.sort(key=lambda x: os.path.getmtime(x))

View file

@ -309,9 +309,9 @@ class Api:
if credentials is not None and credentials.credentials != "secret":
config.api_key = credentials.credentials
conversation = None
conversation = config.conversation
return_conversation = config.return_conversation
if conversation is not None:
if conversation:
conversation = JsonConversation(**conversation)
return_conversation = True
elif config.conversation_id is not None and config.provider is not None:

View file

@ -217,7 +217,7 @@ async def async_iter_response(
if stream:
chat_completion = ChatCompletionChunk.model_construct(
None, finish_reason, completion_id, int(time.time()), usage=usage
None, finish_reason, completion_id, int(time.time()), usage=usage, conversation=conversation
)
else:
if response_format is not None and "type" in response_format:
@ -228,7 +228,7 @@ async def async_iter_response(
**filter_none(
tool_calls=[ToolCallModel.model_construct(**tool_call) for tool_call in tool_calls]
) if tool_calls is not None else {},
conversation=None if conversation is None else conversation.get_dict()
conversation=conversation
)
if provider is not None:
chat_completion.provider = provider.name

View file

@ -10,7 +10,7 @@ from ..client.helper import filter_markdown
from .helper import filter_none
try:
from pydantic import BaseModel
from pydantic import BaseModel, field_serializer
except ImportError:
class BaseModel():
@classmethod
@ -19,6 +19,9 @@ except ImportError:
for key, value in data.items():
setattr(new, key, value)
return new
class field_serializer():
def __init__(self, field_name):
self.field_name = field_name
class BaseModel(BaseModel):
@classmethod
@ -72,6 +75,7 @@ class ChatCompletionChunk(BaseModel):
provider: Optional[str]
choices: List[ChatCompletionDeltaChoice]
usage: UsageModel
conversation: dict
@classmethod
def model_construct(
@ -80,7 +84,8 @@ class ChatCompletionChunk(BaseModel):
finish_reason: str,
completion_id: str = None,
created: int = None,
usage: UsageModel = None
usage: UsageModel = None,
conversation: dict = None
):
return super().model_construct(
id=f"chatcmpl-{completion_id}" if completion_id else None,
@ -92,9 +97,15 @@ class ChatCompletionChunk(BaseModel):
ChatCompletionDelta.model_construct(content),
finish_reason
)],
**filter_none(usage=usage)
**filter_none(usage=usage, conversation=conversation)
)
@field_serializer('conversation')
def serialize_conversation(self, conversation: dict):
if hasattr(conversation, "get_dict"):
return conversation.get_dict()
return conversation
class ChatCompletionMessage(BaseModel):
role: str
content: str
@ -104,6 +115,10 @@ class ChatCompletionMessage(BaseModel):
def model_construct(cls, content: str, tool_calls: list = None):
return super().model_construct(role="assistant", content=content, **filter_none(tool_calls=tool_calls))
@field_serializer('content')
def serialize_content(self, content: str):
return str(content)
def save(self, filepath: str, allowd_types = None):
if hasattr(self.content, "data"):
os.rename(self.content.data.replace("/media", images_dir), filepath)
@ -160,6 +175,12 @@ class ChatCompletion(BaseModel):
**filter_none(usage=usage, conversation=conversation)
)
@field_serializer('conversation')
def serialize_conversation(self, conversation: dict):
if hasattr(conversation, "get_dict"):
return conversation.get_dict()
return conversation
class ChatCompletionDelta(BaseModel):
role: str
content: str
@ -168,6 +189,10 @@ class ChatCompletionDelta(BaseModel):
def model_construct(cls, content: Optional[str]):
return super().model_construct(role="assistant", content=content)
@field_serializer('content')
def serialize_content(self, content: str):
return str(content)
class ChatCompletionDeltaChoice(BaseModel):
index: int
delta: ChatCompletionDelta

View file

@ -56,12 +56,11 @@ DOMAINS = [
".google.com",
"www.whiterabbitneo.com",
"huggingface.co",
".huggingface.co"
"chat.reka.ai",
"chatgpt.com",
".cerebras.ai",
"github.com",
"huggingface.co",
".huggingface.co"
]
if has_browser_cookie3 and os.environ.get('DBUS_SESSION_BUS_ADDRESS') == "/dev/null":
@ -152,6 +151,7 @@ def read_cookie_files(dirPath: str = None):
harFiles.append(os.path.join(root, file))
elif file.endswith(".json"):
cookieFiles.append(os.path.join(root, file))
break
CookiesConfig.cookies = {}
for path in harFiles:

View file

@ -169,15 +169,15 @@
if (errorVideo < 3 || !refreshOnHide) {
return;
}
if (skipRefresh > 0) {
skipRefresh -= 1;
return;
}
if (errorImage < 3) {
imageFeed.src = "/search/image+g4f?skip=" + skipImage;
skipImage++;
return;
}
if (skipRefresh > 0) {
skipRefresh -= 1;
return;
}
if (images.length > 0) {
imageFeed.classList.remove("hidden");
imageFeed.src = images.shift();
@ -194,10 +194,13 @@
imageFeed.onload = () => {
imageFeed.classList.remove("hidden");
gradient.classList.add("hidden");
errorImage = 0;
};
imageFeed.onclick = () => {
imageFeed.src = "/search/image?random=" + Math.random();
skipRefresh = 2;
if (skipRefresh < 4) {
skipRefresh += 1;
}
};
})();
</script>

View file

@ -341,28 +341,35 @@ class Backend_Api(Api):
return redirect(source_url)
raise
self.match_files = {}
@app.route('/search/<search>', methods=['GET'])
def find_media(search: str):
search = [secure_filename(chunk.lower()) for chunk in search.split("+")]
safe_search = [secure_filename(chunk.lower()) for chunk in search.split("+")]
if not os.access(images_dir, os.R_OK):
return jsonify({"error": {"message": "Not found"}}), 404
match_files = {}
for root, _, files in os.walk(images_dir):
for file in files:
mime_type = is_allowed_extension(file)
if mime_type is not None:
mime_type = secure_filename(mime_type)
for tag in search:
if tag in mime_type:
match_files[file] = match_files.get(file, 0) + 1
break
for tag in search:
if tag in file.lower():
match_files[file] = match_files.get(file, 0) + 1
match_files = [file for file, count in match_files.items() if count >= request.args.get("min", len(search))]
if search not in self.match_files:
self.match_files[search] = {}
for root, _, files in os.walk(images_dir):
for file in files:
mime_type = is_allowed_extension(file)
if mime_type is not None:
mime_type = secure_filename(mime_type)
for tag in safe_search:
if tag in mime_type:
self.match_files[search][file] = self.match_files[search].get(file, 0) + 1
break
for tag in safe_search:
if tag in file.lower():
self.match_files[search][file] = self.match_files[search].get(file, 0) + 1
break
match_files = [file for file, count in self.match_files[search].items() if count >= request.args.get("min", len(safe_search))]
if int(request.args.get("skip", 0)) >= len(match_files):
return jsonify({"error": {"message": "Not found"}}), 404
if (request.args.get("random", False)):
seed = request.args.get("random")
if seed not in ["true", "True", "1"]:
random.seed(seed)
return redirect(f"/media/{random.choice(match_files)}"), 302
return redirect(f"/media/{match_files[int(request.args.get('skip', 0))]}", 302)

View file

@ -24,6 +24,16 @@ def to_string(value) -> str:
return "".join([to_string(v) for v in value if v.get("type", "text") == "text"])
return str(value)
def render_messages(messages: Messages) -> Iterator:
    """Yield messages with list-style content normalised to plain strings.

    Messages whose ``content`` is a list of content blocks (e.g. OpenAI-style
    ``[{"type": "text", "text": ...}, ...]``) are rewritten so that ``content``
    becomes a single string via ``to_string``. All other messages are yielded
    unchanged.

    Args:
        messages: Sequence of chat messages (typically dicts with ``role``
            and ``content`` keys).

    Yields:
        The input messages, with block-list contents flattened to strings.
    """
    # NOTE: the original used enumerate() but never consumed the index.
    for message in messages:
        if isinstance(message, dict) and isinstance(message.get("content"), list):
            # Shallow-copy the message so the caller's dict is not mutated.
            yield {
                **message,
                "content": to_string(message["content"]),
            }
        else:
            yield message
def format_prompt(messages: Messages, add_special_tokens: bool = False, do_continue: bool = False, include_system: bool = True) -> str:
"""
Format a series of messages into a single string, optionally adding special tokens.