Refactor the MarkItDown and OpenaiChat classes for improved media handling and optional parameters; make is_data_an_media return "binary/octet-stream" for URLs without a recognized extension.

hlohaus 2025-10-31 18:16:19 +01:00
parent 35e3fa95f3
commit 2317bd5a83
4 changed files with 19 additions and 25 deletions

View file

@@ -2,7 +2,6 @@ from __future__ import annotations
 import os
 import asyncio
-from typing import Any
 
 try:
     from ...integration.markitdown import MarkItDown as MaItDo, StreamInfo
@@ -23,7 +22,6 @@ class MarkItDown(AsyncGeneratorProvider, ProviderModelMixin):
         model: str,
         messages: Messages,
         media: MediaListType = None,
-        llm_client: Any = None,
         **kwargs
     ) -> AsyncResult:
         if media is None:
@@ -34,12 +32,10 @@ class MarkItDown(AsyncGeneratorProvider, ProviderModelMixin):
         for file, filename in media:
             text = None
             try:
-                result = md.convert(
-                    file,
-                    stream_info=StreamInfo(filename=filename) if filename else None,
-                    llm_client=llm_client,
-                    llm_model=model
-                )
+                if isinstance(file, str) and file.startswith(("http://", "https://")):
+                    result = md.convert_url(file)
+                else:
+                    result = md.convert(file, stream_info=StreamInfo(filename=filename) if filename else None)
                 if asyncio.iscoroutine(result.text_content):
                     text = await result.text_content
                 else:
@@ -47,11 +43,7 @@ class MarkItDown(AsyncGeneratorProvider, ProviderModelMixin):
             except TypeError:
                 copyfile = get_tempfile(file, filename)
                 try:
-                    result = md.convert(
-                        copyfile,
-                        llm_client=llm_client,
-                        llm_model=model
-                    )
+                    result = md.convert(copyfile)
                     if asyncio.iscoroutine(result.text_content):
                         text = await result.text_content
                     else:
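For context, the new conversion path boils down to the branching below. This is a minimal sketch, assuming the vendored ...integration.markitdown module (imported in the diff as MaItDo) exposes convert(), convert_url(), and StreamInfo exactly as used above; the absolute import path shown here is an assumption, not taken verbatim from the provider.

# Minimal sketch of the new branching; the import path is inferred from the
# relative import in the diff and may differ in the repository.
from g4f.integration.markitdown import MarkItDown, StreamInfo

md = MarkItDown()

def convert_media(file, filename=None):
    # http(s) URLs are converted via convert_url(); anything else goes through
    # convert(), with a StreamInfo filename hint when one is available.
    if isinstance(file, str) and file.startswith(("http://", "https://")):
        return md.convert_url(file)
    return md.convert(file, stream_info=StreamInfo(filename=filename) if filename else None)

With the llm_client and llm_model arguments removed, plain conversion no longer depends on an LLM client being wired through the provider.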

View file

@@ -24,7 +24,7 @@ from ...requests import StreamSession
 from ...requests import get_nodriver_session
 from ...image import ImageRequest, to_image, to_bytes, is_accepted_format, detect_file_type
 from ...errors import MissingAuthError, NoValidHarFileError, ModelNotFoundError
-from ...providers.response import JsonConversation, FinishReason, SynthesizeData, AuthResult, ImageResponse, ImagePreview, ResponseType, format_link
+from ...providers.response import JsonConversation, FinishReason, SynthesizeData, AuthResult, ImageResponse, ImagePreview, ResponseType, JsonRequest, format_link
 from ...providers.response import TitleGeneration, RequestLogin, Reasoning
 from ...tools.media import merge_media
 from ..helper import format_cookies, format_media_prompt, to_string
@@ -330,14 +330,15 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
         proxy: str = None,
         timeout: int = 360,
         auto_continue: bool = False,
-        action: str = "next",
+        action: Optional[str] = None,
         conversation: Conversation = None,
         media: MediaListType = None,
         return_conversation: bool = True,
         web_search: bool = False,
         prompt: str = None,
-        conversation_mode=None,
-        temporary=False,
+        conversation_mode: Optional[dict] = None,
+        temporary: Optional[bool] = None,
+        conversation_id: Optional[str] = None,
         **kwargs
     ) -> AsyncResult:
         """
@@ -351,7 +352,6 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
             api_key (str): Access token for authentication.
             auto_continue (bool): Flag to automatically continue the conversation.
             action (str): Type of action ('next', 'continue', 'variant').
-            conversation_id (str): ID of the conversation.
             media (MediaListType): Images to include in the conversation.
             return_conversation (bool): Flag to include response fields in the output.
             **kwargs: Additional keyword arguments.
@@ -362,6 +362,10 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
         Raises:
             RuntimeError: If an error occurs during processing.
         """
+        if temporary is None:
+            temporary = action is not None and conversation_id is not None
+        if action is None:
+            action = "next"
         async with StreamSession(
             proxy=proxy,
             impersonate="chrome",
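Taken out of the diff, the added defaulting logic reads as follows; this is only a standalone restatement using the same parameter names, not additional behavior.

def resolve_defaults(action=None, temporary=None, conversation_id=None):
    # temporary now defaults to True only when the caller supplies both an
    # explicit action and a conversation_id; otherwise it stays False.
    if temporary is None:
        temporary = action is not None and conversation_id is not None
    # action keeps its previous default of "next" when not given.
    if action is None:
        action = "next"
    return action, temporary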
@@ -431,7 +435,6 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                 }
                 if temporary:
                     data["history_and_training_disabled"] = True
-
                 async with session.post(
                     prepare_url,
                     json=data,
@@ -494,7 +497,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                 if temporary:
                     data["history_and_training_disabled"] = True
-                if conversation.conversation_id is not None:
+                if conversation.conversation_id is not None and not temporary:
                     data["conversation_id"] = conversation.conversation_id
                     debug.log(f"OpenaiChat: Use conversation: {conversation.conversation_id}")
                 prompt = conversation.prompt = format_media_prompt(messages, prompt)
@@ -510,6 +513,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                     else:
                         new_messages.append(message)
                 data["messages"] = cls.create_messages(new_messages, image_requests, ["search"] if web_search else None)
+                yield JsonRequest.from_dict(data)
                 headers = {
                     **cls._headers,
                     "accept": "text/event-stream",

View file

@@ -107,11 +107,12 @@ def is_data_an_media(data, filename: str = None) -> str:
             return content_type
     if isinstance(data, bytes):
         return is_accepted_format(data)
-    if isinstance(data, str) and data.startswith("http"):
+    if isinstance(data, str) and data.startswith(("http://", "https://")):
         path = urlparse(data).path
         extension = get_extension(path)
         if extension is not None:
             return EXTENSIONS_MAP[extension]
+        return "binary/octet-stream"
     return is_data_uri_an_image(data)
 
 def is_valid_media(data: ImageType = None, filename: str = None) -> str:
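The effect of the new fallback: a URL whose path carries a known extension still maps to a concrete MIME type, while any other http(s) URL now yields "binary/octet-stream" instead of falling through to the data-URI check. A trimmed, hypothetical mirror of the URL branch for illustration only (the real EXTENSIONS_MAP and get_extension live elsewhere in the codebase):

from urllib.parse import urlparse
from os.path import splitext

EXTENSIONS_MAP = {"png": "image/png", "jpg": "image/jpeg", "webp": "image/webp"}  # abbreviated

def media_type_for_url(url: str) -> str:
    path = urlparse(url).path
    extension = splitext(path)[1].lstrip(".").lower() or None
    if extension in EXTENSIONS_MAP:
        return EXTENSIONS_MAP[extension]
    # Unknown or missing extension: generic binary fallback.
    return "binary/octet-stream"

# media_type_for_url("https://example.com/picture.png")     -> "image/png"
# media_type_for_url("https://example.com/download?id=42")  -> "binary/octet-stream"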

View file

@@ -75,7 +75,6 @@ class YouTubeConverter(DocumentConverter):
     ) -> DocumentConverterResult:
         # Parse the stream
         encoding = "utf-8" if stream_info.charset is None else stream_info.charset
-        print(file_stream)
         soup = bs4.BeautifulSoup(file_stream, "html.parser", from_encoding=encoding)
 
         # Read the meta tags
@@ -96,8 +95,6 @@
                     metadata[key] = content
                     break
 
-        print(f"Extracted metadata keys: {list(metadata.keys())}")
-
         # Try reading the description
         try:
             for script in soup(["script"]):