Refactor the MarkItDown and OpenaiChat classes for improved media handling and optional parameters; make is_data_an_media return "binary/octet-stream" for URLs without a recognized extension.

hlohaus 2025-10-31 18:16:19 +01:00
parent 35e3fa95f3
commit 2317bd5a83
4 changed files with 19 additions and 25 deletions

View file

@@ -2,7 +2,6 @@ from __future__ import annotations
 import os
 import asyncio
-from typing import Any
 
 try:
     from ...integration.markitdown import MarkItDown as MaItDo, StreamInfo
@@ -23,7 +22,6 @@ class MarkItDown(AsyncGeneratorProvider, ProviderModelMixin):
         model: str,
         messages: Messages,
         media: MediaListType = None,
-        llm_client: Any = None,
         **kwargs
     ) -> AsyncResult:
         if media is None:
@@ -34,12 +32,10 @@ class MarkItDown(AsyncGeneratorProvider, ProviderModelMixin):
         for file, filename in media:
             text = None
             try:
-                result = md.convert(
-                    file,
-                    stream_info=StreamInfo(filename=filename) if filename else None,
-                    llm_client=llm_client,
-                    llm_model=model
-                )
+                if isinstance(file, str) and file.startswith(("http://", "https://")):
+                    result = md.convert_url(file)
+                else:
+                    result = md.convert(file, stream_info=StreamInfo(filename=filename) if filename else None)
                 if asyncio.iscoroutine(result.text_content):
                     text = await result.text_content
                 else:
@@ -47,11 +43,7 @@ class MarkItDown(AsyncGeneratorProvider, ProviderModelMixin):
             except TypeError:
                 copyfile = get_tempfile(file, filename)
                 try:
-                    result = md.convert(
-                        copyfile,
-                        llm_client=llm_client,
-                        llm_model=model
-                    )
+                    result = md.convert(copyfile)
                     if asyncio.iscoroutine(result.text_content):
                         text = await result.text_content
                     else:
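For context, the new conversion path boils down to the branching below. This is a minimal sketch, assuming the vendored ...integration.markitdown module (imported in the diff as MaItDo) exposes convert(), convert_url(), and StreamInfo exactly as used above; the absolute import path shown here is an assumption, not taken verbatim from the provider.

# Minimal sketch of the new branching; the import path is inferred from the
# relative import in the diff and may differ in the repository.
from g4f.integration.markitdown import MarkItDown, StreamInfo

md = MarkItDown()

def convert_media(file, filename=None):
    # http(s) URLs are converted via convert_url(); anything else goes through
    # convert(), with a StreamInfo filename hint when one is available.
    if isinstance(file, str) and file.startswith(("http://", "https://")):
        return md.convert_url(file)
    return md.convert(file, stream_info=StreamInfo(filename=filename) if filename else None)

With the llm_client and llm_model arguments removed, plain conversion no longer depends on an LLM client being wired through the provider.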

View file

@@ -24,7 +24,7 @@ from ...requests import StreamSession
 from ...requests import get_nodriver_session
 from ...image import ImageRequest, to_image, to_bytes, is_accepted_format, detect_file_type
 from ...errors import MissingAuthError, NoValidHarFileError, ModelNotFoundError
-from ...providers.response import JsonConversation, FinishReason, SynthesizeData, AuthResult, ImageResponse, ImagePreview, ResponseType, format_link
+from ...providers.response import JsonConversation, FinishReason, SynthesizeData, AuthResult, ImageResponse, ImagePreview, ResponseType, JsonRequest, format_link
 from ...providers.response import TitleGeneration, RequestLogin, Reasoning
 from ...tools.media import merge_media
 from ..helper import format_cookies, format_media_prompt, to_string
@@ -330,14 +330,15 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
         proxy: str = None,
         timeout: int = 360,
         auto_continue: bool = False,
-        action: str = "next",
+        action: Optional[str] = None,
         conversation: Conversation = None,
         media: MediaListType = None,
         return_conversation: bool = True,
         web_search: bool = False,
         prompt: str = None,
-        conversation_mode=None,
-        temporary=False,
+        conversation_mode: Optional[dict] = None,
+        temporary: Optional[bool] = None,
+        conversation_id: Optional[str] = None,
         **kwargs
     ) -> AsyncResult:
         """
@@ -351,7 +352,6 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
             api_key (str): Access token for authentication.
             auto_continue (bool): Flag to automatically continue the conversation.
             action (str): Type of action ('next', 'continue', 'variant').
-            conversation_id (str): ID of the conversation.
             media (MediaListType): Images to include in the conversation.
             return_conversation (bool): Flag to include response fields in the output.
             **kwargs: Additional keyword arguments.
@@ -362,6 +362,10 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
         Raises:
             RuntimeError: If an error occurs during processing.
         """
+        if temporary is None:
+            temporary = action is not None and conversation_id is not None
+        if action is None:
+            action = "next"
         async with StreamSession(
             proxy=proxy,
             impersonate="chrome",
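Taken out of the diff, the added defaulting logic reads as follows; this is only a standalone restatement using the same parameter names, not additional behavior.

def resolve_defaults(action=None, temporary=None, conversation_id=None):
    # temporary now defaults to True only when the caller supplies both an
    # explicit action and a conversation_id; otherwise it stays False.
    if temporary is None:
        temporary = action is not None and conversation_id is not None
    # action keeps its previous default of "next" when not given.
    if action is None:
        action = "next"
    return action, temporary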
@@ -431,7 +435,6 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                 }
                 if temporary:
                     data["history_and_training_disabled"] = True
-
                 async with session.post(
                     prepare_url,
                     json=data,
@@ -494,7 +497,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                 if temporary:
                     data["history_and_training_disabled"] = True
-                if conversation.conversation_id is not None:
+                if conversation.conversation_id is not None and not temporary:
                     data["conversation_id"] = conversation.conversation_id
                     debug.log(f"OpenaiChat: Use conversation: {conversation.conversation_id}")
                 prompt = conversation.prompt = format_media_prompt(messages, prompt)
@@ -510,6 +513,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
                     else:
                         new_messages.append(message)
                 data["messages"] = cls.create_messages(new_messages, image_requests, ["search"] if web_search else None)
+                yield JsonRequest.from_dict(data)
                 headers = {
                     **cls._headers,
                     "accept": "text/event-stream",

View file

@@ -107,11 +107,12 @@ def is_data_an_media(data, filename: str = None) -> str:
             return content_type
     if isinstance(data, bytes):
         return is_accepted_format(data)
-    if isinstance(data, str) and data.startswith("http"):
+    if isinstance(data, str) and data.startswith(("http://", "https://")):
         path = urlparse(data).path
         extension = get_extension(path)
         if extension is not None:
             return EXTENSIONS_MAP[extension]
+        return "binary/octet-stream"
     return is_data_uri_an_image(data)
 
 def is_valid_media(data: ImageType = None, filename: str = None) -> str:
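The effect of the new fallback: a URL whose path carries a known extension still maps to a concrete MIME type, while any other http(s) URL now yields "binary/octet-stream" instead of falling through to the data-URI check. A trimmed, hypothetical mirror of the URL branch for illustration only (the real EXTENSIONS_MAP and get_extension live elsewhere in the codebase):

from urllib.parse import urlparse
from os.path import splitext

EXTENSIONS_MAP = {"png": "image/png", "jpg": "image/jpeg", "webp": "image/webp"}  # abbreviated

def media_type_for_url(url: str) -> str:
    path = urlparse(url).path
    extension = splitext(path)[1].lstrip(".").lower() or None
    if extension in EXTENSIONS_MAP:
        return EXTENSIONS_MAP[extension]
    # Unknown or missing extension: generic binary fallback.
    return "binary/octet-stream"

# media_type_for_url("https://example.com/picture.png")     -> "image/png"
# media_type_for_url("https://example.com/download?id=42")  -> "binary/octet-stream"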

View file

@@ -75,7 +75,6 @@ class YouTubeConverter(DocumentConverter):
     ) -> DocumentConverterResult:
         # Parse the stream
         encoding = "utf-8" if stream_info.charset is None else stream_info.charset
-        print(file_stream)
         soup = bs4.BeautifulSoup(file_stream, "html.parser", from_encoding=encoding)
 
         # Read the meta tags
@@ -96,8 +95,6 @@
                     metadata[key] = content
                     break
 
-        print(f"Extracted metadata keys: {list(metadata.keys())}")
-
         # Try reading the description
         try:
             for script in soup(["script"]):