Mirror of https://github.com/xtekky/gpt4free.git, synced 2025-12-05 18:20:35 -08:00
Refactor MarkItDown and OpenaiChat classes for improved media handling and optional parameters; enhance the is_data_an_media function to return a binary/octet-stream content type for unsupported URLs.
parent 35e3fa95f3
commit 2317bd5a83
4 changed files with 19 additions and 25 deletions
@@ -2,7 +2,6 @@ from __future__ import annotations
import os
import asyncio
from typing import Any

try:
    from ...integration.markitdown import MarkItDown as MaItDo, StreamInfo
@@ -23,7 +22,6 @@ class MarkItDown(AsyncGeneratorProvider, ProviderModelMixin):
        model: str,
        messages: Messages,
        media: MediaListType = None,
        llm_client: Any = None,
        **kwargs
    ) -> AsyncResult:
        if media is None:
@@ -34,12 +32,10 @@ class MarkItDown(AsyncGeneratorProvider, ProviderModelMixin):
        for file, filename in media:
            text = None
            try:
-               result = md.convert(
-                   file,
-                   stream_info=StreamInfo(filename=filename) if filename else None,
-                   llm_client=llm_client,
-                   llm_model=model
-               )
+               if isinstance(file, str) and file.startswith(("http://", "https://")):
+                   result = md.convert_url(file)
+               else:
+                   result = md.convert(file, stream_info=StreamInfo(filename=filename) if filename else None)
                if asyncio.iscoroutine(result.text_content):
                    text = await result.text_content
                else:
@@ -47,11 +43,7 @@ class MarkItDown(AsyncGeneratorProvider, ProviderModelMixin):
            except TypeError:
                copyfile = get_tempfile(file, filename)
                try:
-                   result = md.convert(
-                       copyfile,
-                       llm_client=llm_client,
-                       llm_model=model
-                   )
+                   result = md.convert(copyfile)
                    if asyncio.iscoroutine(result.text_content):
                        text = await result.text_content
                    else:
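The MarkItDown provider now branches on the input instead of passing llm_client/llm_model to every convert() call: plain http(s) URLs go through convert_url, everything else through convert with an optional StreamInfo hint, and the text is awaited only when the converter returns it as a coroutine. A minimal standalone sketch of that flow, assuming the upstream markitdown package and a hypothetical convert_to_markdown helper (not the provider's actual API):

import asyncio
# Assumption: MarkItDown and StreamInfo are importable from the upstream markitdown package;
# the provider itself imports a vendored copy via ...integration.markitdown.
from markitdown import MarkItDown, StreamInfo

async def convert_to_markdown(md: MarkItDown, file, filename: str = None) -> str:
    """Hypothetical helper mirroring the new branching in the provider."""
    if isinstance(file, str) and file.startswith(("http://", "https://")):
        # Remote input: let MarkItDown fetch the URL and detect the type itself.
        result = md.convert_url(file)
    else:
        # Local path or binary stream: pass a filename hint when one is known.
        result = md.convert(file, stream_info=StreamInfo(filename=filename) if filename else None)
    # Some converters in the vendored integration return the text lazily as a coroutine.
    text = result.text_content
    if asyncio.iscoroutine(text):
        text = await text
    return text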
@@ -24,7 +24,7 @@ from ...requests import StreamSession
from ...requests import get_nodriver_session
from ...image import ImageRequest, to_image, to_bytes, is_accepted_format, detect_file_type
from ...errors import MissingAuthError, NoValidHarFileError, ModelNotFoundError
-from ...providers.response import JsonConversation, FinishReason, SynthesizeData, AuthResult, ImageResponse, ImagePreview, ResponseType, format_link
+from ...providers.response import JsonConversation, FinishReason, SynthesizeData, AuthResult, ImageResponse, ImagePreview, ResponseType, JsonRequest, format_link
from ...providers.response import TitleGeneration, RequestLogin, Reasoning
from ...tools.media import merge_media
from ..helper import format_cookies, format_media_prompt, to_string
@@ -330,14 +330,15 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
        proxy: str = None,
        timeout: int = 360,
        auto_continue: bool = False,
-       action: str = "next",
+       action: Optional[str] = None,
        conversation: Conversation = None,
        media: MediaListType = None,
        return_conversation: bool = True,
        web_search: bool = False,
        prompt: str = None,
-       conversation_mode=None,
-       temporary=False,
+       conversation_mode: Optional[dict] = None,
+       temporary: Optional[bool] = None,
+       conversation_id: Optional[str] = None,
        **kwargs
    ) -> AsyncResult:
        """
@@ -351,7 +352,6 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
            api_key (str): Access token for authentication.
            auto_continue (bool): Flag to automatically continue the conversation.
            action (str): Type of action ('next', 'continue', 'variant').
            conversation_id (str): ID of the conversation.
            media (MediaListType): Images to include in the conversation.
            return_conversation (bool): Flag to include response fields in the output.
            **kwargs: Additional keyword arguments.
@@ -362,6 +362,10 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
        Raises:
            RuntimeError: If an error occurs during processing.
        """
+       if temporary is None:
+           temporary = action is not None and conversation_id is not None
+       if action is None:
+           action = "next"
        async with StreamSession(
            proxy=proxy,
            impersonate="chrome",
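Because action, temporary, and conversation_id are now optional, the provider resolves their defaults at the top of the request: temporary defaults to True only when both an explicit action and a conversation_id are supplied, and action falls back to "next". A self-contained sketch of that resolution (the resolve_defaults name is illustrative, not part of OpenaiChat):

from typing import Optional

def resolve_defaults(
    action: Optional[str] = None,
    temporary: Optional[bool] = None,
    conversation_id: Optional[str] = None,
) -> tuple:
    """Illustrative copy of the default handling added above."""
    if temporary is None:
        # An explicit action aimed at an existing conversation id is treated as temporary.
        temporary = action is not None and conversation_id is not None
    if action is None:
        action = "next"
    return action, temporary

# resolve_defaults() == ("next", False)
# resolve_defaults(action="variant", conversation_id="abc") == ("variant", True)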
@@ -431,7 +435,6 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
            }
            if temporary:
                data["history_and_training_disabled"] = True

            async with session.post(
                prepare_url,
                json=data,
@@ -494,7 +497,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
            if temporary:
                data["history_and_training_disabled"] = True

-           if conversation.conversation_id is not None:
+           if conversation.conversation_id is not None and not temporary:
                data["conversation_id"] = conversation.conversation_id
                debug.log(f"OpenaiChat: Use conversation: {conversation.conversation_id}")
            prompt = conversation.prompt = format_media_prompt(messages, prompt)
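With that change, the temporary flag controls both history retention and conversation reuse: a temporary request sets history_and_training_disabled and never sends the stored conversation_id. A rough sketch of the effect on the payload (build_payload is a hypothetical condensation, not the provider's inline code):

def build_payload(action: str, prompt: str, conversation_id: str = None, temporary: bool = False) -> dict:
    # Hypothetical condensation of the inline payload construction shown above.
    data = {"action": action, "prompt": prompt}
    if temporary:
        data["history_and_training_disabled"] = True
    if conversation_id is not None and not temporary:
        data["conversation_id"] = conversation_id
    return data

# build_payload("next", "Hi", "conv-1", temporary=True) omits conversation_id entirely.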
@@ -510,6 +513,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
            else:
                new_messages.append(message)
            data["messages"] = cls.create_messages(new_messages, image_requests, ["search"] if web_search else None)
+           yield JsonRequest.from_dict(data)
            headers = {
                **cls._headers,
                "accept": "text/event-stream",
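Yielding JsonRequest.from_dict(data) before the POST exposes the outgoing request body as an ordinary chunk in the provider's stream, so callers can log or inspect it. A hedged usage sketch, assuming the usual create_async_generator entry point and a working authenticated session:

import asyncio
from g4f.Provider import OpenaiChat             # import path assumed
from g4f.providers.response import JsonRequest  # added to the imports above

async def main():
    async for chunk in OpenaiChat.create_async_generator(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello"}],
    ):
        if isinstance(chunk, JsonRequest):
            # The serialized request payload, emitted before the request is sent.
            print("outgoing request:", chunk)
        else:
            print(chunk, end="")

asyncio.run(main())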
@@ -107,11 +107,12 @@ def is_data_an_media(data, filename: str = None) -> str:
        return content_type
    if isinstance(data, bytes):
        return is_accepted_format(data)
-   if isinstance(data, str) and data.startswith("http"):
+   if isinstance(data, str) and data.startswith(("http://", "https://")):
        path = urlparse(data).path
        extension = get_extension(path)
        if extension is not None:
            return EXTENSIONS_MAP[extension]
+       return "binary/octet-stream"
    return is_data_uri_an_image(data)

def is_valid_media(data: ImageType = None, filename: str = None) -> str:
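For URLs, is_data_an_media now maps a recognized file extension to its media type and otherwise falls back to binary/octet-stream instead of rejecting the input. A minimal self-contained sketch of that lookup; EXTENSIONS_MAP here is a small stand-in for the real mapping in g4f:

from urllib.parse import urlparse
from os.path import splitext

# Stand-in for the real extension-to-media-type mapping; only a few entries for illustration.
EXTENSIONS_MAP = {"png": "image/png", "jpg": "image/jpeg", "webp": "image/webp", "mp3": "audio/mpeg"}

def guess_media_type(url: str) -> str:
    """Illustrative version of the new URL branch in is_data_an_media."""
    path = urlparse(url).path
    extension = splitext(path)[1].lstrip(".").lower() or None
    if extension is not None and extension in EXTENSIONS_MAP:
        return EXTENSIONS_MAP[extension]
    # Unknown or missing extension: treat the target as opaque binary data.
    return "binary/octet-stream"

# guess_media_type("https://host/cat.png")  -> "image/png"
# guess_media_type("https://host/blob.bin") -> "binary/octet-stream"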
@@ -75,7 +75,6 @@ class YouTubeConverter(DocumentConverter):
    ) -> DocumentConverterResult:
        # Parse the stream
        encoding = "utf-8" if stream_info.charset is None else stream_info.charset
-       print(file_stream)
        soup = bs4.BeautifulSoup(file_stream, "html.parser", from_encoding=encoding)

        # Read the meta tags
@@ -96,8 +95,6 @@ class YouTubeConverter(DocumentConverter):
                    metadata[key] = content
                break

-       print(f"Extracted metadata keys: {list(metadata.keys())}")
-
        # Try reading the description
        try:
            for script in soup(["script"]):