From 8eaaf5db9503ffffe1dd76a5831e9c4924816a85 Mon Sep 17 00:00:00 2001 From: hlohaus <983577+hlohaus@users.noreply.github.com> Date: Sun, 23 Mar 2025 05:27:52 +0100 Subject: [PATCH] Add HuggingFaceMedia provider with Video Generation Add Support for Video Response in UI Improve Support for Audio Response in UI Fix ModelNotSupported errors in HuggingSpace providers --- g4f/Provider/PollinationsAI.py | 14 +- g4f/Provider/__init__.py | 2 +- g4f/Provider/hf/HuggingChat.py | 7 +- g4f/Provider/hf/HuggingFaceAPI.py | 26 ++- g4f/Provider/hf/HuggingFaceInference.py | 11 +- g4f/Provider/hf/HuggingFaceMedia.py | 175 ++++++++++++++++++ g4f/Provider/hf/__init__.py | 7 + g4f/Provider/hf/models.py | 8 +- .../hf_space/BlackForestLabs_Flux1Dev.py | 1 - .../hf_space/BlackForestLabs_Flux1Schnell.py | 2 - .../hf_space/CohereForAI_C4AI_Command.py | 7 +- g4f/Provider/needs_auth/OpenaiChat.py | 16 +- g4f/api/__init__.py | 6 +- g4f/client/__init__.py | 4 +- g4f/gui/client/qrcode.html | 5 +- g4f/gui/client/static/js/chat.v1.js | 21 ++- g4f/gui/server/api.py | 23 +-- g4f/gui/server/backend_api.py | 35 ++-- g4f/image/copy_images.py | 83 ++++++--- g4f/models.py | 6 - g4f/requests/raise_for_status.py | 25 +-- 21 files changed, 356 insertions(+), 128 deletions(-) create mode 100644 g4f/Provider/hf/HuggingFaceMedia.py diff --git a/g4f/Provider/PollinationsAI.py b/g4f/Provider/PollinationsAI.py index c105dee0..dc24c967 100644 --- a/g4f/Provider/PollinationsAI.py +++ b/g4f/Provider/PollinationsAI.py @@ -14,7 +14,8 @@ from ..image import to_data_uri, is_data_an_audio, to_input_audio from ..errors import ModelNotFoundError from ..requests.raise_for_status import raise_for_status from ..requests.aiohttp import get_connector -from ..providers.response import ImageResponse, ImagePreview, FinishReason, Usage, Audio, ToolCalls +from ..image.copy_images import save_response_media +from ..providers.response import FinishReason, Usage, ToolCalls from .. 
import debug DEFAULT_HEADERS = { @@ -239,8 +240,9 @@ class PollinationsAI(AsyncGeneratorProvider, ProviderModelMixin): async with ClientSession(headers=DEFAULT_HEADERS, connector=get_connector(proxy=proxy)) as session: async with session.get(url, allow_redirects=True) as response: await raise_for_status(response) - image_url = str(response.url) - yield ImageResponse(image_url, prompt) + async for chunk in save_response_media(response, prompt): + yield chunk + return @classmethod async def _generate_text( @@ -305,10 +307,10 @@ class PollinationsAI(AsyncGeneratorProvider, ProviderModelMixin): }) async with session.post(url, json=data) as response: await raise_for_status(response) - if response.headers["content-type"] == "audio/mpeg": - yield Audio(await response.read()) + async for chunk in save_response_media(response, messages[-1]["content"]): + yield chunk return - elif response.headers["content-type"].startswith("text/plain"): + if response.headers["content-type"].startswith("text/plain"): yield await response.text() return elif response.headers["content-type"].startswith("text/event-stream"): diff --git a/g4f/Provider/__init__.py b/g4f/Provider/__init__.py index 00b7e8d1..6e18582c 100644 --- a/g4f/Provider/__init__.py +++ b/g4f/Provider/__init__.py @@ -9,7 +9,7 @@ from .deprecated import * from .needs_auth import * from .not_working import * from .local import * -from .hf import HuggingFace, HuggingChat, HuggingFaceAPI, HuggingFaceInference +from .hf import HuggingFace, HuggingChat, HuggingFaceAPI, HuggingFaceInference, HuggingFaceMedia from .hf_space import * from .mini_max import HailuoAI, MiniMax from .template import OpenaiTemplate, BackendApi diff --git a/g4f/Provider/hf/HuggingChat.py b/g4f/Provider/hf/HuggingChat.py index 8bbf03f5..7e10f4e3 100644 --- a/g4f/Provider/hf/HuggingChat.py +++ b/g4f/Provider/hf/HuggingChat.py @@ -24,7 +24,7 @@ from ...requests import get_args_from_nodriver, DEFAULT_HEADERS from ...requests.raise_for_status import raise_for_status from ...providers.response import JsonConversation, ImageResponse, Sources, TitleGeneration, Reasoning, RequestLogin from ...cookies import get_cookies -from .models import default_model, fallback_models, image_models, model_aliases, llama_models +from .models import default_model, default_vision_model, fallback_models, image_models, model_aliases from ... 
import debug class Conversation(JsonConversation): @@ -41,6 +41,7 @@ class HuggingChat(AsyncAuthedProvider, ProviderModelMixin): supports_stream = True needs_auth = True default_model = default_model + default_vision_model = default_vision_model model_aliases = model_aliases image_models = image_models text_models = fallback_models @@ -107,8 +108,8 @@ class HuggingChat(AsyncAuthedProvider, ProviderModelMixin): ) -> AsyncResult: if not has_curl_cffi: raise MissingRequirementsError('Install "curl_cffi" package | pip install -U curl_cffi') - if model == llama_models["name"]: - model = llama_models["text"] if media is None else llama_models["vision"] + if not model and media is not None: + model = cls.default_vision_model model = cls.get_model(model) session = Session(**auth_result.get_dict()) diff --git a/g4f/Provider/hf/HuggingFaceAPI.py b/g4f/Provider/hf/HuggingFaceAPI.py index bf9dd7b7..97a9bb87 100644 --- a/g4f/Provider/hf/HuggingFaceAPI.py +++ b/g4f/Provider/hf/HuggingFaceAPI.py @@ -6,27 +6,30 @@ from ...providers.types import Messages from ...typing import MediaListType from ...requests import StreamSession, raise_for_status from ...errors import ModelNotSupportedError -from ...providers.helper import get_last_user_message from ...providers.response import ProviderInfo from ..template.OpenaiTemplate import OpenaiTemplate -from .models import model_aliases, vision_models, default_vision_model, llama_models, text_models +from .models import model_aliases, vision_models, default_llama_model, default_vision_model, text_models from ... import debug class HuggingFaceAPI(OpenaiTemplate): - label = "HuggingFace (Inference API)" + label = "HuggingFace (Text Generation)" parent = "HuggingFace" url = "https://api-inference.huggingface.com" api_base = "https://api-inference.huggingface.co/v1" working = True needs_auth = True - default_model = default_vision_model + default_model = default_llama_model default_vision_model = default_vision_model vision_models = vision_models model_aliases = model_aliases fallback_models = text_models + vision_models - provider_mapping: dict[str, dict] = {} + provider_mapping: dict[str, dict] = { + "google/gemma-3-27b-it": { + "hf-inference/models/google/gemma-3-27b-it": { + "task": "conversational", + "providerId": "google/gemma-3-27b-it"}}} @classmethod def get_model(cls, model: str, **kwargs) -> str: @@ -47,7 +50,9 @@ class HuggingFaceAPI(OpenaiTemplate): if [ provider for provider in model.get("inferenceProviderMapping") - if provider.get("task") == "conversational"]] + if provider.get("status") == "live" and provider.get("task") == "conversational" + ] + ] + list(cls.provider_mapping.keys()) else: cls.models = cls.fallback_models return cls.models @@ -78,11 +83,12 @@ class HuggingFaceAPI(OpenaiTemplate): media: MediaListType = None, **kwargs ): - if model == llama_models["name"]: - model = llama_models["text"] if media is None else llama_models["vision"] - if model in cls.model_aliases: - model = cls.model_aliases[model] + if not model and media is not None: + model = cls.default_vision_model + model = cls.get_model(model) provider_mapping = await cls.get_mapping(model, api_key) + if not provider_mapping: + raise ModelNotSupportedError(f"Model is not supported: {model} in: {cls.__name__}") for provider_key in provider_mapping: api_path = provider_key if provider_key == "novita" else f"{provider_key}/v1" api_base = f"https://router.huggingface.co/{api_path}" diff --git a/g4f/Provider/hf/HuggingFaceInference.py b/g4f/Provider/hf/HuggingFaceInference.py index 
2699e49b..f23254af 100644 --- a/g4f/Provider/hf/HuggingFaceInference.py +++ b/g4f/Provider/hf/HuggingFaceInference.py @@ -10,6 +10,7 @@ from ..base_provider import AsyncGeneratorProvider, ProviderModelMixin, format_p from ...errors import ModelNotSupportedError, ResponseError from ...requests import StreamSession, raise_for_status from ...providers.response import FinishReason, ImageResponse +from ...image.copy_images import save_response_media from ..helper import format_image_prompt, get_last_user_message from .models import default_model, default_image_model, model_aliases, text_models, image_models, vision_models from ... import debug @@ -176,12 +177,10 @@ class HuggingFaceInference(AsyncGeneratorProvider, ProviderModelMixin): debug.log(f"Special token: {is_special}") yield FinishReason("stop" if is_special else "length") else: - if response.headers["content-type"].startswith("image/"): - base64_data = base64.b64encode(b"".join([chunk async for chunk in response.iter_content()])) - url = f"data:{response.headers['content-type']};base64,{base64_data.decode()}" - yield ImageResponse(url, inputs) - else: - yield (await response.json())[0]["generated_text"].strip() + async for chunk in save_response_media(response, prompt): + yield chunk + return + yield (await response.json())[0]["generated_text"].strip() def format_prompt_mistral(messages: Messages, do_continue: bool = False) -> str: system_messages = [message["content"] for message in messages if message["role"] == "system"] diff --git a/g4f/Provider/hf/HuggingFaceMedia.py b/g4f/Provider/hf/HuggingFaceMedia.py new file mode 100644 index 00000000..f819f09c --- /dev/null +++ b/g4f/Provider/hf/HuggingFaceMedia.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +import random +import requests + +from ...providers.types import Messages +from ...requests import StreamSession, raise_for_status +from ...errors import ModelNotSupportedError +from ...providers.helper import format_image_prompt +from ...providers.base_provider import AsyncGeneratorProvider, ProviderModelMixin +from ...providers.response import ProviderInfo, ImageResponse, VideoResponse +from ...image.copy_images import save_response_media +from ... 
import debug + +class HuggingFaceMedia(AsyncGeneratorProvider, ProviderModelMixin): + label = "HuggingFace (Image / Video Generation)" + parent = "HuggingFace" + url = "https://huggingface.co" + working = True + needs_auth = True + + tasks = ["text-to-image", "text-to-video"] + provider_mapping: dict[str, dict] = {} + task_mapping: dict[str, str] = {} + + @classmethod + def get_models(cls, **kwargs) -> list[str]: + if not cls.models: + url = "https://huggingface.co/api/models?inference=warm&expand[]=inferenceProviderMapping" + response = requests.get(url) + if response.ok: + models = response.json() + cls.models = [ + model["id"] + for model in models + if [ + provider + for provider in model.get("inferenceProviderMapping") + if provider.get("status") == "live" and provider.get("task") in cls.tasks + ] + ] + cls.task_mapping = { + model["id"]: [ + provider.get("task") + for provider in model.get("inferenceProviderMapping") + ].pop() + for model in models + } + else: + cls.models = [] + return cls.models + + @classmethod + async def get_mapping(cls, model: str, api_key: str = None): + if model in cls.provider_mapping: + return cls.provider_mapping[model] + headers = { + 'Content-Type': 'application/json', + } + if api_key is not None: + headers["Authorization"] = f"Bearer {api_key}" + async with StreamSession( + timeout=30, + headers=headers, + ) as session: + async with session.get(f"https://huggingface.co/api/models/{model}?expand[]=inferenceProviderMapping") as response: + await raise_for_status(response) + model_data = await response.json() + cls.provider_mapping[model] = {key: value for key, value in model_data.get("inferenceProviderMapping").items() if value["status"] == "live"} + return cls.provider_mapping[model] + + @classmethod + async def create_async_generator( + cls, + model: str, + messages: Messages, + api_key: str = None, + extra_data: dict = {}, + prompt: str = None, + proxy: str = None, + timeout: int = 0, + **kwargs + ): + provider_mapping = await cls.get_mapping(model, api_key) + headers = { + 'Accept-Encoding': 'gzip, deflate', + 'Content-Type': 'application/json', + } + new_mapping = { + "hf-free" if key == "hf-inference" else key: value for key, value in provider_mapping.items() + if key in ["replicate", "together", "hf-inference"] + } + provider_mapping = {**new_mapping, **provider_mapping} + last_response = None + for provider_key, provider in provider_mapping.items(): + yield ProviderInfo(**{**cls.get_dict(), "label": f"HuggingFace ({provider_key})", "url": f"{cls.url}/{model}"}) + + api_base = f"https://router.huggingface.co/{provider_key}" + task = provider["task"] + provider_id = provider["providerId"] + if task not in cls.tasks: + raise ModelNotSupportedError(f"Model is not supported: {model} in: {cls.__name__} task: {task}") + + prompt = format_image_prompt(messages, prompt) + if task == "text-to-video": + extra_data = { + "num_inference_steps": 20, + "video_size": "landscape_16_9", + **extra_data + } + else: + extra_data = { + "width": 1024, + "height": 1024, + **extra_data + } + if provider_key == "fal-ai": + url = f"{api_base}/{provider_id}" + data = { + "prompt": prompt, + "image_size": "square_hd", + **extra_data + } + elif provider_key == "replicate": + url = f"{api_base}/v1/models/{provider_id}/prediction" + data = { + "input": { + "prompt": prompt, + **extra_data + } + } + elif provider_key in ("hf-inference", "hf-free"): + api_base = "https://api-inference.huggingface.co" + url = f"{api_base}/models/{provider_id}" + data = { + "inputs": prompt, + 
"parameters": { + "seed": random.randint(0, 2**32), + **extra_data + } + } + elif task == "text-to-image": + url = f"{api_base}/v1/images/generations" + data = { + "response_format": "url", + "prompt": prompt, + "model": provider_id, + **extra_data + } + + async with StreamSession( + headers=headers if provider_key == "free" or api_key is None else {**headers, "Authorization": f"Bearer {api_key}"}, + proxy=proxy, + timeout=timeout + ) as session: + async with session.post(url, json=data) as response: + if response.status in (400, 401, 402): + last_response = response + debug.error(f"{cls.__name__}: Error {response.status} with {provider_key} and {provider_id}") + continue + if response.status == 404: + raise ModelNotSupportedError(f"Model is not supported: {model}") + await raise_for_status(response) + async for chunk in save_response_media(response, prompt): + yield chunk + return + result = await response.json() + if "video" in result: + yield VideoResponse(result["video"]["url"], prompt) + elif task == "text-to-image": + yield ImageResponse([item["url"] for item in result.get("images", result.get("data"))], prompt) + elif task == "text-to-video": + yield VideoResponse(result["output"], prompt) + return + await raise_for_status(last_response) \ No newline at end of file diff --git a/g4f/Provider/hf/__init__.py b/g4f/Provider/hf/__init__.py index c482ef81..66839427 100644 --- a/g4f/Provider/hf/__init__.py +++ b/g4f/Provider/hf/__init__.py @@ -9,6 +9,7 @@ from ..base_provider import AsyncGeneratorProvider, ProviderModelMixin from .HuggingChat import HuggingChat from .HuggingFaceAPI import HuggingFaceAPI from .HuggingFaceInference import HuggingFaceInference +from .HuggingFaceMedia import HuggingFaceMedia from .models import model_aliases, vision_models, default_vision_model from ... 
import debug @@ -51,6 +52,12 @@ class HuggingFace(AsyncGeneratorProvider, ProviderModelMixin): debug.error(f"{cls.__name__} {type(e).__name__}; {e}") if not cls.image_models: cls.get_models() + try: + async for chunk in HuggingFaceMedia.create_async_generator(model, messages, **kwargs): + yield chunk + return + except ModelNotSupportedError: + pass if model in cls.image_models: if "api_key" not in kwargs: async for chunk in HuggingChat.create_async_generator(model, messages, **kwargs): diff --git a/g4f/Provider/hf/models.py b/g4f/Provider/hf/models.py index fe251dc5..cbd46020 100644 --- a/g4f/Provider/hf/models.py +++ b/g4f/Provider/hf/models.py @@ -47,9 +47,5 @@ extra_models = [ "NousResearch/Hermes-3-Llama-3.1-8B", ] default_vision_model = "meta-llama/Llama-3.2-11B-Vision-Instruct" -vision_models = [default_vision_model, "Qwen/Qwen2-VL-7B-Instruct"] -llama_models = { - "name": "llama-3", - "text": "meta-llama/Llama-3.3-70B-Instruct", - "vision": "meta-llama/Llama-3.2-11B-Vision-Instruct", -} \ No newline at end of file +default_llama_model = "meta-llama/Llama-3.3-70B-Instruct" +vision_models = [default_vision_model, "Qwen/Qwen2-VL-7B-Instruct"] \ No newline at end of file diff --git a/g4f/Provider/hf_space/BlackForestLabs_Flux1Dev.py b/g4f/Provider/hf_space/BlackForestLabs_Flux1Dev.py index c13102b7..4e48299f 100644 --- a/g4f/Provider/hf_space/BlackForestLabs_Flux1Dev.py +++ b/g4f/Provider/hf_space/BlackForestLabs_Flux1Dev.py @@ -67,7 +67,6 @@ class BlackForestLabs_Flux1Dev(AsyncGeneratorProvider, ProviderModelMixin): zerogpu_uuid: str = "[object Object]", **kwargs ) -> AsyncResult: - model = cls.get_model(model) async with StreamSession(impersonate="chrome", proxy=proxy) as session: prompt = format_image_prompt(messages, prompt) data = [prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps] diff --git a/g4f/Provider/hf_space/BlackForestLabs_Flux1Schnell.py b/g4f/Provider/hf_space/BlackForestLabs_Flux1Schnell.py index bdf4539a..374f5cd6 100644 --- a/g4f/Provider/hf_space/BlackForestLabs_Flux1Schnell.py +++ b/g4f/Provider/hf_space/BlackForestLabs_Flux1Schnell.py @@ -37,8 +37,6 @@ class BlackForestLabs_Flux1Schnell(AsyncGeneratorProvider, ProviderModelMixin): randomize_seed: bool = True, **kwargs ) -> AsyncResult: - - model = cls.get_model(model) width = max(32, width - (width % 8)) height = max(32, height - (height % 8)) prompt = format_image_prompt(messages, prompt) diff --git a/g4f/Provider/hf_space/CohereForAI_C4AI_Command.py b/g4f/Provider/hf_space/CohereForAI_C4AI_Command.py index c7320b70..2b36a8c3 100644 --- a/g4f/Provider/hf_space/CohereForAI_C4AI_Command.py +++ b/g4f/Provider/hf_space/CohereForAI_C4AI_Command.py @@ -24,9 +24,14 @@ class CohereForAI_C4AI_Command(AsyncGeneratorProvider, ProviderModelMixin): "command-r": "command-r", "command-r7b": "command-r7b-12-2024", } - models = list(model_aliases.keys()) + @classmethod + def get_model(cls, model: str, **kwargs) -> str: + if model in cls.model_aliases.values(): + return model + return super().get_model(model, **kwargs) + @classmethod async def create_async_generator( cls, model: str, messages: Messages, diff --git a/g4f/Provider/needs_auth/OpenaiChat.py b/g4f/Provider/needs_auth/OpenaiChat.py index 8c8ec70d..cc910ae0 100644 --- a/g4f/Provider/needs_auth/OpenaiChat.py +++ b/g4f/Provider/needs_auth/OpenaiChat.py @@ -203,7 +203,21 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin): Returns: A list of messages with the user input and the image, if any """ - # Create a message object with the 
user role and the content + # merged_messages = [] + # last_message = None + # for message in messages: + # current_message = last_message + # if current_message is not None: + # if current_message["role"] == message["role"]: + # current_message["content"] += "\n" + message["content"] + # else: + # merged_messages.append(current_message) + # last_message = message.copy() + # else: + # last_message = message.copy() + # if last_message is not None: + # merged_messages.append(last_message) + messages = [{ "id": str(uuid.uuid4()), "author": {"role": message["role"]}, diff --git a/g4f/api/__init__.py b/g4f/api/__init__.py index 5dbc361b..2496cdfe 100644 --- a/g4f/api/__init__.py +++ b/g4f/api/__init__.py @@ -39,7 +39,7 @@ from g4f.client import AsyncClient, ChatCompletion, ImagesResponse, convert_to_p from g4f.providers.response import BaseConversation, JsonConversation from g4f.client.helper import filter_none from g4f.image import is_data_an_media -from g4f.image.copy_images import images_dir, copy_images, get_source_url +from g4f.image.copy_images import images_dir, copy_media, get_source_url from g4f.errors import ProviderNotFoundError, ModelNotFoundError, MissingAuthError, NoValidHarFileError from g4f.cookies import read_cookie_files, get_cookies_dir from g4f.Provider import ProviderType, ProviderUtils, __providers__ @@ -594,10 +594,10 @@ class Api: ssl = False if source_url is not None: try: - await copy_images( + await copy_media( [source_url], target=target, ssl=ssl) - debug.log(f"Image copied from {source_url}") + debug.log(f"File copied from {source_url}") except Exception as e: debug.error(f"Download failed: {source_url}\n{type(e).__name__}: {e}") return RedirectResponse(url=source_url) diff --git a/g4f/client/__init__.py b/g4f/client/__init__.py index d51f5f27..7238ecb5 100644 --- a/g4f/client/__init__.py +++ b/g4f/client/__init__.py @@ -9,7 +9,7 @@ import aiohttp import base64 from typing import Union, AsyncIterator, Iterator, Awaitable, Optional -from ..image.copy_images import copy_images +from ..image.copy_images import copy_media from ..typing import Messages, ImageType from ..providers.types import ProviderType, BaseRetryProvider from ..providers.response import * @@ -532,7 +532,7 @@ class Images: images = await asyncio.gather(*[get_b64_from_url(image) for image in response.get_list()]) else: # Save locally for None (default) case - images = await copy_images(response.get_list(), response.get("cookies"), proxy) + images = await copy_media(response.get_list(), response.get("cookies"), proxy) images = [Image.model_construct(url=f"/images/{os.path.basename(image)}", revised_prompt=response.alt) for image in images] return ImagesResponse.model_construct( diff --git a/g4f/gui/client/qrcode.html b/g4f/gui/client/qrcode.html index f92f59c8..4f82ce84 100644 --- a/g4f/gui/client/qrcode.html +++ b/g4f/gui/client/qrcode.html @@ -15,7 +15,7 @@
-
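
For context, the snippet below is a minimal usage sketch and is not part of the patch itself: it drives the new HuggingFaceMedia provider directly through its async generator, as the updated HuggingFace provider does internally. It assumes a valid Hugging Face access token, and the model id is only an illustrative placeholder; any warm text-to-image or text-to-video model listed by HuggingFaceMedia.get_models() should work.

import asyncio

from g4f.Provider import HuggingFaceMedia
from g4f.providers.response import ImageResponse, VideoResponse

async def main():
    # The model id is a placeholder; pick one returned by
    # HuggingFaceMedia.get_models() (models with a "live"
    # text-to-image / text-to-video inference provider mapping).
    async for chunk in HuggingFaceMedia.create_async_generator(
        model="tencent/HunyuanVideo",
        messages=[{"role": "user", "content": "a red fox running through fresh snow"}],
        api_key="hf_xxx",  # your Hugging Face access token
    ):
        # The generator yields ProviderInfo first, then media responses.
        if isinstance(chunk, VideoResponse):
            print("video:", chunk)
        elif isinstance(chunk, ImageResponse):
            print("image:", chunk)

asyncio.run(main())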