diff --git a/g4f/Provider/Copilot.py b/g4f/Provider/Copilot.py
index 50563278..0f799f53 100644
--- a/g4f/Provider/Copilot.py
+++ b/g4f/Provider/Copilot.py
@@ -30,6 +30,7 @@ from ..image import to_bytes, is_accepted_format
 from .helper import get_last_user_message
 from ..files import get_bucket_dir
 from ..tools.files import read_bucket
+from ..cookies import get_cookies
 from pathlib import Path
 from .. import debug
 
@@ -54,31 +55,38 @@ def extract_bucket_items(messages: Messages) -> list[dict]:
 class Copilot(AsyncAuthedProvider, ProviderModelMixin):
     label = "Microsoft Copilot"
     url = "https://copilot.microsoft.com"
+    cookie_domain = ".microsoft.com"
+    anon_cookie_name = "__Host-copilot-anon"
 
-    working = True
-    supports_stream = True
+    working = has_curl_cffi
+    use_nodriver = has_nodriver
     active_by_default = True
 
     default_model = "Copilot"
-    models = [default_model, "Think Deeper", "Smart (GPT-5)"]
+    models = [default_model, "Think Deeper", "Smart (GPT-5)", "Study"]
     model_aliases = {
         "o1": "Think Deeper",
         "gpt-4": default_model,
         "gpt-4o": default_model,
         "gpt-5": "GPT-5",
+        "study": "Study",
     }
 
     websocket_url = "wss://copilot.microsoft.com/c/api/chat?api-version=2"
     conversation_url = f"{url}/c/api/conversations"
 
     _access_token: str = None
+    _useridentitytype: str = None
     _cookies: dict = {}
 
     @classmethod
-    async def on_auth_async(cls, **kwargs) -> AsyncIterator:
+    async def on_auth_async(cls, api_key: str = None, **kwargs) -> AsyncIterator:
+        cookies = cls.cookies_to_dict()
+        if api_key:
+            cookies[cls.anon_cookie_name] = api_key
         yield AuthResult(
-            api_key=cls._access_token,
-            cookies=cls.cookies_to_dict()
+            access_token=cls._access_token,
+            cookies=cls.cookies_to_dict() or get_cookies(cls.cookie_domain, False)
        )
 
     @classmethod
@@ -89,7 +97,7 @@ class Copilot(AsyncAuthedProvider, ProviderModelMixin):
         auth_result: AuthResult,
         **kwargs
     ) -> AsyncResult:
-        cls._access_token = getattr(auth_result, "api_key")
+        cls._access_token = getattr(auth_result, "access_token", None)
         cls._cookies = getattr(auth_result, "cookies")
         async for chunk in cls.create(model, messages, **kwargs):
             yield chunk
@@ -110,8 +118,6 @@ class Copilot(AsyncAuthedProvider, ProviderModelMixin):
         media: MediaListType = None,
         conversation: BaseConversation = None,
         return_conversation: bool = True,
-        useridentitytype: str = "google",
-        api_key: str = None,
         **kwargs
     ) -> AsyncResult:
         if not has_curl_cffi:
@@ -120,20 +126,19 @@ class Copilot(AsyncAuthedProvider, ProviderModelMixin):
         websocket_url = cls.websocket_url
         headers = None
         if cls._access_token or cls.needs_auth:
-            if api_key is not None:
-                cls._access_token = api_key
            if cls._access_token is None:
                try:
-                    cls._access_token, cls._cookies = readHAR(cls.url)
+                    cls._access_token, cls._useridentitytype, cls._cookies = readHAR(cls.url)
                except NoValidHarFileError as h:
                    debug.log(f"Copilot: {h}")
                    if has_nodriver:
                        yield RequestLogin(cls.label, os.environ.get("G4F_LOGIN_URL", ""))
-                        cls._access_token, cls._cookies = await get_access_token_and_cookies(cls.url, proxy)
+                        cls._access_token, cls._useridentitytype, cls._cookies = await get_access_token_and_cookies(cls.url, proxy)
                    else:
                        raise h
-            websocket_url = f"{websocket_url}&accessToken={quote(cls._access_token)}&X-UserIdentityType={quote(useridentitytype)}"
+            websocket_url = f"{websocket_url}&accessToken={quote(cls._access_token)}" + (f"&X-UserIdentityType={quote(cls._useridentitytype)}" if cls._useridentitytype else "")
            headers = {"authorization": f"Bearer {cls._access_token}"}
+
 
         async with AsyncSession(
             timeout=timeout,
@@ -142,31 +147,64 @@ class Copilot(AsyncAuthedProvider, ProviderModelMixin):
             headers=headers,
             cookies=cls._cookies,
         ) as session:
-            if cls._access_token is not None:
-                cls._cookies = session.cookies.jar if hasattr(session.cookies, "jar") else session.cookies
-                response = await session.get("https://copilot.microsoft.com/c/api/user?api-version=2", headers={"x-useridentitytype": useridentitytype})
-                if response.status_code == 401:
-                    raise MissingAuthError("Status 401: Invalid access token")
-                response.raise_for_status()
-                user = response.json().get('firstName')
-                if user is None:
-                    if cls.needs_auth:
-                        raise MissingAuthError("No user found, please login first")
-                    cls._access_token = None
-                else:
-                    debug.log(f"Copilot: User: {user}")
+            cls._cookies = session.cookies.jar if hasattr(session.cookies, "jar") else session.cookies
             if conversation is None:
-                response = await session.post(cls.conversation_url, headers={"x-useridentitytype": useridentitytype} if cls._access_token else {})
+                # har_file = os.path.join(os.path.dirname(__file__), "copilot", "copilot.microsoft.com.har")
+                # with open(har_file, "r") as f:
+                #     har_entries = json.load(f).get("log", {}).get("entries", [])
+                # conversationId = ""
+                # for har_entry in har_entries:
+                #     if har_entry.get("request"):
+                #         if "/c/api/" in har_entry.get("request").get("url", ""):
+                #             try:
+                #                 response = await getattr(session, har_entry.get("request").get("method").lower())(
+                #                     har_entry.get("request").get("url", "").replace("cvqBJw7kyPAp1RoMTmzC6", conversationId),
+                #                     data=har_entry.get("request").get("postData", {}).get("text"),
+                #                     headers={header["name"]: header["value"] for header in har_entry.get("request").get("headers")}
+                #                 )
+                #                 response.raise_for_status()
+                #                 if response.headers.get("content-type", "").startswith("application/json"):
+                #                     conversationId = response.json().get("currentConversationId", conversationId)
+                #             except Exception as e:
+                #                 debug.log(f"Copilot: Failed request to {har_entry.get('request').get('url', '')}: {e}")
+                data = {
+                    "timeZone": "America/Los_Angeles",
+                    "startNewConversation": True,
+                    "teenSupportEnabled": True,
+                    "correctPersonalizationSetting": True,
+                    "performUserMerge": True,
+                    "deferredDataUseCapable": True
+                }
+                response = await session.post(
+                    "https://copilot.microsoft.com/c/api/start",
+                    headers={
+                        "content-type": "application/json",
+                        **({"x-useridentitytype": cls._useridentitytype} if cls._useridentitytype else {}),
+                        **(headers or {})
+                    },
+                    json=data
+                )
                 response.raise_for_status()
-                conversation_id = response.json().get("id")
-                conversation = Conversation(conversation_id)
-                debug.log(f"Copilot: Created conversation: {conversation_id}")
+                conversation = Conversation(response.json().get("currentConversationId"))
+                debug.log(f"Copilot: Created conversation: {conversation.conversation_id}")
             else:
-                conversation_id = conversation.conversation_id
-                debug.log(f"Copilot: Use conversation: {conversation_id}")
+                debug.log(f"Copilot: Use conversation: {conversation.conversation_id}")
             if return_conversation:
                 yield conversation
+            # response = await session.get("https://copilot.microsoft.com/c/api/user?api-version=4", headers={"x-useridentitytype": useridentitytype} if cls._access_token else {})
+            # if response.status_code == 401:
+            #     raise MissingAuthError("Status 401: Invalid session")
+            # response.raise_for_status()
+            # print(response.json())
+            # user = response.json().get('firstName')
+            # if user is None:
+            #     if cls.needs_auth:
+            #         raise MissingAuthError("No user found, please login first")
+            #     cls._access_token = None
+            # else:
+            #     debug.log(f"Copilot: User: {user}")
debug.log(f"Copilot: User: {user}") + uploaded_attachments = [] if cls._access_token is not None: # Upload regular media (images) @@ -178,7 +216,7 @@ class Copilot(AsyncAuthedProvider, ProviderModelMixin): headers={ "content-type": is_accepted_format(data), "content-length": str(len(data)), - **({"x-useridentitytype": useridentitytype} if cls._access_token else {}) + **({"x-useridentitytype": cls._useridentitytype} if cls._useridentitytype else {}) }, data=data ) @@ -201,7 +239,7 @@ class Copilot(AsyncAuthedProvider, ProviderModelMixin): response = await session.post( "https://copilot.microsoft.com/c/api/attachments", multipart=data, - headers={"x-useridentitytype": useridentitytype} + headers={"x-useridentitytype": cls._useridentitytype} if cls._useridentitytype else {} ) response.raise_for_status() data = response.json() @@ -225,7 +263,7 @@ class Copilot(AsyncAuthedProvider, ProviderModelMixin): mode = "chat" await wss.send(json.dumps({ "event": "send", - "conversationId": conversation_id, + "conversationId": conversation.conversation_id, "content": [*uploaded_attachments, { "type": "text", "text": prompt, @@ -285,6 +323,7 @@ async def get_access_token_and_cookies(url: str, proxy: str = None): try: page = await browser.get(url) access_token = None + useridentitytype = None while access_token is None: for _ in range(2): await asyncio.sleep(3) @@ -292,9 +331,12 @@ async def get_access_token_and_cookies(url: str, proxy: str = None): (() => { for (var i = 0; i < localStorage.length; i++) { try { - item = JSON.parse(localStorage.getItem(localStorage.key(i))); + const key = localStorage.key(i); + const item = JSON.parse(localStorage.getItem(key)); if (item?.body?.access_token) { - return item.body.access_token; + return ["" + item?.body?.access_token, "google"]; + } else if (key.includes("chatai")) { + return "" + item.secret; } } catch(e) {} } @@ -302,16 +344,24 @@ async def get_access_token_and_cookies(url: str, proxy: str = None): """) if access_token is None: await asyncio.sleep(1) + continue + if isinstance(access_token, list): + access_token, useridentitytype = access_token + access_token = access_token.get("value") if isinstance(access_token, dict) else access_token + useridentitytype = useridentitytype.get("value") if isinstance(useridentitytype, dict) else None + print(f"Got access token: {access_token[:10]}..., useridentitytype: {useridentitytype}") + break cookies = {} for c in await page.send(nodriver.cdp.network.get_cookies([url])): cookies[c.name] = c.value stop_browser() - return access_token, cookies + return access_token, useridentitytype, cookies finally: stop_browser() def readHAR(url: str): api_key = None + useridentitytype = None cookies = None for path in get_har_files(): with open(path, 'rb') as file: @@ -325,9 +375,11 @@ def readHAR(url: str): v_headers = get_headers(v) if "authorization" in v_headers: api_key = v_headers["authorization"].split(maxsplit=1).pop() + if "x-useridentitytype" in v_headers: + useridentitytype = v_headers["x-useridentitytype"] if v['request']['cookies']: cookies = {c['name']: c['value'] for c in v['request']['cookies']} if api_key is None: raise NoValidHarFileError("No access token found in .har files") - return api_key, cookies \ No newline at end of file + return api_key, useridentitytype, cookies \ No newline at end of file diff --git a/g4f/Provider/__init__.py b/g4f/Provider/__init__.py index 61b9dd33..1b78b6ea 100644 --- a/g4f/Provider/__init__.py +++ b/g4f/Provider/__init__.py @@ -39,6 +39,7 @@ except ImportError as e: from 
diff --git a/g4f/Provider/__init__.py b/g4f/Provider/__init__.py
index 61b9dd33..1b78b6ea 100644
--- a/g4f/Provider/__init__.py
+++ b/g4f/Provider/__init__.py
@@ -39,6 +39,7 @@ except ImportError as e:
 from .deprecated.ARTA import ARTA
 from .deprecated.Blackbox import Blackbox
 from .deprecated.DuckDuckGo import DuckDuckGo
+from .deprecated.Kimi import Kimi
 from .deprecated.PerplexityLabs import PerplexityLabs
 
 from .ApiAirforce import ApiAirforce
@@ -48,7 +49,6 @@ from .Copilot import Copilot
 from .DeepInfra import DeepInfra
 from .EasyChat import EasyChat
 from .GLM import GLM
-from .Kimi import Kimi
 from .LambdaChat import LambdaChat
 from .Mintlify import Mintlify
 from .OIVSCodeSer import OIVSCodeSer2, OIVSCodeSer0501
diff --git a/g4f/Provider/Kimi.py b/g4f/Provider/deprecated/Kimi.py
similarity index 90%
rename from g4f/Provider/Kimi.py
rename to g4f/Provider/deprecated/Kimi.py
index bbcd8b12..b3ebcc05 100644
--- a/g4f/Provider/Kimi.py
+++ b/g4f/Provider/deprecated/Kimi.py
@@ -3,16 +3,16 @@ from __future__ import annotations
 import random
 from typing import AsyncIterator
 
-from .base_provider import AsyncAuthedProvider, ProviderModelMixin
-from ..providers.helper import get_last_user_message
-from ..requests import StreamSession, sse_stream, raise_for_status
-from ..providers.response import AuthResult, TitleGeneration, JsonConversation, FinishReason
-from ..typing import AsyncResult, Messages
-from ..errors import MissingAuthError
+from ..base_provider import AsyncAuthedProvider, ProviderModelMixin
+from ...providers.helper import get_last_user_message
+from ...requests import StreamSession, sse_stream, raise_for_status
+from ...providers.response import AuthResult, TitleGeneration, JsonConversation, FinishReason
+from ...typing import AsyncResult, Messages
+from ...errors import MissingAuthError
 
 class Kimi(AsyncAuthedProvider, ProviderModelMixin):
     url = "https://www.kimi.com"
-    working = True
+    working = False
     active_by_default = True
     default_model = "kimi-k2"
     models = [default_model]
diff --git a/g4f/Provider/needs_auth/CopilotAccount.py b/g4f/Provider/needs_auth/CopilotAccount.py
index 3c72ae79..eee1fb41 100644
--- a/g4f/Provider/needs_auth/CopilotAccount.py
+++ b/g4f/Provider/needs_auth/CopilotAccount.py
@@ -29,7 +29,7 @@ class CopilotAccount(Copilot):
             debug.log(f"Copilot: {h}")
             if has_nodriver:
                 yield RequestLogin(cls.label, os.environ.get("G4F_LOGIN_URL", ""))
-                cls._access_token, cls._cookies = await get_access_token_and_cookies(cls.url, proxy)
+                cls._access_token, cls._useridentitytype, cls._cookies = await get_access_token_and_cookies(cls.url, proxy)
             else:
                 raise h
         yield AuthResult(
diff --git a/g4f/Provider/template/OpenaiTemplate.py b/g4f/Provider/template/OpenaiTemplate.py
index a3d0b7e8..4da92211 100644
--- a/g4f/Provider/template/OpenaiTemplate.py
+++ b/g4f/Provider/template/OpenaiTemplate.py
@@ -125,9 +125,9 @@ class OpenaiTemplate(AsyncGeneratorProvider, ProviderModelMixin, RaiseErrorMixin
                     yield ImageResponse([image["url"] for image in data["data"]], prompt)
                 return
 
-        if stream:
+        if stream or stream is None:
             kwargs.setdefault("stream_options", {"include_usage": True})
-        extra_parameters = filter_none(**{key: kwargs.get(key) for key in extra_parameters})
+        extra_parameters = {key: kwargs[key] for key in extra_parameters if key in kwargs}
         if extra_body is None:
             extra_body = {}
         data = filter_none(
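# ---------------------------------------------------------------------------
# A small illustration of why the extra_parameters line in OpenaiTemplate.py
# changed: the old filter_none() form dropped any value that was None, even
# when the caller passed it explicitly, while the new comprehension forwards
# exactly the keys the caller supplied. filter_none below is a simplified
# stand-in for g4f.providers.helper.filter_none; parameter names are
# illustrative, not part of the patch.
def filter_none(**kwargs):
    return {key: value for key, value in kwargs.items() if value is not None}

extra_parameters = ["temperature", "top_p", "tools"]
kwargs = {"temperature": 0.2, "tools": None}  # caller explicitly disables tools

old = filter_none(**{key: kwargs.get(key) for key in extra_parameters})
new = {key: kwargs[key] for key in extra_parameters if key in kwargs}

print(old)  # {'temperature': 0.2}                 -> the explicit None was discarded
print(new)  # {'temperature': 0.2, 'tools': None}  -> the explicit None is forwarded
# ---------------------------------------------------------------------------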
self.provider = data.get("provider") diff --git a/g4f/tools/run_tools.py b/g4f/tools/run_tools.py index 1e36f718..6639d71b 100644 --- a/g4f/tools/run_tools.py +++ b/g4f/tools/run_tools.py @@ -3,12 +3,19 @@ from __future__ import annotations import os import re import json +import math import asyncio import time import datetime from pathlib import Path from typing import Optional, AsyncIterator, Iterator, Dict, Any, Tuple, List, Union +try: + from aiofile import async_open + has_aiofile = True +except ImportError: + has_aiofile = False + from ..typing import Messages from ..providers.helper import filter_none from ..providers.asyncio import to_async_iterator @@ -254,25 +261,45 @@ async def async_iter_run_tools( response = to_async_iterator(provider.async_create_function(model=model, messages=messages, **kwargs)) try: - model_info = model + usage_model = model + usage_provider = provider.__name__ + completion_tokens = 0 + usage = None async for chunk in response: - if isinstance(chunk, ProviderInfo): - model_info = getattr(chunk, 'model', model_info) + if isinstance(chunk, FinishReason): + if sources is not None: + yield sources + sources = None + yield chunk + continue + elif isinstance(chunk, Sources): + sources = None + elif isinstance(chunk, str): + completion_tokens += 1 + elif isinstance(chunk, ProviderInfo): + usage_model = getattr(chunk, "model", usage_model) + usage_provider = getattr(chunk, "name", usage_provider) elif isinstance(chunk, Usage): - usage = {"user": kwargs.get("user"), "model": model_info, "provider": provider.get_parent(), **chunk.get_dict()} - usage_dir = Path(get_cookies_dir()) / ".usage" - usage_file = usage_dir / f"{datetime.date.today()}.jsonl" - usage_dir.mkdir(parents=True, exist_ok=True) - with usage_file.open("a" if usage_file.exists() else "w") as f: - f.write(f"{json.dumps(usage)}\n") + usage = chunk yield chunk - provider.live += 1 + if has_aiofile: + if usage is None: + usage = get_usage(messages, completion_tokens) + yield usage + usage = {"user": kwargs.get("user"), "model": usage_model, "provider": usage_provider, **usage.get_dict()} + usage_dir = Path(get_cookies_dir()) / ".usage" + usage_file = usage_dir / f"{datetime.date.today()}.jsonl" + usage_dir.mkdir(parents=True, exist_ok=True) + async with async_open(usage_file, "a") as f: + await f.write(f"{json.dumps(usage)}\n") + if completion_tokens > 0: + provider.live += 1 except: provider.live -= 1 raise # Yield sources if available - if sources: + if sources is not None: yield sources def iter_run_tools( @@ -340,10 +367,13 @@ def iter_run_tools( messages[-1]["content"] = last_message + BUCKET_INSTRUCTIONS # Process response chunks - thinking_start_time = 0 - processor = ThinkingProcessor() - model_info = model try: + thinking_start_time = 0 + processor = ThinkingProcessor() + usage_model = model + usage_provider = provider.__name__ + completion_tokens = 0 + usage = None for chunk in provider.create_function(model=model, messages=messages, provider=provider, **kwargs): if isinstance(chunk, FinishReason): if sources is not None: @@ -353,28 +383,58 @@ def iter_run_tools( continue elif isinstance(chunk, Sources): sources = None + elif isinstance(chunk, str): + completion_tokens += 1 elif isinstance(chunk, ProviderInfo): - model_info = getattr(chunk, 'model', model_info) + usage_model = getattr(chunk, "model", usage_model) + usage_provider = getattr(chunk, "name", usage_provider) elif isinstance(chunk, Usage): - usage = {"user": kwargs.get("user"), "model": model_info, "provider": provider.get_parent(), 
@@ -340,10 +367,13 @@ def iter_run_tools(
         messages[-1]["content"] = last_message + BUCKET_INSTRUCTIONS
 
     # Process response chunks
-    thinking_start_time = 0
-    processor = ThinkingProcessor()
-    model_info = model
     try:
+        thinking_start_time = 0
+        processor = ThinkingProcessor()
+        usage_model = model
+        usage_provider = provider.__name__
+        completion_tokens = 0
+        usage = None
         for chunk in provider.create_function(model=model, messages=messages, provider=provider, **kwargs):
             if isinstance(chunk, FinishReason):
                 if sources is not None:
@@ -353,28 +383,58 @@ def iter_run_tools(
                 continue
             elif isinstance(chunk, Sources):
                 sources = None
+            elif isinstance(chunk, str):
+                completion_tokens += 1
             elif isinstance(chunk, ProviderInfo):
-                model_info = getattr(chunk, 'model', model_info)
+                usage_model = getattr(chunk, "model", usage_model)
+                usage_provider = getattr(chunk, "name", usage_provider)
             elif isinstance(chunk, Usage):
-                usage = {"user": kwargs.get("user"), "model": model_info, "provider": provider.get_parent(), **chunk.get_dict()}
-                usage_dir = Path(get_cookies_dir()) / ".usage"
-                usage_file = usage_dir / f"{datetime.date.today()}.jsonl"
-                usage_dir.mkdir(parents=True, exist_ok=True)
-                with usage_file.open("a" if usage_file.exists() else "w") as f:
-                    f.write(f"{json.dumps(usage)}\n")
+                usage = chunk
             if not isinstance(chunk, str):
                 yield chunk
                 continue
 
             thinking_start_time, results = processor.process_thinking_chunk(chunk, thinking_start_time)
-
-            for result in results:
-                yield result
-
-        provider.live += 1
+            for result in results:
+                yield result
+        if usage is None:
+            usage = get_usage(messages, completion_tokens)
+            yield usage
+        usage = {"user": kwargs.get("user"), "model": usage_model, "provider": usage_provider, **usage.get_dict()}
+        usage_dir = Path(get_cookies_dir()) / ".usage"
+        usage_file = usage_dir / f"{datetime.date.today()}.jsonl"
+        usage_dir.mkdir(parents=True, exist_ok=True)
+        with usage_file.open("a") as f:
+            f.write(f"{json.dumps(usage)}\n")
+        if completion_tokens > 0:
+            provider.live += 1
     except:
         provider.live -= 1
         raise
 
     if sources is not None:
         yield sources
+
+def calculate_prompt_tokens(messages: Messages) -> int:
+    """Calculate the total number of tokens in messages"""
+    token_count = 1  # BOS token
+    for message in messages:
+        if isinstance(message.get("content"), str):
+            token_count += math.floor(len(message["content"].encode("utf-8")) / 4)
+            token_count += 4  # Role and start/end message tokens
+        elif isinstance(message.get("content"), list):
+            for item in message["content"]:
+                if isinstance(item, str):
+                    token_count += math.floor(len(item.encode("utf-8")) / 4)
+                elif isinstance(item, dict) and "text" in item and isinstance(item["text"], str):
+                    token_count += math.floor(len(item["text"].encode("utf-8")) / 4)
+            token_count += 4  # Role and start/end message tokens
+    return token_count
+
+def get_usage(messages: Messages, completion_tokens: int) -> Usage:
+    prompt_tokens = calculate_prompt_tokens(messages)
+    return Usage(
+        completion_tokens=completion_tokens,
+        prompt_tokens=prompt_tokens,
+        total_tokens=prompt_tokens + completion_tokens
+    )
\ No newline at end of file
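# ---------------------------------------------------------------------------
# Worked example of the estimate produced by calculate_prompt_tokens()/get_usage()
# above: a rough ~4-bytes-per-token heuristic plus a small per-message overhead,
# not a real tokenizer. The sample messages are illustrative.
import math

messages = [
    {"role": "system", "content": "You are a helpful assistant."},    # 28 bytes -> 7 tokens + 4 overhead
    {"role": "user", "content": "What is the capital of France?"},    # 30 bytes -> 7 tokens + 4 overhead
]

token_count = 1  # BOS token
for message in messages:
    token_count += math.floor(len(message["content"].encode("utf-8")) / 4)
    token_count += 4  # role and message start/end tokens

print(token_count)  # 23 estimated prompt tokens for this two-message conversation
# ---------------------------------------------------------------------------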