# etc/unittest/mcp.py -- test case appended after the existing
# get_pa_registry() identity test.


class TestWorkspaceFileServing(unittest.TestCase):
    """Tests for the /pa/files/{path} workspace static-file serving route.

    These are static checks: they read the source of
    ``g4f.api.Api.register_routes`` and verify that the route, its
    extension whitelist and its security headers are declared there.  No
    HTTP request is issued.
    """

    def setUp(self):
        """Skip if FastAPI / uvicorn are not installed, then create fixtures.

        One small file per interesting extension is written into the
        workspace directory; the tests use the file names to derive the
        extensions that must (or must not) be whitelisted.
        """
        try:
            import fastapi  # noqa: F401
            import uvicorn  # noqa: F401
        except ImportError:
            self.skipTest("fastapi or uvicorn not installed")
        from g4f.mcp.pa_provider import get_workspace_dir
        self.workspace = get_workspace_dir()
        self.html_file = self.workspace / "test_page.html"
        self.css_file = self.workspace / "test_style.css"
        self.js_file = self.workspace / "test_script.js"
        self.py_file = self.workspace / "test_secret.py"
        self.env_file = self.workspace / "test.env"
        fixtures = [
            (self.html_file, "TestHello"),
            (self.css_file, "body { color: red; }"),
            (self.js_file, "console.log('hello');"),
            (self.py_file, "secret = 'do_not_expose'"),
            (self.env_file, "SECRET_KEY=abc123"),
        ]
        for path, content in fixtures:
            path.write_text(content, encoding="utf-8")

    def tearDown(self):
        """Remove every fixture file that setUp created."""
        for f in [self.html_file, self.css_file, self.js_file, self.py_file, self.env_file]:
            if f.exists():
                f.unlink()

    @staticmethod
    def _route_source():
        """Return the source text of ``Api.register_routes``."""
        import inspect
        import g4f.api as api_mod
        return inspect.getsource(api_mod.Api.register_routes)

    @staticmethod
    def _parse_safe_types(src):
        """Extract the ``_WORKSPACE_SAFE_TYPES`` dict literal from *src*.

        Args:
            src: Python source text containing an assignment of a flat
                dict literal to ``_WORKSPACE_SAFE_TYPES`` (optionally
                annotated).

        Returns:
            The evaluated ``{extension: mime_type}`` dict.

        Raises:
            LookupError: if no such assignment is found in *src*.
        """
        import ast
        import re
        # Name, optional annotation on the same line, "=", then the literal up
        # to its first closing brace (the whitelist has no nested braces).
        match = re.search(
            r"_WORKSPACE_SAFE_TYPES\b[^=\n]*=\s*(\{.*?\})", src, re.DOTALL
        )
        if match is None:
            raise LookupError("_WORKSPACE_SAFE_TYPES literal not found in source")
        return ast.literal_eval(match.group(1))

    @staticmethod
    def _extension_of(path):
        """Return the lower-cased extension of *path* without the dot.

        Mirrors how the route derives the whitelist key from a file name.
        """
        return path.suffix.lstrip(".").lower()

    def _get_safe_types(self):
        """Extract the _WORKSPACE_SAFE_TYPES dict from the route closure."""
        return self._parse_safe_types(self._route_source())

    def test_allowed_types_present(self):
        """HTML, CSS, JS must be in the allowed types."""
        safe_types = self._get_safe_types()
        expected = [
            (self.html_file, "text/html"),
            (self.css_file, "text/css"),
            (self.js_file, "application/javascript"),
        ]
        for path, mime_prefix in expected:
            ext = self._extension_of(path)
            with self.subTest(ext=ext):
                self.assertIn(ext, safe_types)
                self.assertTrue(safe_types[ext].startswith(mime_prefix))

    def test_py_files_not_served(self):
        """.py files must not be allowed (would leak provider code)."""
        safe_types = self._get_safe_types()
        for path in (self.py_file, self.env_file):
            ext = self._extension_of(path)
            with self.subTest(ext=ext):
                self.assertNotIn(
                    ext, safe_types, f"'.{ext}' must not be whitelisted"
                )

    def test_workspace_file_route_defined(self):
        """The /pa/files/{file_path:path} route must be registered."""
        self.assertIn("/pa/files/{file_path:path}", self._route_source())

    def test_traversal_blocked_by_logic(self):
        """The traversal check must use resolve().relative_to() logic."""
        self.assertIn(
            "relative_to", self._route_source(), "Path traversal check missing"
        )

    def test_security_headers_present(self):
        """Security headers must be applied to served files."""
        src = self._route_source()
        self.assertIn("X-Content-Type-Options", src)
        self.assertIn("nosniff", src, "Security header X-Content-Type-Options missing")
        self.assertIn("X-Frame-Options", src)
        self.assertIn("Content-Security-Policy", src, "CSP header missing")
        self.assertIn("no-store", src, "Cache-Control: no-store header missing")


# g4f/api/__init__.py -- hunk in class Api, added after the
# StreamingResponse(gen_backend_stream(), media_type="text/event-stream") return.

# ------------------------------------------------------------------ #
# PA workspace static file serving (HTML/CSS/JS/images for browser)  #
# ------------------------------------------------------------------ #

#: MIME types that are safe to serve for browser rendering.
#: Only these extensions are allowed; all others are refused with 403.
_WORKSPACE_SAFE_TYPES: dict[str, str] = {
    "html": "text/html; charset=utf-8",
    "htm": "text/html; charset=utf-8",
    "css": "text/css; charset=utf-8",
    "js": "application/javascript; charset=utf-8",
    "mjs": "application/javascript; charset=utf-8",
    "json": "application/json; charset=utf-8",
    "txt": "text/plain; charset=utf-8",
    "md": "text/markdown; charset=utf-8",
    "svg": "image/svg+xml",
    "png": "image/png",
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "gif": "image/gif",
    "webp": "image/webp",
    "ico": "image/x-icon",
    "woff": "font/woff",
    "woff2": "font/woff2",
    "ttf": "font/ttf",
    "otf": "font/otf",
}

#: Response headers attached to every served workspace file.  Built once
#: here instead of on every request; each response gets its own copy.
_WORKSPACE_SECURITY_HEADERS: dict[str, str] = {
    # Prevent the browser from sniffing a different content-type
    "X-Content-Type-Options": "nosniff",
    # Prevent this page from being framed by untrusted origins
    "X-Frame-Options": "SAMEORIGIN",
    # Basic XSS filter (belt-and-suspenders; CSP is more important)
    "X-XSS-Protection": "1; mode=block",
    # Restrict what the page itself can load/execute
    "Content-Security-Policy": (
        "default-src 'self'; "
        "script-src 'self' 'unsafe-inline'; "
        "style-src 'self' 'unsafe-inline'; "
        "img-src 'self' data:; "
        "font-src 'self' data:;"
    ),
    "Cache-Control": "no-store",
}


@self.app.get("/pa/files/{file_path:path}", responses={
    HTTP_200_OK: {},
    HTTP_403_FORBIDDEN: {"model": ErrorResponseModel},
    HTTP_404_NOT_FOUND: {"model": ErrorResponseModel},
})
async def pa_serve_workspace_file(file_path: str):
    """Securely serve a workspace file for browser rendering.

    Only files within ``~/.g4f/workspace`` can be served.  Path
    traversal (``..``) is blocked.  Only the MIME types listed in
    ``_WORKSPACE_SAFE_TYPES`` are served; all other extensions are
    refused with **403 Forbidden** so that sensitive file types (e.g.
    ``.env``, ``.pa.py``, ``.py``) can never be read via this route.

    The extension is checked *before* the file is looked up, so a
    request for a disallowed type gets the same 403 whether or not the
    file exists and the route cannot be used to probe for such files.

    HTML files may freely reference co-located CSS and JS files; the
    browser will fetch those via additional ``GET /pa/files/…`` calls
    which are also subject to the same security checks.

    Args:
        file_path: Path of the requested file, relative to the
            workspace directory (taken from the URL).

    Returns:
        A ``FileResponse`` with the whitelisted MIME type and the
        security headers on success; otherwise an ``ErrorResponse``
        with status 403 (traversal or disallowed type) or 404 (no such
        regular file).
    """
    from g4f.mcp.pa_provider import get_workspace_dir
    workspace = get_workspace_dir()

    # Normalise and check for traversal.  resolve() follows symlinks, so a
    # link pointing outside the workspace is rejected by relative_to() too.
    try:
        resolved = (workspace / file_path).resolve()
        resolved.relative_to(workspace.resolve())
    except (ValueError, OSError, RuntimeError):
        # ValueError: outside the workspace or an embedded NUL byte;
        # OSError / RuntimeError: the path could not be resolved.
        return ErrorResponse.from_message(
            "Path traversal is not allowed", HTTP_403_FORBIDDEN
        )

    ext = resolved.suffix.lstrip(".").lower()
    mime_type = _WORKSPACE_SAFE_TYPES.get(ext)
    if mime_type is None:
        return ErrorResponse.from_message(
            f"File type '.{ext}' is not allowed for browser rendering",
            HTTP_403_FORBIDDEN,
        )

    # is_file() is already False for paths that do not exist.
    if not resolved.is_file():
        return ErrorResponse.from_message(
            f"File not found: {file_path}", HTTP_404_NOT_FOUND
        )

    return FileResponse(
        str(resolved),
        media_type=mime_type,
        headers=dict(_WORKSPACE_SECURITY_HEADERS),
    )


# Diff context following this hunk (truncated in this view, not part of the
# route above):
#   responses = { HTTP_200_OK: {"model": TranscriptionResponseModel},
#                 HTTP_401_UNAUTHORIZED: {"model": ErrorResponseModel},
#                 HTTP_404_NOT_FOUND: {"model": ErrorResponseModel}, ...