Commit 405ca38
Parent(s): a4ca470

Add Git LFS support and migrate binary files

Files changed:

- .dockerignore +5 -0
- .env.example +68 -0
- .github/workflows/build.yml +124 -0
- .gitignore +192 -0
- .vscode/settings.json +11 -0
- Dockerfile +99 -0
- LICENSE +21 -0
- README_DEPLOYMENT.md +70 -0
- SECURITY.md +19 -0
- app.py +17 -0
- assets/examples/test.png +3 -0
- assets/web-ui.png +3 -0
- docker-compose.yml +80 -0
- requirements.txt +10 -0
- src/__init__.py +0 -0
- src/agent/__init__.py +0 -0
- src/agent/browser_use/browser_use_agent.py +169 -0
- src/agent/deep_research/deep_research_agent.py +1261 -0
- src/browser/__init__.py +0 -0
- src/browser/custom_browser.py +109 -0
- src/browser/custom_context.py +22 -0
- src/controller/__init__.py +0 -0
- src/controller/custom_controller.py +182 -0
- src/utils/__init__.py +0 -0
- src/utils/config.py +100 -0
- src/utils/llm_provider.py +354 -0
- src/utils/mcp_client.py +254 -0
- src/utils/utils.py +39 -0
- src/webui/__init__.py +0 -0
- src/webui/components/__init__.py +0 -0
- src/webui/components/agent_settings_tab.py +269 -0
- src/webui/components/browser_settings_tab.py +161 -0
- src/webui/components/browser_use_agent_tab.py +1083 -0
- src/webui/components/deep_research_agent_tab.py +457 -0
- src/webui/components/load_save_config_tab.py +50 -0
- src/webui/interface.py +95 -0
- src/webui/webui_manager.py +122 -0
- supervisord.conf +80 -0
- tests/test_agents.py +400 -0
- tests/test_controller.py +131 -0
- tests/test_llm_api.py +159 -0
- tests/test_playwright.py +31 -0
- webui.py +19 -0
.dockerignore
ADDED
@@ -0,0 +1,5 @@
+data
+tmp
+results
+
+.env
.env.example
ADDED
@@ -0,0 +1,68 @@
+OPENAI_ENDPOINT=https://api.openai.com/v1
+OPENAI_API_KEY=
+
+ANTHROPIC_API_KEY=
+ANTHROPIC_ENDPOINT=https://api.anthropic.com
+
+GOOGLE_API_KEY=
+
+AZURE_OPENAI_ENDPOINT=
+AZURE_OPENAI_API_KEY=
+AZURE_OPENAI_API_VERSION=2025-01-01-preview
+
+DEEPSEEK_ENDPOINT=https://api.deepseek.com
+DEEPSEEK_API_KEY=
+
+MISTRAL_API_KEY=
+MISTRAL_ENDPOINT=https://api.mistral.ai/v1
+
+OLLAMA_ENDPOINT=http://localhost:11434
+
+ALIBABA_ENDPOINT=https://dashscope.aliyuncs.com/compatible-mode/v1
+ALIBABA_API_KEY=
+
+MOONSHOT_ENDPOINT=https://api.moonshot.cn/v1
+MOONSHOT_API_KEY=
+
+UNBOUND_ENDPOINT=https://api.getunbound.ai
+UNBOUND_API_KEY=
+
+SiliconFLOW_ENDPOINT=https://api.siliconflow.cn/v1/
+SiliconFLOW_API_KEY=
+
+IBM_ENDPOINT=https://us-south.ml.cloud.ibm.com
+IBM_API_KEY=
+IBM_PROJECT_ID=
+
+GROK_ENDPOINT="https://api.x.ai/v1"
+GROK_API_KEY=
+
+#set default LLM
+DEFAULT_LLM=openai
+
+
+# Set to false to disable anonymized telemetry
+ANONYMIZED_TELEMETRY=false
+
+# LogLevel: Set to debug to enable verbose logging, set to result to get results only. Available: result | debug | info
+BROWSER_USE_LOGGING_LEVEL=info
+
+# Browser settings
+BROWSER_PATH=
+BROWSER_USER_DATA=
+BROWSER_DEBUGGING_PORT=9222
+BROWSER_DEBUGGING_HOST=localhost
+# Set to true to keep browser open between AI tasks
+KEEP_BROWSER_OPEN=true
+USE_OWN_BROWSER=false
+BROWSER_CDP=
+# Display settings
+# Format: WIDTHxHEIGHTxDEPTH
+RESOLUTION=1920x1080x24
+# Width in pixels
+RESOLUTION_WIDTH=1920
+# Height in pixels
+RESOLUTION_HEIGHT=1080
+
+# VNC settings
+VNC_PASSWORD=youvncpassword
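Note: these variables are read at start-up via python-dotenv (`app.py` below calls `load_dotenv()`). A minimal sketch of how one provider's endpoint/key pair can be resolved by naming convention; `provider_config` is a hypothetical helper, the repo's actual provider wiring presumably lives in `src/utils/llm_provider.py` (not shown in this section):

```python
import os

from dotenv import load_dotenv  # python-dotenv, same mechanism app.py uses

load_dotenv()  # pulls .env entries into the process environment


def provider_config(provider: str) -> dict:
    # Hypothetical helper for illustration only; not part of this repo.
    prefix = provider.upper()
    return {
        "endpoint": os.getenv(f"{prefix}_ENDPOINT", ""),
        "api_key": os.getenv(f"{prefix}_API_KEY", ""),
    }


print(provider_config("openai"))  # {'endpoint': 'https://api.openai.com/v1', 'api_key': ''}
```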
.github/workflows/build.yml
ADDED
@@ -0,0 +1,124 @@
+name: Build Docker Image
+
+on:
+  release:
+    types: [published]
+  push:
+    branches: [main]
+
+env:
+  GITHUB_CR_REPO: ghcr.io/${{ github.repository }}
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - linux/amd64
+          - linux/arm64
+    steps:
+      - name: Prepare
+        run: |
+          platform=${{ matrix.platform }}
+          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ${{ env.GITHUB_CR_REPO }}
+
+      - name: Login to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build and push by digest
+        id: build
+        uses: docker/build-push-action@v6
+        with:
+          platforms: ${{ matrix.platform }}
+          labels: ${{ steps.meta.outputs.labels }}
+          tags: |
+            ${{ env.GITHUB_CR_REPO }}
+          build-args: |
+            TARGETPLATFORM=${{ matrix.platform }}
+          outputs: type=image,push-by-digest=true,name-canonical=true,push=true
+
+      - name: Export digest
+        run: |
+          mkdir -p ${{ runner.temp }}/digests
+          digest="${{ steps.build.outputs.digest }}"
+          touch "${{ runner.temp }}/digests/${digest#sha256:}"
+
+      - name: Upload digest
+        uses: actions/upload-artifact@v4
+        with:
+          name: digests-${{ env.PLATFORM_PAIR }}
+          path: ${{ runner.temp }}/digests/*
+          if-no-files-found: error
+          retention-days: 1
+
+  merge:
+    runs-on: ubuntu-latest
+    needs:
+      - build
+    steps:
+      - name: Download digests
+        uses: actions/download-artifact@v4
+        with:
+          path: ${{ runner.temp }}/digests
+          pattern: digests-*
+          merge-multiple: true
+
+      - name: Login to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ${{ env.GITHUB_CR_REPO }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}
+
+      - name: Docker tags
+        run: |
+          tags=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON")
+          if [ -z "$tags" ]; then
+            echo "DOCKER_METADATA_OUTPUT_VERSION=${{ github.ref_name }}" >> $GITHUB_ENV
+            tags="-t ${{ env.GITHUB_CR_REPO }}:${{ github.ref_name }}"
+          fi
+          echo "DOCKER_METADATA_TAGS=$tags" >> $GITHUB_ENV
+
+      - name: Create manifest list and push
+        working-directory: ${{ runner.temp }}/digests
+        run: |
+          docker buildx imagetools create ${{ env.DOCKER_METADATA_TAGS }} \
+            $(printf '${{ env.GITHUB_CR_REPO }}@sha256:%s ' *)
+
+      - name: Inspect image
+        run: |
+          docker buildx imagetools inspect ${{ env.GITHUB_CR_REPO }}:${{ env.DOCKER_METADATA_OUTPUT_VERSION }}
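Note: this workflow follows the standard push-by-digest multi-arch pattern: each `build` matrix job pushes a single-platform image identified only by its digest and uploads that digest as an artifact; the `merge` job then collects all digests and stitches them into one multi-arch manifest with `docker buildx imagetools create`. The fallback in the "Docker tags" step covers runs where `metadata-action` emits no tags, tagging the manifest with `github.ref_name` instead.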
.gitignore
ADDED
@@ -0,0 +1,192 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+test_env/
+myenv
+
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+temp
+tmp
+
+
+.DS_Store
+
+private_example.py
+private_example
+
+browser_cookies.json
+cookies.json
+AgentHistory.json
+cv_04_24.pdf
+AgentHistoryList.json
+*.gif
+
+# For Sharing (.pem files)
+.gradio/
+
+# For Docker
+data/
+
+# For Config Files (Current Settings)
+.config.pkl
+*.pdf
+
+workflow
.vscode/settings.json
ADDED
@@ -0,0 +1,11 @@
+{
+    "python.analysis.typeCheckingMode": "basic",
+    "[python]": {
+        "editor.defaultFormatter": "charliermarsh.ruff",
+        "editor.formatOnSave": true,
+        "editor.codeActionsOnSave": {
+            "source.fixAll.ruff": "explicit",
+            "source.organizeImports.ruff": "explicit"
+        }
+    }
+}
Dockerfile
ADDED
@@ -0,0 +1,99 @@
+FROM python:3.11-slim
+
+# Set platform for multi-arch builds (Docker Buildx will set this)
+ARG TARGETPLATFORM
+ARG NODE_MAJOR=20
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    wget \
+    netcat-traditional \
+    gnupg \
+    curl \
+    unzip \
+    xvfb \
+    libgconf-2-4 \
+    libxss1 \
+    libnss3 \
+    libnspr4 \
+    libasound2 \
+    libatk1.0-0 \
+    libatk-bridge2.0-0 \
+    libcups2 \
+    libdbus-1-3 \
+    libdrm2 \
+    libgbm1 \
+    libgtk-3-0 \
+    libxcomposite1 \
+    libxdamage1 \
+    libxfixes3 \
+    libxrandr2 \
+    xdg-utils \
+    fonts-liberation \
+    dbus \
+    xauth \
+    x11vnc \
+    tigervnc-tools \
+    supervisor \
+    net-tools \
+    procps \
+    git \
+    python3-numpy \
+    fontconfig \
+    fonts-dejavu \
+    fonts-dejavu-core \
+    fonts-dejavu-extra \
+    vim \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install noVNC
+RUN git clone https://github.com/novnc/noVNC.git /opt/novnc \
+    && git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \
+    && ln -s /opt/novnc/vnc.html /opt/novnc/index.html
+
+# Install Node.js using NodeSource PPA
+RUN mkdir -p /etc/apt/keyrings \
+    && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
+    && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list \
+    && apt-get update \
+    && apt-get install nodejs -y \
+    && rm -rf /var/lib/apt/lists/*
+
+# Verify Node.js and npm installation (optional, but good for debugging)
+RUN node -v && npm -v && npx -v
+
+# Set up working directory
+WORKDIR /app
+
+# Copy requirements and install Python dependencies
+COPY requirements.txt .
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Install playwright browsers and dependencies
+# playwright documentation suggests PLAYWRIGHT_BROWSERS_PATH is still relevant
+# or that playwright installs to a similar default location that Playwright would.
+# Let's assume playwright respects PLAYWRIGHT_BROWSERS_PATH or its default install location is findable.
+ENV PLAYWRIGHT_BROWSERS_PATH=/ms-browsers
+RUN mkdir -p $PLAYWRIGHT_BROWSERS_PATH
+
+# Install recommended: Google Chrome (instead of just Chromium for better undetectability)
+# The 'playwright install chrome' command might download and place it.
+# The '--with-deps' equivalent for playwright install is to run 'playwright install-deps chrome' after.
+# RUN playwright install chrome --with-deps
+
+# Alternative: Install Chromium if Google Chrome is problematic in certain environments
+RUN playwright install chromium --with-deps
+
+
+# Copy the application code
+COPY . .
+
+# Set up supervisor configuration
+RUN mkdir -p /var/log/supervisor
+COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
+
+EXPOSE 7788 6080 5901 9222
+
+CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
+#CMD ["/bin/bash"]
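Note: the comments above hedge on whether `playwright install` honors `PLAYWRIGHT_BROWSERS_PATH`. A quick sanity check that could be run inside the built image, sketched with Playwright's standard sync API (nothing here is specific to this repo):

```python
from playwright.sync_api import sync_playwright

# Launches the Chromium that `playwright install chromium` placed under
# PLAYWRIGHT_BROWSERS_PATH (/ms-browsers in this image); this fails loudly
# if the browsers were installed somewhere the runtime cannot find.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    print("Chromium OK:", browser.version)
    browser.close()
```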
LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Browser Use Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
README_DEPLOYMENT.md
ADDED
@@ -0,0 +1,70 @@
+# Browser Automation WebUI - Deployment Guide
+
+## Deploying to Hugging Face Spaces
+
+### Prerequisites
+- A Hugging Face account
+- Your code pushed to a Git repository
+
+### Steps to Deploy
+
+1. **Create a new Space on Hugging Face**
+   - Go to https://huggingface.co/spaces
+   - Click "Create new Space"
+   - Choose "Gradio" as the SDK
+   - Select your repository or create a new one
+
+2. **File Structure for Deployment**
+   ```
+   web-ui/
+   ├── app.py              # Main entry point (created)
+   ├── requirements.txt    # Dependencies
+   ├── src/                # Source code
+   └── README.md           # Documentation
+   ```
+
+3. **Key Files for Deployment**
+   - `app.py`: Main entry point that Gradio will use
+   - `requirements.txt`: All necessary dependencies
+   - `src/`: Your source code directory
+
+### Troubleshooting the "Failed to canonicalize script path" Error
+
+This error typically occurs when:
+- Gradio can't find the main entry point
+- Import paths are not properly configured
+- File structure doesn't match deployment expectations
+
+**Solution**: The `app.py` file has been created to serve as the proper entry point for Gradio deployment.
+
+### Environment Variables
+
+If your app requires environment variables, you can set them in the Hugging Face Space settings:
+- Go to your Space settings
+- Navigate to "Repository secrets"
+- Add any required environment variables
+
+### Local Testing
+
+To test the deployment locally before pushing:
+
+```bash
+cd web-ui
+python app.py
+```
+
+This should start the Gradio interface without the canonicalization error.
+
+### Common Issues and Solutions
+
+1. **Import Errors**: Make sure all imports use relative paths from the project root
+2. **Missing Dependencies**: Ensure all packages are listed in `requirements.txt`
+3. **Path Issues**: The `app.py` file includes proper path configuration
+
+### Deployment Checklist
+
+- [ ] `app.py` exists and is properly configured
+- [ ] All dependencies are in `requirements.txt`
+- [ ] All import paths are correct
+- [ ] Environment variables are configured (if needed)
+- [ ] Local testing works without errors
SECURITY.md
ADDED
@@ -0,0 +1,19 @@
+## Reporting Security Issues
+
+If you believe you have found a security vulnerability in browser-use, please report it through coordinated disclosure.
+
+**Please do not report security vulnerabilities through the repository issues, discussions, or pull requests.**
+
+Instead, please open a new [Github security advisory](https://github.com/browser-use/web-ui/security/advisories/new).
+
+Please include as much of the information listed below as you can to help me better understand and resolve the issue:
+
+* The type of issue (e.g., buffer overflow, SQL injection, or cross-site scripting)
+* Full paths of source file(s) related to the manifestation of the issue
+* The location of the affected source code (tag/branch/commit or direct URL)
+* Any special configuration required to reproduce the issue
+* Step-by-step instructions to reproduce the issue
+* Proof-of-concept or exploit code (if possible)
+* Impact of the issue, including how an attacker might exploit the issue
+
+This information will help me triage your report more quickly.
app.py
ADDED
@@ -0,0 +1,17 @@
+import os
+import sys
+
+# Add the current directory to Python path to ensure imports work
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+from dotenv import load_dotenv
+load_dotenv()
+
+from src.webui.interface import create_ui
+
+# Create the Gradio app
+demo = create_ui(theme_name="Ocean")
+
+# For deployment, we need to expose the app directly
+if __name__ == "__main__":
+    demo.launch()
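Note: on a Gradio-SDK Space the platform imports `app.py` and serves the module-level `demo` object itself, so the `demo.launch()` guard only matters for local runs. For local testing it can be pinned to a host/port with standard `gradio` launch parameters; a sketch (7788 mirrors the port the Docker setup exposes):

```python
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7788)
```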
assets/examples/test.png
ADDED
Git LFS Details
assets/web-ui.png
ADDED
Git LFS Details
docker-compose.yml
ADDED
@@ -0,0 +1,80 @@
+services:
+  # debug: docker compose run --rm -it browser-use-webui bash
+  browser-use-webui:
+    # image: ghcr.io/browser-use/web-ui  # Using precompiled image
+    build:
+      context: .
+      dockerfile: Dockerfile
+      args:
+        TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
+    ports:
+      - "7788:7788"
+      - "6080:6080"
+      - "5901:5901"
+      - "9222:9222"
+    environment:
+      # LLM API Keys & Endpoints
+      - OPENAI_ENDPOINT=${OPENAI_ENDPOINT:-https://api.openai.com/v1}
+      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
+      - ANTHROPIC_ENDPOINT=${ANTHROPIC_ENDPOINT:-https://api.anthropic.com}
+      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
+      - GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
+      - AZURE_OPENAI_ENDPOINT=${AZURE_OPENAI_ENDPOINT:-}
+      - AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY:-}
+      - AZURE_OPENAI_API_VERSION=${AZURE_OPENAI_API_VERSION:-2025-01-01-preview}
+      - DEEPSEEK_ENDPOINT=${DEEPSEEK_ENDPOINT:-https://api.deepseek.com}
+      - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-}
+      - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-http://localhost:11434}
+      - MISTRAL_ENDPOINT=${MISTRAL_ENDPOINT:-https://api.mistral.ai/v1}
+      - MISTRAL_API_KEY=${MISTRAL_API_KEY:-}
+      - ALIBABA_ENDPOINT=${ALIBABA_ENDPOINT:-https://dashscope.aliyuncs.com/compatible-mode/v1}
+      - ALIBABA_API_KEY=${ALIBABA_API_KEY:-}
+      - MOONSHOT_ENDPOINT=${MOONSHOT_ENDPOINT:-https://api.moonshot.cn/v1}
+      - MOONSHOT_API_KEY=${MOONSHOT_API_KEY:-}
+      - UNBOUND_ENDPOINT=${UNBOUND_ENDPOINT:-https://api.getunbound.ai}
+      - UNBOUND_API_KEY=${UNBOUND_API_KEY:-}
+      - SiliconFLOW_ENDPOINT=${SiliconFLOW_ENDPOINT:-https://api.siliconflow.cn/v1/}
+      - SiliconFLOW_API_KEY=${SiliconFLOW_API_KEY:-}
+      - IBM_ENDPOINT=${IBM_ENDPOINT:-https://us-south.ml.cloud.ibm.com}
+      - IBM_API_KEY=${IBM_API_KEY:-}
+      - IBM_PROJECT_ID=${IBM_PROJECT_ID:-}
+
+      # Application Settings
+      - ANONYMIZED_TELEMETRY=${ANONYMIZED_TELEMETRY:-false}
+      - BROWSER_USE_LOGGING_LEVEL=${BROWSER_USE_LOGGING_LEVEL:-info}
+
+      # Browser Settings
+      - BROWSER_PATH=
+      - BROWSER_USER_DATA=
+      - BROWSER_DEBUGGING_PORT=${BROWSER_DEBUGGING_PORT:-9222}
+      - BROWSER_DEBUGGING_HOST=localhost
+      - USE_OWN_BROWSER=false
+      - KEEP_BROWSER_OPEN=true
+      - BROWSER_CDP=${BROWSER_CDP:-}  # e.g., http://localhost:9222
+
+      # Display Settings
+      - DISPLAY=:99
+      # This ENV is used by the Dockerfile during build time if playwright respects it.
+      # It's not strictly needed at runtime by docker-compose unless your app or scripts also read it.
+      - PLAYWRIGHT_BROWSERS_PATH=/ms-browsers  # Matches Dockerfile ENV
+      - RESOLUTION=${RESOLUTION:-1920x1080x24}
+      - RESOLUTION_WIDTH=${RESOLUTION_WIDTH:-1920}
+      - RESOLUTION_HEIGHT=${RESOLUTION_HEIGHT:-1080}
+
+      # VNC Settings
+      - VNC_PASSWORD=${VNC_PASSWORD:-youvncpassword}
+
+    volumes:
+      - /tmp/.X11-unix:/tmp/.X11-unix
+      # - ./my_chrome_data:/app/data/chrome_data  # Optional: persist browser data
+    restart: unless-stopped
+    shm_size: '2gb'
+    cap_add:
+      - SYS_ADMIN
+    tmpfs:
+      - /tmp
+    healthcheck:
+      test: ["CMD", "nc", "-z", "localhost", "5901"]  # VNC port
+      interval: 10s
+      timeout: 5s
+      retries: 3
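Note: for a local smoke test, `docker compose up --build` then open the Gradio UI at `http://localhost:7788` and the noVNC view of the virtual display at `http://localhost:6080` (password from `VNC_PASSWORD`). The `shm_size: '2gb'` and `SYS_ADMIN` capability are there for Chromium, which needs generous shared memory and extra privileges for its sandbox inside containers.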
requirements.txt
ADDED
@@ -0,0 +1,10 @@
+browser-use==0.1.48
+pyperclip==1.9.0
+gradio==5.27.0
+json-repair
+langchain-mistralai==0.2.4
+MainContentExtractor==0.0.4
+langchain-ibm==0.3.10
+langchain_mcp_adapters==0.0.9
+langgraph==0.3.34
+langchain-community
src/__init__.py
ADDED
File without changes
src/agent/__init__.py
ADDED
File without changes
src/agent/browser_use/browser_use_agent.py
ADDED
@@ -0,0 +1,169 @@
+from __future__ import annotations
+
+import asyncio
+import logging
+import os
+
+# from lmnr.sdk.decorators import observe
+from browser_use.agent.gif import create_history_gif
+from browser_use.agent.service import Agent, AgentHookFunc
+from browser_use.agent.views import (
+    ActionResult,
+    AgentHistory,
+    AgentHistoryList,
+    AgentStepInfo,
+    ToolCallingMethod,
+)
+from browser_use.browser.views import BrowserStateHistory
+from browser_use.utils import time_execution_async
+from dotenv import load_dotenv
+from browser_use.agent.message_manager.utils import is_model_without_tool_support
+
+load_dotenv()
+logger = logging.getLogger(__name__)
+
+SKIP_LLM_API_KEY_VERIFICATION = (
+    os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[0] in "ty1"
+)
+
+
+class BrowserUseAgent(Agent):
+    def _set_tool_calling_method(self) -> ToolCallingMethod | None:
+        tool_calling_method = self.settings.tool_calling_method
+        if tool_calling_method == 'auto':
+            if is_model_without_tool_support(self.model_name):
+                return 'raw'
+            elif self.chat_model_library == 'ChatGoogleGenerativeAI':
+                return None
+            elif self.chat_model_library == 'ChatOpenAI':
+                return 'function_calling'
+            elif self.chat_model_library == 'AzureChatOpenAI':
+                return 'function_calling'
+            else:
+                return None
+        else:
+            return tool_calling_method
+
+    @time_execution_async("--run (agent)")
+    async def run(
+            self, max_steps: int = 100, on_step_start: AgentHookFunc | None = None,
+            on_step_end: AgentHookFunc | None = None
+    ) -> AgentHistoryList:
+        """Execute the task with maximum number of steps"""
+
+        loop = asyncio.get_event_loop()
+
+        # Set up the Ctrl+C signal handler with callbacks specific to this agent
+        from browser_use.utils import SignalHandler
+
+        signal_handler = SignalHandler(
+            loop=loop,
+            pause_callback=self.pause,
+            resume_callback=self.resume,
+            custom_exit_callback=None,  # No special cleanup needed on forced exit
+            exit_on_second_int=True,
+        )
+        signal_handler.register()
+
+        try:
+            self._log_agent_run()
+
+            # Execute initial actions if provided
+            if self.initial_actions:
+                result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
+                self.state.last_result = result
+
+            for step in range(max_steps):
+                # Check if waiting for user input after Ctrl+C
+                if self.state.paused:
+                    signal_handler.wait_for_resume()
+                    signal_handler.reset()
+
+                # Check if we should stop due to too many failures
+                if self.state.consecutive_failures >= self.settings.max_failures:
+                    logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
+                    break
+
+                # Check control flags before each step
+                if self.state.stopped:
+                    logger.info('Agent stopped')
+                    break
+
+                while self.state.paused:
+                    await asyncio.sleep(0.2)  # Small delay to prevent CPU spinning
+                    if self.state.stopped:  # Allow stopping while paused
+                        break
+
+                if on_step_start is not None:
+                    await on_step_start(self)
+
+                step_info = AgentStepInfo(step_number=step, max_steps=max_steps)
+                await self.step(step_info)
+
+                if on_step_end is not None:
+                    await on_step_end(self)
+
+                if self.state.history.is_done():
+                    if self.settings.validate_output and step < max_steps - 1:
+                        if not await self._validate_output():
+                            continue
+
+                    await self.log_completion()
+                    break
+            else:
+                error_message = 'Failed to complete task in maximum steps'
+
+                self.state.history.history.append(
+                    AgentHistory(
+                        model_output=None,
+                        result=[ActionResult(error=error_message, include_in_memory=True)],
+                        state=BrowserStateHistory(
+                            url='',
+                            title='',
+                            tabs=[],
+                            interacted_element=[],
+                            screenshot=None,
+                        ),
+                        metadata=None,
+                    )
+                )
+
+                logger.info(f'❌ {error_message}')
+
+            return self.state.history
+
+        except KeyboardInterrupt:
+            # Already handled by our signal handler, but catch any direct KeyboardInterrupt as well
+            logger.info('Got KeyboardInterrupt during execution, returning current history')
+            return self.state.history
+
+        finally:
+            # Unregister signal handlers before cleanup
+            signal_handler.unregister()
+
+            if self.settings.save_playwright_script_path:
+                logger.info(
+                    f'Agent run finished. Attempting to save Playwright script to: {self.settings.save_playwright_script_path}'
+                )
+                try:
+                    # Extract sensitive data keys if sensitive_data is provided
+                    keys = list(self.sensitive_data.keys()) if self.sensitive_data else None
+                    # Pass browser and context config to the saving method
+                    self.state.history.save_as_playwright_script(
+                        self.settings.save_playwright_script_path,
+                        sensitive_data_keys=keys,
+                        browser_config=self.browser.config,
+                        context_config=self.browser_context.config,
+                    )
+                except Exception as script_gen_err:
+                    # Log any error during script generation/saving
+                    logger.error(f'Failed to save Playwright script: {script_gen_err}', exc_info=True)
+
+            await self.close()
+
+            if self.settings.generate_gif:
+                output_path: str = 'agent_history.gif'
+                if isinstance(self.settings.generate_gif, str):
+                    output_path = self.settings.generate_gif
+
+                create_history_gif(task=self.task, history=self.state.history, output_path=output_path)
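Note: the subclass keeps `browser_use.Agent`'s constructor, so driving it looks like stock browser-use. A minimal sketch, mirroring how `deep_research_agent.py` below wires it up; the `ChatOpenAI` model choice is illustrative, and it assumes browser-use 0.1.x's behavior of accepting any LangChain chat model and creating its own browser when none is passed:

```python
import asyncio

from langchain_openai import ChatOpenAI  # pulled in transitively by browser-use 0.1.x

from src.agent.browser_use.browser_use_agent import BrowserUseAgent


async def main():
    agent = BrowserUseAgent(
        task="Find the latest browser-use release notes and summarize them.",
        llm=ChatOpenAI(model="gpt-4o"),  # illustrative; any LangChain chat model
    )
    history = await agent.run(max_steps=25)
    print(history.final_result())  # same accessor deep_research_agent.py uses


asyncio.run(main())
```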
src/agent/deep_research/deep_research_agent.py
ADDED
|
@@ -0,0 +1,1261 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import json
|
| 3 |
+
import logging
|
| 4 |
+
import os
|
| 5 |
+
import threading
|
| 6 |
+
import uuid
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import Any, Dict, List, Optional, TypedDict
|
| 9 |
+
|
| 10 |
+
from browser_use.browser.browser import BrowserConfig
|
| 11 |
+
from langchain_community.tools.file_management import (
|
| 12 |
+
ListDirectoryTool,
|
| 13 |
+
ReadFileTool,
|
| 14 |
+
WriteFileTool,
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
# Langchain imports
|
| 18 |
+
from langchain_core.messages import (
|
| 19 |
+
AIMessage,
|
| 20 |
+
BaseMessage,
|
| 21 |
+
HumanMessage,
|
| 22 |
+
SystemMessage,
|
| 23 |
+
ToolMessage,
|
| 24 |
+
)
|
| 25 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 26 |
+
from langchain_core.tools import StructuredTool, Tool
|
| 27 |
+
|
| 28 |
+
# Langgraph imports
|
| 29 |
+
from langgraph.graph import StateGraph
|
| 30 |
+
from pydantic import BaseModel, Field
|
| 31 |
+
|
| 32 |
+
from browser_use.browser.context import BrowserContextConfig
|
| 33 |
+
|
| 34 |
+
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
|
| 35 |
+
from src.browser.custom_browser import CustomBrowser
|
| 36 |
+
from src.controller.custom_controller import CustomController
|
| 37 |
+
from src.utils.mcp_client import setup_mcp_client_and_tools
|
| 38 |
+
|
| 39 |
+
logger = logging.getLogger(__name__)
|
| 40 |
+
|
| 41 |
+
# Constants
|
| 42 |
+
REPORT_FILENAME = "report.md"
|
| 43 |
+
PLAN_FILENAME = "research_plan.md"
|
| 44 |
+
SEARCH_INFO_FILENAME = "search_info.json"
|
| 45 |
+
|
| 46 |
+
_AGENT_STOP_FLAGS = {}
|
| 47 |
+
_BROWSER_AGENT_INSTANCES = {}
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
async def run_single_browser_task(
|
| 51 |
+
task_query: str,
|
| 52 |
+
task_id: str,
|
| 53 |
+
llm: Any, # Pass the main LLM
|
| 54 |
+
browser_config: Dict[str, Any],
|
| 55 |
+
stop_event: threading.Event,
|
| 56 |
+
use_vision: bool = False,
|
| 57 |
+
) -> Dict[str, Any]:
|
| 58 |
+
"""
|
| 59 |
+
Runs a single BrowserUseAgent task.
|
| 60 |
+
Manages browser creation and closing for this specific task.
|
| 61 |
+
"""
|
| 62 |
+
if not BrowserUseAgent:
|
| 63 |
+
return {
|
| 64 |
+
"query": task_query,
|
| 65 |
+
"error": "BrowserUseAgent components not available.",
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
# --- Browser Setup ---
|
| 69 |
+
# These should ideally come from the main agent's config
|
| 70 |
+
headless = browser_config.get("headless", False)
|
| 71 |
+
window_w = browser_config.get("window_width", 1280)
|
| 72 |
+
window_h = browser_config.get("window_height", 1100)
|
| 73 |
+
browser_user_data_dir = browser_config.get("user_data_dir", None)
|
| 74 |
+
use_own_browser = browser_config.get("use_own_browser", False)
|
| 75 |
+
browser_binary_path = browser_config.get("browser_binary_path", None)
|
| 76 |
+
wss_url = browser_config.get("wss_url", None)
|
| 77 |
+
cdp_url = browser_config.get("cdp_url", None)
|
| 78 |
+
disable_security = browser_config.get("disable_security", False)
|
| 79 |
+
|
| 80 |
+
bu_browser = None
|
| 81 |
+
bu_browser_context = None
|
| 82 |
+
try:
|
| 83 |
+
logger.info(f"Starting browser task for query: {task_query}")
|
| 84 |
+
extra_args = []
|
| 85 |
+
if use_own_browser:
|
| 86 |
+
browser_binary_path = os.getenv("BROWSER_PATH", None) or browser_binary_path
|
| 87 |
+
if browser_binary_path == "":
|
| 88 |
+
browser_binary_path = None
|
| 89 |
+
browser_user_data = browser_user_data_dir or os.getenv("BROWSER_USER_DATA", None)
|
| 90 |
+
if browser_user_data:
|
| 91 |
+
extra_args += [f"--user-data-dir={browser_user_data}"]
|
| 92 |
+
else:
|
| 93 |
+
browser_binary_path = None
|
| 94 |
+
|
| 95 |
+
bu_browser = CustomBrowser(
|
| 96 |
+
config=BrowserConfig(
|
| 97 |
+
headless=headless,
|
| 98 |
+
browser_binary_path=browser_binary_path,
|
| 99 |
+
extra_browser_args=extra_args,
|
| 100 |
+
wss_url=wss_url,
|
| 101 |
+
cdp_url=cdp_url,
|
| 102 |
+
new_context_config=BrowserContextConfig(
|
| 103 |
+
window_width=window_w,
|
| 104 |
+
window_height=window_h,
|
| 105 |
+
)
|
| 106 |
+
)
|
| 107 |
+
)
|
| 108 |
+
|
| 109 |
+
context_config = BrowserContextConfig(
|
| 110 |
+
save_downloads_path="./tmp/downloads",
|
| 111 |
+
window_height=window_h,
|
| 112 |
+
window_width=window_w,
|
| 113 |
+
force_new_context=True,
|
| 114 |
+
)
|
| 115 |
+
bu_browser_context = await bu_browser.new_context(config=context_config)
|
| 116 |
+
|
| 117 |
+
# Simple controller example, replace with your actual implementation if needed
|
| 118 |
+
bu_controller = CustomController()
|
| 119 |
+
|
| 120 |
+
# Construct the task prompt for BrowserUseAgent
|
| 121 |
+
# Instruct it to find specific info and return title/URL
|
| 122 |
+
bu_task_prompt = f"""
|
| 123 |
+
Research Task: {task_query}
|
| 124 |
+
Objective: Find relevant information answering the query.
|
| 125 |
+
Output Requirements: For each relevant piece of information found, please provide:
|
| 126 |
+
1. A concise summary of the information.
|
| 127 |
+
2. The title of the source page or document.
|
| 128 |
+
3. The URL of the source.
|
| 129 |
+
Focus on accuracy and relevance. Avoid irrelevant details.
|
| 130 |
+
PDF cannot directly extract _content, please try to download first, then using read_file, if you can't save or read, please try other methods.
|
| 131 |
+
"""
|
| 132 |
+
|
| 133 |
+
bu_agent_instance = BrowserUseAgent(
|
| 134 |
+
task=bu_task_prompt,
|
| 135 |
+
llm=llm, # Use the passed LLM
|
| 136 |
+
browser=bu_browser,
|
| 137 |
+
browser_context=bu_browser_context,
|
| 138 |
+
controller=bu_controller,
|
| 139 |
+
use_vision=use_vision,
|
| 140 |
+
source="webui",
|
| 141 |
+
)
|
| 142 |
+
|
| 143 |
+
# Store instance for potential stop() call
|
| 144 |
+
task_key = f"{task_id}_{uuid.uuid4()}"
|
| 145 |
+
_BROWSER_AGENT_INSTANCES[task_key] = bu_agent_instance
|
| 146 |
+
|
| 147 |
+
# --- Run with Stop Check ---
|
| 148 |
+
# BrowserUseAgent needs to internally check a stop signal or have a stop method.
|
| 149 |
+
# We simulate checking before starting and assume `run` might be interruptible
|
| 150 |
+
# or have its own stop mechanism we can trigger via bu_agent_instance.stop().
|
| 151 |
+
if stop_event.is_set():
|
| 152 |
+
logger.info(f"Browser task for '{task_query}' cancelled before start.")
|
| 153 |
+
return {"query": task_query, "result": None, "status": "cancelled"}
|
| 154 |
+
|
| 155 |
+
# The run needs to be awaitable and ideally accept a stop signal or have a .stop() method
|
| 156 |
+
# result = await bu_agent_instance.run(max_steps=max_steps) # Add max_steps if applicable
|
| 157 |
+
# Let's assume a simplified run for now
|
| 158 |
+
logger.info(f"Running BrowserUseAgent for: {task_query}")
|
| 159 |
+
result = await bu_agent_instance.run() # Assuming run is the main method
|
| 160 |
+
logger.info(f"BrowserUseAgent finished for: {task_query}")
|
| 161 |
+
|
| 162 |
+
final_data = result.final_result()
|
| 163 |
+
|
| 164 |
+
if stop_event.is_set():
|
| 165 |
+
logger.info(f"Browser task for '{task_query}' stopped during execution.")
|
| 166 |
+
return {"query": task_query, "result": final_data, "status": "stopped"}
|
| 167 |
+
else:
|
| 168 |
+
logger.info(f"Browser result for '{task_query}': {final_data}")
|
| 169 |
+
return {"query": task_query, "result": final_data, "status": "completed"}
|
| 170 |
+
|
| 171 |
+
except Exception as e:
|
| 172 |
+
logger.error(
|
| 173 |
+
f"Error during browser task for query '{task_query}': {e}", exc_info=True
|
| 174 |
+
)
|
| 175 |
+
return {"query": task_query, "error": str(e), "status": "failed"}
|
| 176 |
+
finally:
|
| 177 |
+
if bu_browser_context:
|
| 178 |
+
try:
|
| 179 |
+
await bu_browser_context.close()
|
| 180 |
+
bu_browser_context = None
|
| 181 |
+
logger.info("Closed browser context.")
|
| 182 |
+
except Exception as e:
|
| 183 |
+
logger.error(f"Error closing browser context: {e}")
|
| 184 |
+
if bu_browser:
|
| 185 |
+
try:
|
| 186 |
+
await bu_browser.close()
|
| 187 |
+
bu_browser = None
|
| 188 |
+
logger.info("Closed browser.")
|
| 189 |
+
except Exception as e:
|
| 190 |
+
logger.error(f"Error closing browser: {e}")
|
| 191 |
+
|
| 192 |
+
if task_key in _BROWSER_AGENT_INSTANCES:
|
| 193 |
+
del _BROWSER_AGENT_INSTANCES[task_key]
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
class BrowserSearchInput(BaseModel):
    queries: List[str] = Field(
        description="List of distinct search queries to find information relevant to the research task."
    )


async def _run_browser_search_tool(
    queries: List[str],
    task_id: str,  # Injected dependency
    llm: Any,  # Injected dependency
    browser_config: Dict[str, Any],
    stop_event: threading.Event,
    max_parallel_browsers: int = 1,
) -> List[Dict[str, Any]]:
    """
    Internal function to execute parallel browser searches based on LLM-provided queries.
    Handles concurrency and stop signals.
    """

    # Limit queries just in case the LLM ignores the description
    queries = queries[:max_parallel_browsers]
    logger.info(
        f"[Browser Tool {task_id}] Running search for {len(queries)} queries: {queries}"
    )

    semaphore = asyncio.Semaphore(max_parallel_browsers)

    async def task_wrapper(query):
        async with semaphore:
            if stop_event.is_set():
                logger.info(
                    f"[Browser Tool {task_id}] Skipping task due to stop signal: {query}"
                )
                return {"query": query, "result": None, "status": "cancelled"}
            # Pass necessary injected configs and the stop event
            return await run_single_browser_task(
                query,
                task_id,
                llm,  # Pass the main LLM (or a dedicated one if needed)
                browser_config,
                stop_event,
                # use_vision could be added here if needed
            )

    tasks = [task_wrapper(query) for query in queries]
    search_results = await asyncio.gather(*tasks, return_exceptions=True)

    processed_results = []
    for i, res in enumerate(search_results):
        query = queries[i]  # Get corresponding query
        if isinstance(res, Exception):
            logger.error(
                f"[Browser Tool {task_id}] Gather caught exception for query '{query}': {res}",
                exc_info=True,
            )
            processed_results.append(
                {"query": query, "error": str(res), "status": "failed"}
            )
        elif isinstance(res, dict):
            processed_results.append(res)
        else:
            logger.error(
                f"[Browser Tool {task_id}] Unexpected result type for query '{query}': {type(res)}"
            )
            processed_results.append(
                {"query": query, "error": "Unexpected result type", "status": "failed"}
            )

    logger.info(
        f"[Browser Tool {task_id}] Finished search. Results count: {len(processed_results)}"
    )
    return processed_results

def create_browser_search_tool(
    llm: Any,
    browser_config: Dict[str, Any],
    task_id: str,
    stop_event: threading.Event,
    max_parallel_browsers: int = 1,
) -> StructuredTool:
    """Factory function to create the browser search tool with necessary dependencies."""
    # Use partial to bind the dependencies that aren't part of the LLM call arguments
    from functools import partial

    bound_tool_func = partial(
        _run_browser_search_tool,
        task_id=task_id,
        llm=llm,
        browser_config=browser_config,
        stop_event=stop_event,
        max_parallel_browsers=max_parallel_browsers,
    )

    return StructuredTool.from_function(
        coroutine=bound_tool_func,
        name="parallel_browser_search",
        description=f"""Use this tool to actively search the web for information related to a specific research task or question.
It runs up to {max_parallel_browsers} searches in parallel using a browser agent for better results than simple scraping.
Provide a list of distinct search queries (up to {max_parallel_browsers}) that are likely to yield relevant information.""",
        args_schema=BrowserSearchInput,
    )

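# Illustrative round trip for the tool above (names follow the code, the sample
# values are made up): the LLM emits a call like {"queries": ["history of X"]},
# validated against BrowserSearchInput, and the coroutine resolves to e.g.
#   [{"query": "history of X", "result": "<agent summary>", "status": "completed"}].
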
# --- Langgraph State Definition ---


class ResearchTaskItem(TypedDict):
    # step: int  # Maybe step within category, or just implicit by order
    task_description: str
    status: str  # "pending", "completed", "failed"
    queries: Optional[List[str]]
    result_summary: Optional[str]


class ResearchCategoryItem(TypedDict):
    category_name: str
    tasks: List[ResearchTaskItem]
    # Optional: category_status: str  # Could be "pending", "in_progress", "completed"


class DeepResearchState(TypedDict):
    task_id: str
    topic: str
    research_plan: List[ResearchCategoryItem]  # CHANGED
    search_results: List[Dict[str, Any]]
    llm: Any
    tools: List[Tool]
    output_dir: Path
    browser_config: Dict[str, Any]
    final_report: Optional[str]
    current_category_index: int
    current_task_index_in_category: int
    stop_requested: bool
    error_message: Optional[str]
    messages: List[BaseMessage]


# --- Langgraph Nodes ---

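# Graph shape (mirrors _compile_graph further below):
#   plan_research -> execute_research -> should_continue()
#     -> "execute_research"   (loop while tasks remain)
#     -> "synthesize_report"  (plan exhausted)
#     -> "end_run"            (stop requested or critical error)
#   synthesize_report -> end_run
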
def _load_previous_state(task_id: str, output_dir: str) -> Dict[str, Any]:
    state_updates = {}
    plan_file = os.path.join(output_dir, PLAN_FILENAME)
    search_file = os.path.join(output_dir, SEARCH_INFO_FILENAME)

    loaded_plan: List[ResearchCategoryItem] = []
    next_cat_idx, next_task_idx = 0, 0
    found_pending = False

    if os.path.exists(plan_file):
        try:
            with open(plan_file, "r", encoding="utf-8") as f:
                current_category: Optional[ResearchCategoryItem] = None
                lines = f.readlines()
                cat_counter = 0
                task_counter_in_cat = 0

                for line_num, line_content in enumerate(lines):
                    line = line_content.strip()
                    if line.startswith("## "):  # Category
                        if current_category:  # Save previous category
                            loaded_plan.append(current_category)
                            if not found_pending:  # If previous category was all done, advance cat counter
                                cat_counter += 1
                                task_counter_in_cat = 0
                        category_name = line[line.find(" "):].strip()  # Get text after "## X. "
                        current_category = ResearchCategoryItem(category_name=category_name, tasks=[])
                    elif (line.startswith("- [ ]") or line.startswith("- [x]") or line.startswith(
                            "- [-]")) and current_category:  # Task
                        status = "pending"
                        if line.startswith("- [x]"):
                            status = "completed"
                        elif line.startswith("- [-]"):
                            status = "failed"

                        task_desc = line[5:].strip()
                        current_category["tasks"].append(
                            ResearchTaskItem(task_description=task_desc, status=status, queries=None,
                                             result_summary=None)
                        )
                        if status == "pending" and not found_pending:
                            next_cat_idx = cat_counter
                            next_task_idx = task_counter_in_cat
                            found_pending = True
                        if not found_pending:  # only increment if previous tasks were completed/failed
                            task_counter_in_cat += 1

                if current_category:  # Append last category
                    loaded_plan.append(current_category)

            if loaded_plan:
                state_updates["research_plan"] = loaded_plan
                if not found_pending and loaded_plan:  # All tasks were completed or failed
                    next_cat_idx = len(loaded_plan)  # Points beyond the last category
                    next_task_idx = 0
                state_updates["current_category_index"] = next_cat_idx
                state_updates["current_task_index_in_category"] = next_task_idx
                logger.info(
                    f"Loaded hierarchical research plan from {plan_file}. "
                    f"Next task: Category {next_cat_idx}, Task {next_task_idx} in category."
                )
            else:
                logger.warning(f"Plan file {plan_file} was empty or malformed.")

        except Exception as e:
            logger.error(f"Failed to load or parse research plan {plan_file}: {e}", exc_info=True)
            state_updates["error_message"] = f"Failed to load research plan: {e}"
    else:
        logger.info(f"Plan file {plan_file} not found. Will start fresh.")

    if os.path.exists(search_file):
        try:
            with open(search_file, "r", encoding="utf-8") as f:
                state_updates["search_results"] = json.load(f)
            logger.info(f"Loaded search results from {search_file}")
        except Exception as e:
            logger.error(f"Failed to load search results {search_file}: {e}")
            state_updates["error_message"] = (
                    state_updates.get("error_message", "") + f" Failed to load search results: {e}").strip()

    return state_updates

def _save_plan_to_md(plan: List[ResearchCategoryItem], output_dir: str):
    plan_file = os.path.join(output_dir, PLAN_FILENAME)
    try:
        with open(plan_file, "w", encoding="utf-8") as f:
            f.write("# Research Plan\n\n")
            for cat_idx, category in enumerate(plan):
                f.write(f"## {cat_idx + 1}. {category['category_name']}\n\n")
                for task_idx, task in enumerate(category['tasks']):
                    marker = "- [x]" if task["status"] == "completed" else "- [ ]" if task[
                        "status"] == "pending" else "- [-]"  # [-] for failed
                    f.write(f"  {marker} {task['task_description']}\n")
                f.write("\n")
        logger.info(f"Hierarchical research plan saved to {plan_file}")
    except Exception as e:
        logger.error(f"Failed to save research plan to {plan_file}: {e}")

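# The plan file written above (and parsed by _load_previous_state) looks like:
#   # Research Plan
#
#   ## 1. <category name>
#
#     - [x] <completed task>
#     - [ ] <pending task>
#     - [-] <failed task>
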
def _save_search_results_to_json(results: List[Dict[str, Any]], output_dir: str):
    """Appends or overwrites search results to a JSON file."""
    search_file = os.path.join(output_dir, SEARCH_INFO_FILENAME)
    try:
        # Simple overwrite for now, could be append
        with open(search_file, "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        logger.info(f"Search results saved to {search_file}")
    except Exception as e:
        logger.error(f"Failed to save search results to {search_file}: {e}")


def _save_report_to_md(report: str, output_dir: Path):
    """Saves the final report to a markdown file."""
    report_file = os.path.join(output_dir, REPORT_FILENAME)
    try:
        with open(report_file, "w", encoding="utf-8") as f:
            f.write(report)
        logger.info(f"Final report saved to {report_file}")
    except Exception as e:
        logger.error(f"Failed to save final report to {report_file}: {e}")

async def planning_node(state: DeepResearchState) -> Dict[str, Any]:
    logger.info("--- Entering Planning Node ---")
    if state.get("stop_requested"):
        logger.info("Stop requested, skipping planning.")
        return {"stop_requested": True}

    llm = state["llm"]
    topic = state["topic"]
    existing_plan = state.get("research_plan")
    output_dir = state["output_dir"]

    if existing_plan and (
            state.get("current_category_index", 0) > 0 or state.get("current_task_index_in_category", 0) > 0):
        logger.info("Resuming with existing plan.")
        _save_plan_to_md(existing_plan, output_dir)  # Ensure it's saved initially
        # current_category_index and current_task_index_in_category should be set by _load_previous_state
        return {"research_plan": existing_plan}

    logger.info(f"Generating new research plan for topic: {topic}")

    prompt_text = f"""You are a meticulous research assistant. Your goal is to create a hierarchical research plan to thoroughly investigate the topic: "{topic}".
The plan should be structured into several main research categories. Each category should contain a list of specific, actionable research tasks or questions.
Format the output as a JSON list of objects. Each object represents a research category and should have:
1. "category_name": A string for the name of the research category.
2. "tasks": A list of strings, where each string is a specific research task for that category.

Example JSON Output:
[
  {{
    "category_name": "Understanding Core Concepts and Definitions",
    "tasks": [
      "Define the primary terminology associated with '{topic}'.",
      "Identify the fundamental principles and theories underpinning '{topic}'."
    ]
  }},
  {{
    "category_name": "Historical Development and Key Milestones",
    "tasks": [
      "Trace the historical evolution of '{topic}'.",
      "Identify key figures, events, or breakthroughs in the development of '{topic}'."
    ]
  }},
  {{
    "category_name": "Current State-of-the-Art and Applications",
    "tasks": [
      "Analyze the current advancements and prominent applications of '{topic}'.",
      "Investigate ongoing research and active areas of development related to '{topic}'."
    ]
  }},
  {{
    "category_name": "Challenges, Limitations, and Future Outlook",
    "tasks": [
      "Identify the major challenges and limitations currently facing '{topic}'.",
      "Explore potential future trends, ethical considerations, and societal impacts of '{topic}'."
    ]
  }}
]

Generate a plan with 3-10 categories, and 2-6 tasks per category for the topic: "{topic}" according to the complexity of the topic.
Ensure the output is a valid JSON array.
"""
    messages = [
        SystemMessage(content="You are a research planning assistant outputting JSON."),
        HumanMessage(content=prompt_text)
    ]

    try:
        response = await llm.ainvoke(messages)
        raw_content = response.content
        # The LLM might wrap the JSON in backticks
        if raw_content.strip().startswith("```json"):
            raw_content = raw_content.strip()[7:-3].strip()
        elif raw_content.strip().startswith("```"):
            raw_content = raw_content.strip()[3:-3].strip()

        logger.debug(f"LLM response for plan: {raw_content}")
        parsed_plan_from_llm = json.loads(raw_content)

        new_plan: List[ResearchCategoryItem] = []
        for cat_idx, category_data in enumerate(parsed_plan_from_llm):
            if not isinstance(category_data,
                              dict) or "category_name" not in category_data or "tasks" not in category_data:
                logger.warning(f"Skipping invalid category data: {category_data}")
                continue

            tasks: List[ResearchTaskItem] = []
            for task_idx, task_desc in enumerate(category_data["tasks"]):
                if isinstance(task_desc, str):
                    tasks.append(
                        ResearchTaskItem(
                            task_description=task_desc,
                            status="pending",
                            queries=None,
                            result_summary=None,
                        )
                    )
                else:  # Sometimes the LLM puts tasks as {"task": "description"}
                    if isinstance(task_desc, dict) and "task_description" in task_desc:
                        tasks.append(
                            ResearchTaskItem(
                                task_description=task_desc["task_description"],
                                status="pending",
                                queries=None,
                                result_summary=None,
                            )
                        )
                    elif isinstance(task_desc, dict) and "task" in task_desc:  # common LLM mistake
                        tasks.append(
                            ResearchTaskItem(
                                task_description=task_desc["task"],
                                status="pending",
                                queries=None,
                                result_summary=None,
                            )
                        )
                    else:
                        logger.warning(
                            f"Skipping invalid task data: {task_desc} in category {category_data['category_name']}")

            new_plan.append(
                ResearchCategoryItem(
                    category_name=category_data["category_name"],
                    tasks=tasks,
                )
            )

        if not new_plan:
            logger.error("LLM failed to generate a valid plan structure from JSON.")
            return {"error_message": "Failed to generate research plan structure."}

        logger.info(f"Generated research plan with {len(new_plan)} categories.")
        _save_plan_to_md(new_plan, output_dir)  # Save the hierarchical plan

        return {
            "research_plan": new_plan,
            "current_category_index": 0,
            "current_task_index_in_category": 0,
            "search_results": [],
        }

    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse JSON from LLM for plan: {e}. Response was: {raw_content}", exc_info=True)
        return {"error_message": f"LLM generated invalid JSON for research plan: {e}"}
    except Exception as e:
        logger.error(f"Error during planning: {e}", exc_info=True)
        return {"error_message": f"LLM Error during planning: {e}"}

async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
    logger.info("--- Entering Research Execution Node ---")
    if state.get("stop_requested"):
        logger.info("Stop requested, skipping research execution.")
        return {
            "stop_requested": True,
            "current_category_index": state["current_category_index"],
            "current_task_index_in_category": state["current_task_index_in_category"],
        }

    plan = state["research_plan"]
    cat_idx = state["current_category_index"]
    task_idx = state["current_task_index_in_category"]
    llm = state["llm"]
    tools = state["tools"]
    output_dir = str(state["output_dir"])
    task_id = state["task_id"]  # For _AGENT_STOP_FLAGS

    # This check should ideally be handled by `should_continue`
    if not plan or cat_idx >= len(plan):
        logger.info("Research plan complete or categories exhausted.")
        return {}  # should route to synthesis

    current_category = plan[cat_idx]
    if task_idx >= len(current_category["tasks"]):
        logger.info(f"All tasks in category '{current_category['category_name']}' completed. Moving to next category.")
        # This logic is now effectively handled by should_continue and the index updates below.
        # The next iteration will be caught by should_continue or this node with updated indices.
        return {
            "current_category_index": cat_idx + 1,
            "current_task_index_in_category": 0,
            "messages": state["messages"]  # Pass messages along
        }

    current_task = current_category["tasks"][task_idx]

    if current_task["status"] == "completed":
        logger.info(
            f"Task '{current_task['task_description']}' in category '{current_category['category_name']}' already completed. Skipping.")
        # Logic to find next task
        next_task_idx = task_idx + 1
        next_cat_idx = cat_idx
        if next_task_idx >= len(current_category["tasks"]):
            next_cat_idx += 1
            next_task_idx = 0
        return {
            "current_category_index": next_cat_idx,
            "current_task_index_in_category": next_task_idx,
            "messages": state["messages"]  # Pass messages along
        }

    logger.info(
        f"Executing research task: '{current_task['task_description']}' (Category: '{current_category['category_name']}')"
    )

    llm_with_tools = llm.bind_tools(tools)

    # Construct messages for LLM invocation
    task_prompt_content = (
        f"Current Research Category: {current_category['category_name']}\n"
        f"Specific Task: {current_task['task_description']}\n\n"
        "Please use the available tools, especially 'parallel_browser_search', to gather information for this specific task. "
        "Provide focused search queries relevant ONLY to this task. "
        "If you believe you have sufficient information from previous steps for this specific task, you can indicate that you are ready to summarize or that no further search is needed."
    )
    current_task_message_history = [
        HumanMessage(content=task_prompt_content)
    ]
    if not state["messages"]:  # First actual execution message
        invocation_messages = [
            SystemMessage(
                content="You are a research assistant executing one task of a research plan. Focus on the current task only."),
        ] + current_task_message_history
    else:
        invocation_messages = state["messages"] + current_task_message_history

    try:
        logger.info(f"Invoking LLM with tools for task: {current_task['task_description']}")
        ai_response: BaseMessage = await llm_with_tools.ainvoke(invocation_messages)
        logger.info("LLM invocation complete.")

        tool_results = []
        executed_tool_names = []
        current_search_results = state.get("search_results", [])  # Get existing search results

        if not isinstance(ai_response, AIMessage) or not ai_response.tool_calls:
            logger.warning(
                f"LLM did not call any tool for task '{current_task['task_description']}'. Response: {ai_response.content[:100]}..."
            )
            current_task["status"] = "pending"  # Or "completed_no_tool" if LLM explains it's done
            current_task["result_summary"] = f"LLM did not use a tool. Response: {ai_response.content}"
            # We still save the plan and advance to the next task, returning a proper
            # state-update dict (not the task dict itself) so the graph does not stall.
            _save_plan_to_md(plan, output_dir)
            next_task_idx = task_idx + 1
            next_cat_idx = cat_idx
            if next_task_idx >= len(current_category["tasks"]):
                next_cat_idx += 1
                next_task_idx = 0
            return {
                "research_plan": plan,
                "current_category_index": next_cat_idx,
                "current_task_index_in_category": next_task_idx,
                "messages": state["messages"] + current_task_message_history + [ai_response],
            }
        else:
            # Process tool calls
            for tool_call in ai_response.tool_calls:
                tool_name = tool_call.get("name")
                tool_args = tool_call.get("args", {})
                tool_call_id = tool_call.get("id")

                logger.info(f"LLM requested tool call: {tool_name} with args: {tool_args}")
                executed_tool_names.append(tool_name)
                selected_tool = next((t for t in tools if t.name == tool_name), None)

                if not selected_tool:
                    logger.error(f"LLM called tool '{tool_name}' which is not available.")
                    tool_results.append(
                        ToolMessage(content=f"Error: Tool '{tool_name}' not found.", tool_call_id=tool_call_id))
                    continue

                try:
                    stop_event = _AGENT_STOP_FLAGS.get(task_id)
                    if stop_event and stop_event.is_set():
                        logger.info(f"Stop requested before executing tool: {tool_name}")
                        current_task["status"] = "pending"  # Or a new "stopped" status
                        _save_plan_to_md(plan, output_dir)
                        return {"stop_requested": True, "research_plan": plan, "current_category_index": cat_idx,
                                "current_task_index_in_category": task_idx}

                    logger.info(f"Executing tool: {tool_name}")
                    tool_output = await selected_tool.ainvoke(tool_args)
                    logger.info(f"Tool '{tool_name}' executed successfully.")

                    if tool_name == "parallel_browser_search":
                        current_search_results.extend(tool_output)  # tool_output is List[Dict]
                    else:  # For other tools, we might need specific handling or just log
                        logger.info(f"Result from tool '{tool_name}': {str(tool_output)[:200]}...")
                        # Storing non-browser results might need a different structure or key in search_results
                        current_search_results.append(
                            {"tool_name": tool_name, "args": tool_args, "output": str(tool_output),
                             "status": "completed"})

                    tool_results.append(ToolMessage(content=json.dumps(tool_output), tool_call_id=tool_call_id))

                except Exception as e:
                    logger.error(f"Error executing tool '{tool_name}': {e}", exc_info=True)
                    tool_results.append(
                        ToolMessage(content=f"Error executing tool {tool_name}: {e}", tool_call_id=tool_call_id))
                    current_search_results.append(
                        {"tool_name": tool_name, "args": tool_args, "status": "failed", "error": str(e)})

            # After processing all tool calls for this task
            step_failed_tool_execution = any("Error:" in str(tr.content) for tr in tool_results)
            # Consider a task successful if a browser search was attempted and didn't immediately error out during the call.
            # The browser search itself returns a status for each query.
            browser_tool_attempted_successfully = "parallel_browser_search" in executed_tool_names and not step_failed_tool_execution

            if step_failed_tool_execution:
                current_task["status"] = "failed"
                current_task[
                    "result_summary"] = f"Tool execution failed. Errors: {[tr.content for tr in tool_results if 'Error' in str(tr.content)]}"
            elif executed_tool_names:  # If any tool was called
                current_task["status"] = "completed"
                current_task["result_summary"] = f"Executed tool(s): {', '.join(executed_tool_names)}."
                # TODO: Could ask LLM to summarize the tool_results for this task if needed, rather than just listing tools.
            else:  # No tool calls but AI response had .tool_calls structure (empty)
                current_task["status"] = "failed"  # Or a more specific status
                current_task["result_summary"] = "LLM prepared for tool call but provided no tools."

        # Save progress
        _save_plan_to_md(plan, output_dir)
        _save_search_results_to_json(current_search_results, output_dir)

        # Determine next indices
        next_task_idx = task_idx + 1
        next_cat_idx = cat_idx
        if next_task_idx >= len(current_category["tasks"]):
            next_cat_idx += 1
            next_task_idx = 0

        updated_messages = state["messages"] + current_task_message_history + [ai_response] + tool_results

        return {
            "research_plan": plan,
            "search_results": current_search_results,
            "current_category_index": next_cat_idx,
            "current_task_index_in_category": next_task_idx,
            "messages": updated_messages,
        }

    except Exception as e:
        logger.error(f"Unhandled error during research execution for task '{current_task['task_description']}': {e}",
                     exc_info=True)
        current_task["status"] = "failed"
        _save_plan_to_md(plan, output_dir)
        # Determine next indices even on error to attempt to move on
        next_task_idx = task_idx + 1
        next_cat_idx = cat_idx
        if next_task_idx >= len(current_category["tasks"]):
            next_cat_idx += 1
            next_task_idx = 0
        return {
            "research_plan": plan,
            "current_category_index": next_cat_idx,
            "current_task_index_in_category": next_task_idx,
            "error_message": f"Core Execution Error on task '{current_task['task_description']}': {e}",
            "messages": state["messages"] + current_task_message_history  # Preserve messages up to error
        }

async def synthesis_node(state: DeepResearchState) -> Dict[str, Any]:
    """Synthesizes the final report from the collected search results."""
    logger.info("--- Entering Synthesis Node ---")
    if state.get("stop_requested"):
        logger.info("Stop requested, skipping synthesis.")
        return {"stop_requested": True}

    llm = state["llm"]
    topic = state["topic"]
    search_results = state.get("search_results", [])
    output_dir = state["output_dir"]
    plan = state["research_plan"]  # Include plan for context

    if not search_results:
        logger.warning("No search results found to synthesize report.")
        report = f"# Research Report: {topic}\n\nNo information was gathered during the research process."
        _save_report_to_md(report, output_dir)
        return {"final_report": report}

    logger.info(
        f"Synthesizing report from {len(search_results)} collected search result entries."
    )

    # Prepare context for the LLM
    # Format search results nicely, maybe group by query or original plan step
    formatted_results = ""
    references = {}
    ref_count = 1
    for i, result_entry in enumerate(search_results):
        query = result_entry.get("query", "Unknown Query")  # From parallel_browser_search
        tool_name = result_entry.get("tool_name")  # From other tools
        status = result_entry.get("status", "unknown")
        result_data = result_entry.get("result")  # From BrowserUseAgent's final_result
        tool_output_str = result_entry.get("output")  # From other tools

        if tool_name == "parallel_browser_search" and status == "completed" and result_data:
            # result_data is the summary from BrowserUseAgent
            formatted_results += f'### Finding from Web Search Query: "{query}"\n'
            formatted_results += f"- **Summary:**\n{result_data}\n"  # result_data is already a summary string here
            # If result_data contained title/URL, you'd format them here.
            # The current BrowserUseAgent returns a string summary directly as 'final_data' in run_single_browser_task
            formatted_results += "---\n"
        elif tool_name != "parallel_browser_search" and status == "completed" and tool_output_str:
            formatted_results += f'### Finding from Tool: "{tool_name}" (Args: {result_entry.get("args")})\n'
            formatted_results += f"- **Output:**\n{tool_output_str}\n"
            formatted_results += "---\n"
        elif status == "failed":
            error = result_entry.get("error")
            q_or_t = f"Query: \"{query}\"" if query != "Unknown Query" else f"Tool: \"{tool_name}\""
            formatted_results += f'### Failed {q_or_t}\n'
            formatted_results += f"- **Error:** {error}\n"
            formatted_results += "---\n"

    # Prepare the research plan context
    plan_summary = "\nResearch Plan Followed:\n"
    for cat_idx, category in enumerate(plan):
        plan_summary += f"\n#### Category {cat_idx + 1}: {category['category_name']}\n"
        for task_idx, task in enumerate(category['tasks']):
            marker = "[x]" if task["status"] == "completed" else "[ ]" if task["status"] == "pending" else "[-]"
            plan_summary += f"  - {marker} {task['task_description']}\n"

    synthesis_prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """You are a professional researcher tasked with writing a comprehensive and well-structured report based on collected findings.
The report should address the research topic thoroughly, synthesizing the information gathered from various sources.
Structure the report logically:
1. Briefly introduce the topic and the report's scope (mentioning the research plan followed, including categories and tasks, is good).
2. Discuss the key findings, organizing them thematically, possibly aligning with the research categories. Analyze, compare, and contrast information.
3. Summarize the main points and offer concluding thoughts.

Ensure the tone is objective and professional.
If findings are contradictory or incomplete, acknowledge this.
""",  # Removed citation part for simplicity for now, as the browser agent returns summaries.
            ),
            (
                "human",
                f"""
**Research Topic:** {topic}

{plan_summary}

**Collected Findings:**
```
{formatted_results}
```

Please generate the final research report in Markdown format based **only** on the information above.
""",
            ),
        ]
    )

    try:
        response = await llm.ainvoke(
            synthesis_prompt.format_prompt(
                topic=topic,
                plan_summary=plan_summary,
                formatted_results=formatted_results,
            ).to_messages()
        )
        final_report_md = response.content

        # Append the reference list automatically to the end of the generated markdown.
        # (references is only populated if per-source metadata is collected upstream.)
        if references:
            report_references_section = "\n\n## References\n\n"
            # Sort refs by ID for consistent output
            sorted_refs = sorted(references.values(), key=lambda x: x["id"])
            for ref in sorted_refs:
                report_references_section += (
                    f"[{ref['id']}] {ref['title']} - {ref['url']}\n"
                )
            final_report_md += report_references_section

        logger.info("Successfully synthesized the final report.")
        _save_report_to_md(final_report_md, output_dir)
        return {"final_report": final_report_md}

    except Exception as e:
        logger.error(f"Error during report synthesis: {e}", exc_info=True)
        return {"error_message": f"LLM Error during synthesis: {e}"}


# --- Langgraph Edges and Conditional Logic ---

def should_continue(state: DeepResearchState) -> str:
    logger.info("--- Evaluating Condition: Should Continue? ---")
    if state.get("stop_requested"):
        logger.info("Stop requested, routing to END.")
        return "end_run"
    if state.get("error_message") and "Core Execution Error" in state["error_message"]:  # Critical error in node
        logger.warning(f"Critical error detected: {state['error_message']}. Routing to END.")
        return "end_run"

    plan = state.get("research_plan")
    cat_idx = state.get("current_category_index", 0)
    task_idx = state.get("current_task_index_in_category", 0)  # This is the *next* task to check

    if not plan:
        logger.warning("No research plan found. Routing to END.")
        return "end_run"

    # Check if the current indices point to a valid pending task
    if cat_idx < len(plan):
        current_category = plan[cat_idx]
        if task_idx < len(current_category["tasks"]):
            # We are trying to execute the task at plan[cat_idx]["tasks"][task_idx].
            # The research_execution_node will handle if it's already completed.
            logger.info(
                f"Plan has potential pending tasks (next up: Category {cat_idx}, Task {task_idx}). Routing to Research Execution."
            )
            return "execute_research"
        else:  # task_idx is out of bounds for the current category, so check the next category
            if cat_idx + 1 < len(plan):  # If there is a next category
                logger.info(
                    f"Finished tasks in category {cat_idx}. Moving to category {cat_idx + 1}. Routing to Research Execution."
                )
                # The previous execution node already set these indices to the start of the next category.
                return "execute_research"

    # If we've gone through all categories and tasks (cat_idx >= len(plan))
    logger.info("All plan categories and tasks processed or current indices are out of bounds. Routing to Synthesis.")
    return "synthesize_report"


# --- DeepResearchAgent Class ---

class DeepResearchAgent:
    def __init__(
            self,
            llm: Any,
            browser_config: Dict[str, Any],
            mcp_server_config: Optional[Dict[str, Any]] = None,
    ):
        """
        Initializes the DeepResearchAgent.

        Args:
            llm: The Langchain compatible language model instance.
            browser_config: Configuration dictionary for the BrowserUseAgent tool.
                Example: {"headless": True, "window_width": 1280, ...}
            mcp_server_config: Optional configuration for the MCP client.
        """
        self.llm = llm
        self.browser_config = browser_config
        self.mcp_server_config = mcp_server_config
        self.mcp_client = None
        self.stopped = False
        self.graph = self._compile_graph()
        self.current_task_id: Optional[str] = None
        self.stop_event: Optional[threading.Event] = None
        self.runner: Optional[asyncio.Task] = None  # To hold the asyncio task for run

    async def _setup_tools(
            self, task_id: str, stop_event: threading.Event, max_parallel_browsers: int = 1
    ) -> List[Tool]:
        """Sets up the basic tools (File I/O) and optional MCP tools."""
        tools = [
            WriteFileTool(),
            ReadFileTool(),
            ListDirectoryTool(),
        ]  # Basic file operations
        browser_use_tool = create_browser_search_tool(
            llm=self.llm,
            browser_config=self.browser_config,
            task_id=task_id,
            stop_event=stop_event,
            max_parallel_browsers=max_parallel_browsers,
        )
        tools += [browser_use_tool]
        # Add MCP tools if config is provided
        if self.mcp_server_config:
            try:
                logger.info("Setting up MCP client and tools...")
                if not self.mcp_client:
                    self.mcp_client = await setup_mcp_client_and_tools(
                        self.mcp_server_config
                    )
                mcp_tools = self.mcp_client.get_tools()
                logger.info(f"Loaded {len(mcp_tools)} MCP tools.")
                tools.extend(mcp_tools)
            except Exception as e:
                logger.error(f"Failed to set up MCP tools: {e}", exc_info=True)
        # De-duplicate by tool name and return a list, matching the annotated return type
        tools_map = {tool.name: tool for tool in tools}
        return list(tools_map.values())

    async def close_mcp_client(self):
        if self.mcp_client:
            await self.mcp_client.__aexit__(None, None, None)
            self.mcp_client = None

    def _compile_graph(self) -> StateGraph:
        """Compiles the Langgraph state machine."""
        workflow = StateGraph(DeepResearchState)

        # Add nodes
        workflow.add_node("plan_research", planning_node)
        workflow.add_node("execute_research", research_execution_node)
        workflow.add_node("synthesize_report", synthesis_node)
        workflow.add_node(
            "end_run", lambda state: logger.info("--- Reached End Run Node ---") or {}
        )  # Simple end node

        # Define edges
        workflow.set_entry_point("plan_research")

        workflow.add_edge(
            "plan_research", "execute_research"
        )  # Always execute after planning

        # Conditional edge after execution
        workflow.add_conditional_edges(
            "execute_research",
            should_continue,
            {
                "execute_research": "execute_research",  # Loop back if more steps
                "synthesize_report": "synthesize_report",  # Move to synthesis if done
                "end_run": "end_run",  # End if stop requested or error
            },
        )

        workflow.add_edge("synthesize_report", "end_run")  # End after synthesis

        app = workflow.compile()
        return app

    async def run(
            self,
            topic: str,
            task_id: Optional[str] = None,
            save_dir: str = "./tmp/deep_research",
            max_parallel_browsers: int = 1,
    ) -> Dict[str, Any]:
        """
        Starts the deep research process.

        Args:
            topic: The research topic.
            task_id: Optional existing task ID to resume. If None, a new ID is generated.
            save_dir: Root directory for the plan, search results, and final report.
            max_parallel_browsers: Maximum number of concurrent browser searches.

        Returns:
            A dict with the final status, message, task_id, and final graph state.
        """
        if self.runner and not self.runner.done():
            logger.warning(
                "Agent is already running. Please stop the current task first."
            )
            # Return an error status instead of yielding
            return {
                "status": "error",
                "message": "Agent already running.",
                "task_id": self.current_task_id,
            }

        self.current_task_id = task_id if task_id else str(uuid.uuid4())
        safe_root_dir = "./tmp/deep_research"
        # Resolve both paths to absolute form before comparing, so a relative
        # save_dir is not rejected spuriously.
        normalized_save_dir = os.path.abspath(os.path.normpath(save_dir))
        if not normalized_save_dir.startswith(os.path.abspath(safe_root_dir)):
            logger.warning(f"Unsafe save_dir detected: {save_dir}. Using default directory.")
            normalized_save_dir = os.path.abspath(safe_root_dir)
        output_dir = os.path.join(normalized_save_dir, self.current_task_id)
        os.makedirs(output_dir, exist_ok=True)

        logger.info(
            f"Starting research task ID: {self.current_task_id} for topic: '{topic}'"
        )
        logger.info(f"Output directory: {output_dir}")

        self.stop_event = threading.Event()
        _AGENT_STOP_FLAGS[self.current_task_id] = self.stop_event
        agent_tools = await self._setup_tools(
            self.current_task_id, self.stop_event, max_parallel_browsers
        )
        initial_state: DeepResearchState = {
            "task_id": self.current_task_id,
            "topic": topic,
            "research_plan": [],
            "search_results": [],
            "messages": [],
            "llm": self.llm,
            "tools": agent_tools,
            "output_dir": Path(output_dir),
            "browser_config": self.browser_config,
            "final_report": None,
            "current_category_index": 0,
            "current_task_index_in_category": 0,
            "stop_requested": False,
            "error_message": None,
        }

        if task_id:
            logger.info(f"Attempting to resume task {task_id}...")
            loaded_state = _load_previous_state(task_id, output_dir)
            initial_state.update(loaded_state)
            if loaded_state.get("research_plan"):
                logger.info(
                    f"Resuming with {len(loaded_state['research_plan'])} plan categories "
                    f"and {len(loaded_state.get('search_results', []))} existing results. "
                    f"Next task: Cat {initial_state['current_category_index']}, Task {initial_state['current_task_index_in_category']}"
                )
                initial_state["topic"] = (
                    topic  # Allow overriding topic even when resuming? Or use stored topic? Let's use the new one.
                )
            else:
                logger.warning(
                    f"Resume requested for {task_id}, but no previous plan found. Starting fresh."
                )

        # --- Execute Graph using ainvoke ---
        final_state = None
        status = "unknown"
        message = None
        try:
            logger.info(f"Invoking graph execution for task {self.current_task_id}...")
            self.runner = asyncio.create_task(self.graph.ainvoke(initial_state))
            final_state = await self.runner
            logger.info(f"Graph execution finished for task {self.current_task_id}.")

            # Determine status based on final state
            if self.stop_event and self.stop_event.is_set():
                status = "stopped"
                message = "Research process was stopped by request."
                logger.info(message)
            elif final_state and final_state.get("error_message"):
                status = "error"
                message = final_state["error_message"]
                logger.error(f"Graph execution completed with error: {message}")
            elif final_state and final_state.get("final_report"):
                status = "completed"
                message = "Research process completed successfully."
                logger.info(message)
            else:
                # If it ends without error/report (e.g., empty plan, stopped before synthesis)
                status = "finished_incomplete"
                message = "Research process finished, but may be incomplete (no final report generated)."
                logger.warning(message)

        except asyncio.CancelledError:
            status = "cancelled"
            message = f"Agent run task cancelled for {self.current_task_id}."
            logger.info(message)
            # final_state will remain None or the state before cancellation if checkpointing was used
        except Exception as e:
            status = "error"
            message = f"Unhandled error during graph execution for {self.current_task_id}: {e}"
            logger.error(message, exc_info=True)
            # final_state will remain None or the state before the error
        finally:
            logger.info(f"Cleaning up resources for task {self.current_task_id}")
            task_id_to_clean = self.current_task_id

            self.stop_event = None
            self.current_task_id = None
            self.runner = None  # Mark runner as finished
            if self.mcp_client:
                await self.mcp_client.__aexit__(None, None, None)

        # Return a result dictionary including the status and the final state if available
        return {
            "status": status,
            "message": message,
            "task_id": task_id_to_clean,  # Use the stored task_id
            "final_state": final_state
            if final_state
            else {},  # Return the final state dict
        }

    async def _stop_lingering_browsers(self, task_id):
        """Attempts to stop any BrowserUseAgent instances associated with the task_id."""
        keys_to_stop = [
            key for key in _BROWSER_AGENT_INSTANCES if key.startswith(f"{task_id}_")
        ]
        if not keys_to_stop:
            return

        logger.warning(
            f"Found {len(keys_to_stop)} potentially lingering browser agents for task {task_id}. Attempting stop..."
        )
        for key in keys_to_stop:
            agent_instance = _BROWSER_AGENT_INSTANCES.get(key)
            try:
                if agent_instance:
                    # Assuming the BU agent has an async stop method
                    await agent_instance.stop()
                    logger.info(f"Called stop() on browser agent instance {key}")
            except Exception as e:
                logger.error(
                    f"Error calling stop() on browser agent instance {key}: {e}"
                )

    async def stop(self):
        """Signals the currently running agent task to stop."""
        if not self.current_task_id or not self.stop_event:
            logger.info("No agent task is currently running.")
            return

        logger.info(f"Stop requested for task ID: {self.current_task_id}")
        self.stop_event.set()  # Signal the stop event
        self.stopped = True
        await self._stop_lingering_browsers(self.current_task_id)

    def close(self):
        self.stopped = False
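
# Usage sketch (illustrative only; `my_llm` stands for any LangChain-compatible
# chat model configured elsewhere):
#   agent = DeepResearchAgent(llm=my_llm, browser_config={"headless": True})
#   result = await agent.run("your research topic", max_parallel_browsers=2)
#   print(result["status"], result.get("final_state", {}).get("final_report"))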
src/browser/__init__.py
ADDED
File without changes
src/browser/custom_browser.py
ADDED
@@ -0,0 +1,109 @@
import asyncio
import logging
import socket

from playwright.async_api import Browser as PlaywrightBrowser
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
from playwright.async_api import (
    Playwright,
    async_playwright,
)
from browser_use.browser.browser import Browser, IN_DOCKER
from browser_use.browser.context import BrowserContext, BrowserContextConfig
from browser_use.browser.chrome import (
    CHROME_ARGS,
    CHROME_DETERMINISTIC_RENDERING_ARGS,
    CHROME_DISABLE_SECURITY_ARGS,
    CHROME_DOCKER_ARGS,
    CHROME_HEADLESS_ARGS,
)
from browser_use.browser.utils.screen_resolution import get_screen_resolution, get_window_adjustments
from browser_use.utils import time_execution_async

from .custom_context import CustomBrowserContext

logger = logging.getLogger(__name__)


class CustomBrowser(Browser):

    async def new_context(self, config: BrowserContextConfig | None = None) -> CustomBrowserContext:
        """Create a browser context"""
        browser_config = self.config.model_dump() if self.config else {}
        context_config = config.model_dump() if config else {}
        merged_config = {**browser_config, **context_config}
        return CustomBrowserContext(config=BrowserContextConfig(**merged_config), browser=self)

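    # Since context_config is unpacked last in merged_config above, a per-context
    # setting overrides the browser-level default with the same key.
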
    async def _setup_builtin_browser(self, playwright: Playwright) -> PlaywrightBrowser:
        """Sets up and returns a Playwright Browser instance with anti-detection measures."""
        assert self.config.browser_binary_path is None, 'browser_binary_path should be None if trying to use the builtin browsers'

        # Use the configured window size from new_context_config if available
        if (
                not self.config.headless
                and hasattr(self.config, 'new_context_config')
                and hasattr(self.config.new_context_config, 'window_width')
                and hasattr(self.config.new_context_config, 'window_height')
        ):
            screen_size = {
                'width': self.config.new_context_config.window_width,
                'height': self.config.new_context_config.window_height,
            }
            offset_x, offset_y = get_window_adjustments()
        elif self.config.headless:
            screen_size = {'width': 1920, 'height': 1080}
            offset_x, offset_y = 0, 0
        else:
            screen_size = get_screen_resolution()
            offset_x, offset_y = get_window_adjustments()

        chrome_args = {
            f'--remote-debugging-port={self.config.chrome_remote_debugging_port}',
            *CHROME_ARGS,
            *(CHROME_DOCKER_ARGS if IN_DOCKER else []),
            *(CHROME_HEADLESS_ARGS if self.config.headless else []),
            *(CHROME_DISABLE_SECURITY_ARGS if self.config.disable_security else []),
            *(CHROME_DETERMINISTIC_RENDERING_ARGS if self.config.deterministic_rendering else []),
            f'--window-position={offset_x},{offset_y}',
            f'--window-size={screen_size["width"]},{screen_size["height"]}',
            *self.config.extra_browser_args,
        }

        # Check if the chrome remote debugging port is already taken;
        # if so, remove the remote-debugging-port arg to prevent conflicts
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            if s.connect_ex(('localhost', self.config.chrome_remote_debugging_port)) == 0:
                chrome_args.remove(f'--remote-debugging-port={self.config.chrome_remote_debugging_port}')

        browser_class = getattr(playwright, self.config.browser_class)
        args = {
            'chromium': list(chrome_args),
            'firefox': [
                *{
                    '-no-remote',
                    *self.config.extra_browser_args,
                }
            ],
            'webkit': [
                *{
                    '--no-startup-window',
                    *self.config.extra_browser_args,
                }
            ],
        }

        browser = await browser_class.launch(
            channel='chromium',  # https://github.com/microsoft/playwright/issues/33566
            headless=self.config.headless,
            args=args[self.config.browser_class],
            proxy=self.config.proxy.model_dump() if self.config.proxy else None,
            handle_sigterm=False,
            handle_sigint=False,
        )
        return browser
src/browser/custom_context.py
ADDED
@@ -0,0 +1,22 @@
import json
import logging
import os
from typing import Optional

from browser_use.browser.browser import Browser, IN_DOCKER
from browser_use.browser.context import BrowserContext, BrowserContextConfig, BrowserContextState
from playwright.async_api import Browser as PlaywrightBrowser
from playwright.async_api import BrowserContext as PlaywrightBrowserContext

logger = logging.getLogger(__name__)


class CustomBrowserContext(BrowserContext):
    def __init__(
            self,
            browser: 'Browser',
            config: BrowserContextConfig | None = None,
            state: Optional[BrowserContextState] = None,
    ):
        super().__init__(browser=browser, config=config, state=state)
src/controller/__init__.py
ADDED
File without changes
src/controller/custom_controller.py
ADDED
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import pdb

import pyperclip
from typing import Optional, Type, Callable, Dict, Any, Union, Awaitable, TypeVar
from pydantic import BaseModel
from browser_use.agent.views import ActionResult
from browser_use.browser.context import BrowserContext
from browser_use.controller.service import Controller, DoneAction
from browser_use.controller.registry.service import Registry, RegisteredAction
from main_content_extractor import MainContentExtractor
from browser_use.controller.views import (
    ClickElementAction,
    DoneAction,
    ExtractPageContentAction,
    GoToUrlAction,
    InputTextAction,
    OpenTabAction,
    ScrollAction,
    SearchGoogleAction,
    SendKeysAction,
    SwitchTabAction,
)
import logging
import inspect
import asyncio
import os
from langchain_core.language_models.chat_models import BaseChatModel
from browser_use.agent.views import ActionModel, ActionResult

from src.utils.mcp_client import create_tool_param_model, setup_mcp_client_and_tools

from browser_use.utils import time_execution_sync

logger = logging.getLogger(__name__)

Context = TypeVar('Context')


class CustomController(Controller):
    def __init__(self, exclude_actions: list[str] = [],
                 output_model: Optional[Type[BaseModel]] = None,
                 ask_assistant_callback: Optional[Union[Callable[[str, BrowserContext], Dict[str, Any]], Callable[
                     [str, BrowserContext], Awaitable[Dict[str, Any]]]]] = None,
                 ):
        super().__init__(exclude_actions=exclude_actions, output_model=output_model)
        self._register_custom_actions()
        self.ask_assistant_callback = ask_assistant_callback
        self.mcp_client = None
        self.mcp_server_config = None

    def _register_custom_actions(self):
        """Register all custom browser actions"""

        @self.registry.action(
            "When executing tasks, prioritize autonomous completion. However, if you encounter a definitive blocker "
            "that prevents you from proceeding independently – such as needing credentials you don't possess, "
            "requiring subjective human judgment, needing a physical action performed, encountering complex CAPTCHAs, "
            "or facing limitations in your capabilities – you must request human assistance."
        )
        async def ask_for_assistant(query: str, browser: BrowserContext):
            if self.ask_assistant_callback:
                if inspect.iscoroutinefunction(self.ask_assistant_callback):
                    user_response = await self.ask_assistant_callback(query, browser)
                else:
                    user_response = self.ask_assistant_callback(query, browser)
                msg = f"AI ask: {query}. User response: {user_response['response']}"
                logger.info(msg)
                return ActionResult(extracted_content=msg, include_in_memory=True)
            else:
                return ActionResult(extracted_content="Human cannot help you. Please try another way.",
                                    include_in_memory=True)

        @self.registry.action(
            'Upload file to interactive element with file path',
        )
        async def upload_file(index: int, path: str, browser: BrowserContext, available_file_paths: list[str]):
            if path not in available_file_paths:
                return ActionResult(error=f'File path {path} is not available')

            if not os.path.exists(path):
                return ActionResult(error=f'File {path} does not exist')

            dom_el = await browser.get_dom_element_by_index(index)

            file_upload_dom_el = dom_el.get_file_upload_element()

            if file_upload_dom_el is None:
                msg = f'No file upload element found at index {index}'
                logger.info(msg)
                return ActionResult(error=msg)

            file_upload_el = await browser.get_locate_element(file_upload_dom_el)

            if file_upload_el is None:
                msg = f'No file upload element found at index {index}'
                logger.info(msg)
                return ActionResult(error=msg)

            try:
                await file_upload_el.set_input_files(path)
                msg = f'Successfully uploaded file to index {index}'
                logger.info(msg)
                return ActionResult(extracted_content=msg, include_in_memory=True)
            except Exception as e:
                msg = f'Failed to upload file to index {index}: {str(e)}'
                logger.info(msg)
                return ActionResult(error=msg)

    @time_execution_sync('--act')
    async def act(
            self,
            action: ActionModel,
            browser_context: Optional[BrowserContext] = None,
            #
            page_extraction_llm: Optional[BaseChatModel] = None,
            sensitive_data: Optional[Dict[str, str]] = None,
            available_file_paths: Optional[list[str]] = None,
            #
            context: Context | None = None,
    ) -> ActionResult:
        """Execute an action"""

        try:
            for action_name, params in action.model_dump(exclude_unset=True).items():
                if params is not None:
                    if action_name.startswith("mcp"):
                        # this is an MCP tool
                        logger.debug(f"Invoke MCP tool: {action_name}")
                        mcp_tool = self.registry.registry.actions.get(action_name).function
                        result = await mcp_tool.ainvoke(params)
                    else:
                        result = await self.registry.execute_action(
                            action_name,
                            params,
                            browser=browser_context,
                            page_extraction_llm=page_extraction_llm,
                            sensitive_data=sensitive_data,
                            available_file_paths=available_file_paths,
                            context=context,
                        )

                    if isinstance(result, str):
                        return ActionResult(extracted_content=result)
                    elif isinstance(result, ActionResult):
                        return result
                    elif result is None:
                        return ActionResult()
                    else:
                        raise ValueError(f'Invalid action result type: {type(result)} of {result}')
            return ActionResult()
        except Exception as e:
            raise e

    async def setup_mcp_client(self, mcp_server_config: Optional[Dict[str, Any]] = None):
        self.mcp_server_config = mcp_server_config
        if self.mcp_server_config:
            self.mcp_client = await setup_mcp_client_and_tools(self.mcp_server_config)
            self.register_mcp_tools()

    def register_mcp_tools(self):
        """
        Register the MCP tools used by this controller.
        """
        if self.mcp_client:
            for server_name in self.mcp_client.server_name_to_tools:
                for tool in self.mcp_client.server_name_to_tools[server_name]:
                    tool_name = f"mcp.{server_name}.{tool.name}"
                    self.registry.registry.actions[tool_name] = RegisteredAction(
                        name=tool_name,
                        description=tool.description,
                        function=tool,
                        param_model=create_tool_param_model(tool),
                    )
                    logger.info(f"Add mcp tool: {tool_name}")
                logger.debug(
                    f"Registered {len(self.mcp_client.server_name_to_tools[server_name])} mcp tools for {server_name}")
        else:
            logger.warning("MCP client not started.")

    async def close_mcp_client(self):
        if self.mcp_client:
            await self.mcp_client.__aexit__(None, None, None)
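
`CustomController` accepts either a sync or an async `ask_assistant_callback`: `ask_for_assistant` awaits it only when it is a coroutine function, and it expects a dict with a `response` key. A wiring sketch, with a hypothetical MCP server entry in the "mcpServers" layout that `setup_mcp_client_and_tools` unwraps (not part of this commit):

# Usage sketch: sync human-in-the-loop callback plus an MCP config; the
# 'filesystem' server entry is hypothetical and shown only for shape.
import asyncio
from src.controller.custom_controller import CustomController

def ask_user(query, browser):
    # Must return a dict with a 'response' key (read by ask_for_assistant)
    return {"response": input(f"Agent needs help: {query}\n> ")}

async def main():
    controller = CustomController(ask_assistant_callback=ask_user)
    await controller.setup_mcp_client({
        "mcpServers": {
            "filesystem": {
                "command": "npx",
                "args": ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"],
            }
        }
    })
    # ... hand the controller to an agent run here ...
    await controller.close_mcp_client()

asyncio.run(main())
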
src/utils/__init__.py
ADDED
File without changes
src/utils/config.py
ADDED
@@ -0,0 +1,100 @@
PROVIDER_DISPLAY_NAMES = {
    "openai": "OpenAI",
    "azure_openai": "Azure OpenAI",
    "anthropic": "Anthropic",
    "deepseek": "DeepSeek",
    "google": "Google",
    "alibaba": "Alibaba",
    "moonshot": "MoonShot",
    "unbound": "Unbound AI",
    "ibm": "IBM",
    "grok": "Grok",
}

# Predefined model names for common providers
model_names = {
    "anthropic": ["claude-3-5-sonnet-20241022", "claude-3-5-sonnet-20240620", "claude-3-opus-20240229"],
    "openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo", "o3-mini"],
    "deepseek": ["deepseek-chat", "deepseek-reasoner"],
    "google": ["gemini-2.0-flash", "gemini-2.0-flash-thinking-exp", "gemini-1.5-flash-latest",
               "gemini-1.5-flash-8b-latest", "gemini-2.0-flash-thinking-exp-01-21", "gemini-2.0-pro-exp-02-05",
               "gemini-2.5-pro-preview-03-25", "gemini-2.5-flash-preview-04-17"],
    "ollama": ["qwen2.5:7b", "qwen2.5:14b", "qwen2.5:32b", "qwen2.5-coder:14b", "qwen2.5-coder:32b", "llama2:7b",
               "deepseek-r1:14b", "deepseek-r1:32b"],
    "azure_openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo"],
    "mistral": ["pixtral-large-latest", "mistral-large-latest", "mistral-small-latest", "ministral-8b-latest"],
    "alibaba": ["qwen-plus", "qwen-max", "qwen-vl-max", "qwen-vl-plus", "qwen-turbo", "qwen-long"],
    "moonshot": ["moonshot-v1-32k-vision-preview", "moonshot-v1-8k-vision-preview"],
    "unbound": ["gemini-2.0-flash", "gpt-4o-mini", "gpt-4o", "gpt-4.5-preview"],
    "grok": [
        "grok-3",
        "grok-3-fast",
        "grok-3-mini",
        "grok-3-mini-fast",
        "grok-2-vision",
        "grok-2-image",
        "grok-2",
    ],
    "siliconflow": [
        "deepseek-ai/DeepSeek-R1",
        "deepseek-ai/DeepSeek-V3",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "deepseek-ai/DeepSeek-V2.5",
        "deepseek-ai/deepseek-vl2",
        "Qwen/Qwen2.5-72B-Instruct-128K",
        "Qwen/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-32B-Instruct",
        "Qwen/Qwen2.5-14B-Instruct",
        "Qwen/Qwen2.5-7B-Instruct",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
        "Qwen/Qwen2.5-Coder-7B-Instruct",
        "Qwen/Qwen2-7B-Instruct",
        "Qwen/Qwen2-1.5B-Instruct",
        "Qwen/QwQ-32B-Preview",
        "Qwen/Qwen2-VL-72B-Instruct",
        "Qwen/Qwen2.5-VL-32B-Instruct",
        "Qwen/Qwen2.5-VL-72B-Instruct",
        "TeleAI/TeleChat2",
        "THUDM/glm-4-9b-chat",
        "Vendor-A/Qwen/Qwen2.5-72B-Instruct",
        "internlm/internlm2_5-7b-chat",
        "internlm/internlm2_5-20b-chat",
        "Pro/Qwen/Qwen2.5-7B-Instruct",
        "Pro/Qwen/Qwen2-7B-Instruct",
        "Pro/Qwen/Qwen2-1.5B-Instruct",
        "Pro/THUDM/chatglm3-6b",
        "Pro/THUDM/glm-4-9b-chat",
    ],
    "ibm": ["ibm/granite-vision-3.1-2b-preview", "meta-llama/llama-4-maverick-17b-128e-instruct-fp8",
            "meta-llama/llama-3-2-90b-vision-instruct"],
    "modelscope": [
        "Qwen/Qwen2.5-Coder-32B-Instruct",
        "Qwen/Qwen2.5-Coder-14B-Instruct",
        "Qwen/Qwen2.5-Coder-7B-Instruct",
        "Qwen/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-32B-Instruct",
        "Qwen/Qwen2.5-14B-Instruct",
        "Qwen/Qwen2.5-7B-Instruct",
        "Qwen/QwQ-32B-Preview",
        "Qwen/Qwen2.5-VL-3B-Instruct",
        "Qwen/Qwen2.5-VL-7B-Instruct",
        "Qwen/Qwen2.5-VL-32B-Instruct",
        "Qwen/Qwen2.5-VL-72B-Instruct",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "deepseek-ai/DeepSeek-R1",
        "deepseek-ai/DeepSeek-V3",
        "Qwen/Qwen3-1.7B",
        "Qwen/Qwen3-4B",
        "Qwen/Qwen3-8B",
        "Qwen/Qwen3-14B",
        "Qwen/Qwen3-30B-A3B",
        "Qwen/Qwen3-32B",
        "Qwen/Qwen3-235B-A22B",
    ],
}
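
These two dictionaries drive the provider and model dropdowns in the web UI; a provider missing from `model_names` simply falls back to an empty, free-text dropdown (see `update_model_dropdown` in agent_settings_tab.py later in this commit). A quick lookup sketch (not part of this commit):

# Usage sketch: resolving display name and model choices for a provider.
from src.utils import config

provider = "deepseek"
label = config.PROVIDER_DISPLAY_NAMES.get(provider, provider.upper())  # "DeepSeek"
choices = config.model_names.get(provider, [])  # ["deepseek-chat", "deepseek-reasoner"]
print(label, choices)
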
src/utils/llm_provider.py
ADDED
@@ -0,0 +1,354 @@
from openai import OpenAI
import pdb
from langchain_openai import ChatOpenAI
from langchain_core.globals import get_llm_cache
from langchain_core.language_models.base import (
    BaseLanguageModel,
    LangSmithParams,
    LanguageModelInput,
)
import os
from langchain_core.load import dumpd, dumps
from langchain_core.messages import (
    AIMessage,
    SystemMessage,
    AnyMessage,
    BaseMessage,
    BaseMessageChunk,
    HumanMessage,
    convert_to_messages,
    message_chunk_to_message,
)
from langchain_core.outputs import (
    ChatGeneration,
    ChatGenerationChunk,
    ChatResult,
    LLMResult,
    RunInfo,
)
from langchain_core.output_parsers.base import OutputParserLike
from langchain_core.runnables import Runnable, RunnableConfig
from langchain_core.tools import BaseTool

from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Literal,
    Optional,
    Union,
    cast, List,
)
from langchain_anthropic import ChatAnthropic
from langchain_mistralai import ChatMistralAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_ollama import ChatOllama
from langchain_openai import AzureChatOpenAI, ChatOpenAI
from langchain_ibm import ChatWatsonx
from langchain_aws import ChatBedrock
from pydantic import SecretStr

from src.utils import config


class DeepSeekR1ChatOpenAI(ChatOpenAI):

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        self.client = OpenAI(
            base_url=kwargs.get("base_url"),
            api_key=kwargs.get("api_key")
        )

    async def ainvoke(
            self,
            input: LanguageModelInput,
            config: Optional[RunnableConfig] = None,
            *,
            stop: Optional[list[str]] = None,
            **kwargs: Any,
    ) -> AIMessage:
        message_history = []
        for input_ in input:
            if isinstance(input_, SystemMessage):
                message_history.append({"role": "system", "content": input_.content})
            elif isinstance(input_, AIMessage):
                message_history.append({"role": "assistant", "content": input_.content})
            else:
                message_history.append({"role": "user", "content": input_.content})

        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=message_history
        )

        reasoning_content = response.choices[0].message.reasoning_content
        content = response.choices[0].message.content
        return AIMessage(content=content, reasoning_content=reasoning_content)

    def invoke(
            self,
            input: LanguageModelInput,
            config: Optional[RunnableConfig] = None,
            *,
            stop: Optional[list[str]] = None,
            **kwargs: Any,
    ) -> AIMessage:
        message_history = []
        for input_ in input:
            if isinstance(input_, SystemMessage):
                message_history.append({"role": "system", "content": input_.content})
            elif isinstance(input_, AIMessage):
                message_history.append({"role": "assistant", "content": input_.content})
            else:
                message_history.append({"role": "user", "content": input_.content})

        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=message_history
        )

        reasoning_content = response.choices[0].message.reasoning_content
        content = response.choices[0].message.content
        return AIMessage(content=content, reasoning_content=reasoning_content)


class DeepSeekR1ChatOllama(ChatOllama):

    async def ainvoke(
            self,
            input: LanguageModelInput,
            config: Optional[RunnableConfig] = None,
            *,
            stop: Optional[list[str]] = None,
            **kwargs: Any,
    ) -> AIMessage:
        org_ai_message = await super().ainvoke(input=input)
        org_content = org_ai_message.content
        reasoning_content = org_content.split("</think>")[0].replace("<think>", "")
        content = org_content.split("</think>")[1]
        if "**JSON Response:**" in content:
            content = content.split("**JSON Response:**")[-1]
        return AIMessage(content=content, reasoning_content=reasoning_content)

    def invoke(
            self,
            input: LanguageModelInput,
            config: Optional[RunnableConfig] = None,
            *,
            stop: Optional[list[str]] = None,
            **kwargs: Any,
    ) -> AIMessage:
        org_ai_message = super().invoke(input=input)
        org_content = org_ai_message.content
        reasoning_content = org_content.split("</think>")[0].replace("<think>", "")
        content = org_content.split("</think>")[1]
        if "**JSON Response:**" in content:
            content = content.split("**JSON Response:**")[-1]
        return AIMessage(content=content, reasoning_content=reasoning_content)


def get_llm_model(provider: str, **kwargs):
    """
    Get LLM model
    :param provider: LLM provider
    :param kwargs:
    :return:
    """
    if provider not in ["ollama", "bedrock"]:
        env_var = f"{provider.upper()}_API_KEY"
        api_key = kwargs.get("api_key", "") or os.getenv(env_var, "")
        if not api_key:
            provider_display = config.PROVIDER_DISPLAY_NAMES.get(provider, provider.upper())
            error_msg = f"💥 {provider_display} API key not found! 🔑 Please set the `{env_var}` environment variable or provide it in the UI."
            raise ValueError(error_msg)
        kwargs["api_key"] = api_key

    if provider == "anthropic":
        if not kwargs.get("base_url", ""):
            base_url = "https://api.anthropic.com"
        else:
            base_url = kwargs.get("base_url")

        return ChatAnthropic(
            model=kwargs.get("model_name", "claude-3-5-sonnet-20241022"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=base_url,
            api_key=api_key,
        )
    elif provider == 'mistral':
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("MISTRAL_ENDPOINT", "https://api.mistral.ai/v1")
        else:
            base_url = kwargs.get("base_url")
        if not kwargs.get("api_key", ""):
            api_key = os.getenv("MISTRAL_API_KEY", "")
        else:
            api_key = kwargs.get("api_key")

        return ChatMistralAI(
            model=kwargs.get("model_name", "mistral-large-latest"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=base_url,
            api_key=api_key,
        )
    elif provider == "openai":
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("OPENAI_ENDPOINT", "https://api.openai.com/v1")
        else:
            base_url = kwargs.get("base_url")

        return ChatOpenAI(
            model=kwargs.get("model_name", "gpt-4o"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=base_url,
            api_key=api_key,
        )
    elif provider == "grok":
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("GROK_ENDPOINT", "https://api.x.ai/v1")
        else:
            base_url = kwargs.get("base_url")

        return ChatOpenAI(
            model=kwargs.get("model_name", "grok-3"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=base_url,
            api_key=api_key,
        )
    elif provider == "deepseek":
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("DEEPSEEK_ENDPOINT", "")
        else:
            base_url = kwargs.get("base_url")

        if kwargs.get("model_name", "deepseek-chat") == "deepseek-reasoner":
            return DeepSeekR1ChatOpenAI(
                model=kwargs.get("model_name", "deepseek-reasoner"),
                temperature=kwargs.get("temperature", 0.0),
                base_url=base_url,
                api_key=api_key,
            )
        else:
            return ChatOpenAI(
                model=kwargs.get("model_name", "deepseek-chat"),
                temperature=kwargs.get("temperature", 0.0),
                base_url=base_url,
                api_key=api_key,
            )
    elif provider == "google":
        return ChatGoogleGenerativeAI(
            model=kwargs.get("model_name", "gemini-2.0-flash-exp"),
            temperature=kwargs.get("temperature", 0.0),
            api_key=api_key,
        )
    elif provider == "ollama":
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("OLLAMA_ENDPOINT", "http://localhost:11434")
        else:
            base_url = kwargs.get("base_url")

        if "deepseek-r1" in kwargs.get("model_name", "qwen2.5:7b"):
            return DeepSeekR1ChatOllama(
                model=kwargs.get("model_name", "deepseek-r1:14b"),
                temperature=kwargs.get("temperature", 0.0),
                num_ctx=kwargs.get("num_ctx", 32000),
                base_url=base_url,
            )
        else:
            return ChatOllama(
                model=kwargs.get("model_name", "qwen2.5:7b"),
                temperature=kwargs.get("temperature", 0.0),
                num_ctx=kwargs.get("num_ctx", 32000),
                num_predict=kwargs.get("num_predict", 1024),
                base_url=base_url,
            )
    elif provider == "azure_openai":
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("AZURE_OPENAI_ENDPOINT", "")
        else:
            base_url = kwargs.get("base_url")
        api_version = kwargs.get("api_version", "") or os.getenv("AZURE_OPENAI_API_VERSION", "2025-01-01-preview")
        return AzureChatOpenAI(
            model=kwargs.get("model_name", "gpt-4o"),
            temperature=kwargs.get("temperature", 0.0),
            api_version=api_version,
            azure_endpoint=base_url,
            api_key=api_key,
        )
    elif provider == "alibaba":
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("ALIBABA_ENDPOINT", "https://dashscope.aliyuncs.com/compatible-mode/v1")
        else:
            base_url = kwargs.get("base_url")

        return ChatOpenAI(
            model=kwargs.get("model_name", "qwen-plus"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=base_url,
            api_key=api_key,
        )
    elif provider == "ibm":
        parameters = {
            "temperature": kwargs.get("temperature", 0.0),
            "max_tokens": kwargs.get("num_ctx", 32000)
        }
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("IBM_ENDPOINT", "https://us-south.ml.cloud.ibm.com")
        else:
            base_url = kwargs.get("base_url")

        return ChatWatsonx(
            model_id=kwargs.get("model_name", "ibm/granite-vision-3.1-2b-preview"),
            url=base_url,
            project_id=os.getenv("IBM_PROJECT_ID"),
            apikey=os.getenv("IBM_API_KEY"),
            params=parameters
        )
    elif provider == "moonshot":
        return ChatOpenAI(
            model=kwargs.get("model_name", "moonshot-v1-32k-vision-preview"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=os.getenv("MOONSHOT_ENDPOINT"),
            api_key=os.getenv("MOONSHOT_API_KEY"),
        )
    elif provider == "unbound":
        return ChatOpenAI(
            model=kwargs.get("model_name", "gpt-4o-mini"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=os.getenv("UNBOUND_ENDPOINT", "https://api.getunbound.ai"),
            api_key=api_key,
        )
    elif provider == "siliconflow":
        if not kwargs.get("api_key", ""):
            api_key = os.getenv("SiliconFLOW_API_KEY", "")
        else:
            api_key = kwargs.get("api_key")
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("SiliconFLOW_ENDPOINT", "")
        else:
            base_url = kwargs.get("base_url")
        return ChatOpenAI(
            api_key=api_key,
            base_url=base_url,
            model_name=kwargs.get("model_name", "Qwen/QwQ-32B"),
            temperature=kwargs.get("temperature", 0.0),
        )
    elif provider == "modelscope":
        if not kwargs.get("api_key", ""):
            api_key = os.getenv("MODELSCOPE_API_KEY", "")
        else:
            api_key = kwargs.get("api_key")
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("MODELSCOPE_ENDPOINT", "")
        else:
            base_url = kwargs.get("base_url")
        return ChatOpenAI(
            api_key=api_key,
            base_url=base_url,
            model_name=kwargs.get("model_name", "Qwen/QwQ-32B"),
            temperature=kwargs.get("temperature", 0.0),
        )
    else:
        raise ValueError(f"Unsupported provider: {provider}")
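
`get_llm_model` resolves the API key from kwargs or from `<PROVIDER>_API_KEY` for every provider except `ollama` and `bedrock`, then dispatches on the provider string; `deepseek-reasoner` and `deepseek-r1*` models get the reasoning-aware wrappers defined above. A call sketch (not part of this commit):

# Usage sketch: the factory returns a LangChain chat model; api_key may be
# omitted here only when OPENAI_API_KEY is set in the environment.
from src.utils.llm_provider import get_llm_model

llm = get_llm_model(
    provider="openai",
    model_name="gpt-4o",
    temperature=0.0,
)
print(llm.invoke("Reply with one word: ready?").content)
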
src/utils/mcp_client.py
ADDED
@@ -0,0 +1,254 @@
import inspect
import logging
import uuid
from datetime import date, datetime, time
from enum import Enum
from typing import Any, Dict, List, Optional, Set, Type, Union, get_type_hints

from browser_use.controller.registry.views import ActionModel
from langchain.tools import BaseTool
from langchain_mcp_adapters.client import MultiServerMCPClient
from pydantic import BaseModel, Field, create_model

logger = logging.getLogger(__name__)


async def setup_mcp_client_and_tools(mcp_server_config: Dict[str, Any]) -> Optional[MultiServerMCPClient]:
    """
    Initializes the MultiServerMCPClient and connects to the configured servers.

    Returns:
        The initialized and started client instance, or None on failure.
    """

    logger.info("Initializing MultiServerMCPClient...")

    if not mcp_server_config:
        logger.error("No MCP server configuration provided.")
        return None

    try:
        if "mcpServers" in mcp_server_config:
            mcp_server_config = mcp_server_config["mcpServers"]
        client = MultiServerMCPClient(mcp_server_config)
        await client.__aenter__()
        return client

    except Exception as e:
        logger.error(f"Failed to set up MCP client or fetch tools: {e}", exc_info=True)
        return None


def create_tool_param_model(tool: BaseTool) -> Type[BaseModel]:
    """Creates a Pydantic model from a LangChain tool's schema"""

    # Get tool schema information
    json_schema = tool.args_schema
    tool_name = tool.name

    # If the tool already has a schema defined, convert it to a new param_model
    if json_schema is not None:

        # Create new parameter model
        params = {}

        # Process properties if they exist
        if 'properties' in json_schema:
            # Find required fields
            required_fields: Set[str] = set(json_schema.get('required', []))

            for prop_name, prop_details in json_schema['properties'].items():
                field_type = resolve_type(prop_details, f"{tool_name}_{prop_name}")

                # Check if parameter is required
                is_required = prop_name in required_fields

                # Get default value and description
                default_value = prop_details.get('default', ... if is_required else None)
                description = prop_details.get('description', '')

                # Add field constraints
                field_kwargs = {'default': default_value}
                if description:
                    field_kwargs['description'] = description

                # Add additional constraints if present
                if 'minimum' in prop_details:
                    field_kwargs['ge'] = prop_details['minimum']
                if 'maximum' in prop_details:
                    field_kwargs['le'] = prop_details['maximum']
                if 'minLength' in prop_details:
                    field_kwargs['min_length'] = prop_details['minLength']
                if 'maxLength' in prop_details:
                    field_kwargs['max_length'] = prop_details['maxLength']
                if 'pattern' in prop_details:
                    field_kwargs['pattern'] = prop_details['pattern']

                # Add to parameters dictionary
                params[prop_name] = (field_type, Field(**field_kwargs))

        return create_model(
            f'{tool_name}_parameters',
            __base__=ActionModel,
            **params,  # type: ignore
        )

    # If no schema is defined, extract parameters from the _run method
    run_method = tool._run
    sig = inspect.signature(run_method)

    # Get type hints for better type information
    try:
        type_hints = get_type_hints(run_method)
    except Exception:
        type_hints = {}

    params = {}
    for name, param in sig.parameters.items():
        # Skip 'self' parameter and any other parameters you want to exclude
        if name == 'self':
            continue

        # Get annotation from type hints if available, otherwise from signature
        annotation = type_hints.get(name, param.annotation)
        if annotation == inspect.Parameter.empty:
            annotation = Any

        # Use default value if available, otherwise make it required
        if param.default != param.empty:
            params[name] = (annotation, param.default)
        else:
            params[name] = (annotation, ...)

    return create_model(
        f'{tool_name}_parameters',
        __base__=ActionModel,
        **params,  # type: ignore
    )


def resolve_type(prop_details: Dict[str, Any], prefix: str = "") -> Any:
    """Recursively resolves JSON schema type to Python/Pydantic type"""

    # Handle reference types
    if '$ref' in prop_details:
        # In a real application, reference resolution would be needed
        return Any

    # Basic type mapping
    type_mapping = {
        'string': str,
        'integer': int,
        'number': float,
        'boolean': bool,
        'array': List,
        'object': Dict,
        'null': type(None),
    }

    # Handle formatted strings
    if prop_details.get('type') == 'string' and 'format' in prop_details:
        format_mapping = {
            'date-time': datetime,
            'date': date,
            'time': time,
            'email': str,
            'uri': str,
            'url': str,
            'uuid': uuid.UUID,
            'binary': bytes,
        }
        return format_mapping.get(prop_details['format'], str)

    # Handle enum types
    if 'enum' in prop_details:
        enum_values = prop_details['enum']
        # Create dynamic enum class with safe names
        enum_dict = {}
        for i, v in enumerate(enum_values):
            # Ensure enum names are valid Python identifiers
            if isinstance(v, str):
                key = v.upper().replace(' ', '_').replace('-', '_')
                if not key.isidentifier():
                    key = f"VALUE_{i}"
            else:
                key = f"VALUE_{i}"
            enum_dict[key] = v

        # Only create enum if we have values
        if enum_dict:
            return Enum(f"{prefix}_Enum", enum_dict)
        return str  # Fallback

    # Handle array types
    if prop_details.get('type') == 'array' and 'items' in prop_details:
        item_type = resolve_type(prop_details['items'], f"{prefix}_item")
        return List[item_type]  # type: ignore

    # Handle object types with properties
    if prop_details.get('type') == 'object' and 'properties' in prop_details:
        nested_params = {}
        for nested_name, nested_details in prop_details['properties'].items():
            nested_type = resolve_type(nested_details, f"{prefix}_{nested_name}")
            # Get required field info
            required_fields = prop_details.get('required', [])
            is_required = nested_name in required_fields
            default_value = nested_details.get('default', ... if is_required else None)
            description = nested_details.get('description', '')

            field_kwargs = {'default': default_value}
            if description:
                field_kwargs['description'] = description

            nested_params[nested_name] = (nested_type, Field(**field_kwargs))

        # Create nested model
        nested_model = create_model(f"{prefix}_Model", **nested_params)
        return nested_model

    # Handle union types (oneOf, anyOf)
    if 'oneOf' in prop_details or 'anyOf' in prop_details:
        union_schema = prop_details.get('oneOf') or prop_details.get('anyOf')
        union_types = []
        for i, t in enumerate(union_schema):
            union_types.append(resolve_type(t, f"{prefix}_{i}"))

        if union_types:
            return Union.__getitem__(tuple(union_types))  # type: ignore
        return Any

    # Handle allOf (intersection types)
    if 'allOf' in prop_details:
        nested_params = {}
        for i, schema_part in enumerate(prop_details['allOf']):
            if 'properties' in schema_part:
                for nested_name, nested_details in schema_part['properties'].items():
                    nested_type = resolve_type(nested_details, f"{prefix}_allOf_{i}_{nested_name}")
                    # Check if required
                    required_fields = schema_part.get('required', [])
                    is_required = nested_name in required_fields
                    nested_params[nested_name] = (nested_type, ... if is_required else None)

        # Create composite model
        if nested_params:
            composite_model = create_model(f"{prefix}_CompositeModel", **nested_params)
            return composite_model
        return Dict

    # Default to basic types
    schema_type = prop_details.get('type', 'string')
    if isinstance(schema_type, list):
        # Handle multiple types (e.g., ["string", "null"])
        non_null_types = [t for t in schema_type if t != 'null']
        if non_null_types:
            primary_type = type_mapping.get(non_null_types[0], Any)
            if 'null' in schema_type:
                return Optional[primary_type]  # type: ignore
            return primary_type
        return Any

    return type_mapping.get(schema_type, Any)
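
`resolve_type` is the workhorse behind `create_tool_param_model`: it walks a JSON-schema fragment and returns the matching Python typing construct, recursing through formats, enums, arrays, objects, and unions. A few spot checks of the mapping (not part of this commit):

# Usage sketch: spot-checking resolve_type's JSON-schema-to-typing mapping.
from typing import List, Optional
from src.utils.mcp_client import resolve_type

assert resolve_type({'type': 'integer'}) is int
assert resolve_type({'type': 'array', 'items': {'type': 'string'}}) == List[str]
assert resolve_type({'type': ['string', 'null']}) == Optional[str]
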
src/utils/utils.py
ADDED
@@ -0,0 +1,39 @@
import base64
import os
import time
from pathlib import Path
from typing import Dict, Optional
import requests
import json
import gradio as gr
import uuid


def encode_image(img_path):
    if not img_path:
        return None
    with open(img_path, "rb") as fin:
        image_data = base64.b64encode(fin.read()).decode("utf-8")
    return image_data


def get_latest_files(directory: str, file_types: list = ['.webm', '.zip']) -> Dict[str, Optional[str]]:
    """Get the latest recording and trace files"""
    latest_files: Dict[str, Optional[str]] = {ext: None for ext in file_types}

    if not os.path.exists(directory):
        os.makedirs(directory, exist_ok=True)
        return latest_files

    for file_type in file_types:
        try:
            matches = list(Path(directory).rglob(f"*{file_type}"))
            if matches:
                latest = max(matches, key=lambda p: p.stat().st_mtime)
                # Only return files that are complete (not being written)
                if time.time() - latest.stat().st_mtime > 1.0:
                    latest_files[file_type] = str(latest)
        except Exception as e:
            print(f"Error getting latest {file_type} file: {e}")

    return latest_files
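
`get_latest_files` skips files modified within the last second on the assumption that they may still be mid-write (for example, a recording that has not been flushed). A call sketch with a hypothetical output directory (not part of this commit):

# Usage sketch: './tmp/record_videos' is a hypothetical path.
from src.utils.utils import get_latest_files

latest = get_latest_files("./tmp/record_videos", file_types=['.webm', '.zip'])
print(latest.get('.webm'))  # newest finished recording, or None
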
src/webui/__init__.py
ADDED
File without changes
src/webui/components/__init__.py
ADDED
File without changes
src/webui/components/agent_settings_tab.py
ADDED
@@ -0,0 +1,269 @@
import json
import os

import gradio as gr
from gradio.components import Component
from typing import Any, Dict, Optional
from src.webui.webui_manager import WebuiManager
from src.utils import config
import logging
from functools import partial

logger = logging.getLogger(__name__)


def update_model_dropdown(llm_provider):
    """
    Update the model name dropdown with predefined models for the selected provider.
    """
    # Use predefined models for the selected provider
    if llm_provider in config.model_names:
        return gr.Dropdown(choices=config.model_names[llm_provider], value=config.model_names[llm_provider][0],
                           interactive=True)
    else:
        return gr.Dropdown(choices=[], value="", interactive=True, allow_custom_value=True)


async def update_mcp_server(mcp_file: str, webui_manager: WebuiManager):
    """
    Update the MCP server.
    """
    if hasattr(webui_manager, "bu_controller") and webui_manager.bu_controller:
        logger.warning("⚠️ Close controller because mcp file has changed!")
        await webui_manager.bu_controller.close_mcp_client()
        webui_manager.bu_controller = None

    if not mcp_file or not os.path.exists(mcp_file) or not mcp_file.endswith('.json'):
        logger.warning(f"{mcp_file} is not a valid MCP file.")
        return None, gr.update(visible=False)

    with open(mcp_file, 'r') as f:
        mcp_server = json.load(f)

    return json.dumps(mcp_server, indent=2), gr.update(visible=True)


def create_agent_settings_tab(webui_manager: WebuiManager):
    """
    Creates an agent settings tab.
    """
    input_components = set(webui_manager.get_components())
    tab_components = {}

    with gr.Group():
        with gr.Column():
            override_system_prompt = gr.Textbox(label="Override system prompt", lines=4, interactive=True)
            extend_system_prompt = gr.Textbox(label="Extend system prompt", lines=4, interactive=True)

    with gr.Group():
        mcp_json_file = gr.File(label="MCP server json", interactive=True, file_types=[".json"])
        mcp_server_config = gr.Textbox(label="MCP server", lines=6, interactive=True, visible=False)

    with gr.Group():
        with gr.Row():
            llm_provider = gr.Dropdown(
                choices=[provider for provider, model in config.model_names.items()],
                label="LLM Provider",
                value=os.getenv("DEFAULT_LLM", "openai"),
                info="Select LLM provider for LLM",
                interactive=True
            )
            llm_model_name = gr.Dropdown(
                label="LLM Model Name",
                choices=config.model_names[os.getenv("DEFAULT_LLM", "openai")],
                value=config.model_names[os.getenv("DEFAULT_LLM", "openai")][0],
                interactive=True,
                allow_custom_value=True,
                info="Select a model in the dropdown options or directly type a custom model name"
            )
        with gr.Row():
            llm_temperature = gr.Slider(
                minimum=0.0,
                maximum=2.0,
                value=0.6,
                step=0.1,
                label="LLM Temperature",
                info="Controls randomness in model outputs",
                interactive=True
            )

            use_vision = gr.Checkbox(
                label="Use Vision",
                value=True,
                info="Enable Vision(Input highlighted screenshot into LLM)",
                interactive=True
            )

            ollama_num_ctx = gr.Slider(
                minimum=2 ** 8,
                maximum=2 ** 16,
                value=16000,
                step=1,
                label="Ollama Context Length",
                info="Controls max context length model needs to handle (less = faster)",
                visible=False,
                interactive=True
            )

        with gr.Row():
            llm_base_url = gr.Textbox(
                label="Base URL",
                value="",
                info="API endpoint URL (if required)"
            )
            llm_api_key = gr.Textbox(
                label="API Key",
                type="password",
                value="",
                info="Your API key (leave blank to use .env)"
            )

    with gr.Group():
        with gr.Row():
            planner_llm_provider = gr.Dropdown(
                choices=[provider for provider, model in config.model_names.items()],
                label="Planner LLM Provider",
                info="Select LLM provider for LLM",
                value=None,
                interactive=True
            )
            planner_llm_model_name = gr.Dropdown(
                label="Planner LLM Model Name",
                interactive=True,
                allow_custom_value=True,
                info="Select a model in the dropdown options or directly type a custom model name"
            )
        with gr.Row():
            planner_llm_temperature = gr.Slider(
                minimum=0.0,
                maximum=2.0,
                value=0.6,
                step=0.1,
                label="Planner LLM Temperature",
                info="Controls randomness in model outputs",
                interactive=True
            )

            planner_use_vision = gr.Checkbox(
                label="Use Vision(Planner LLM)",
                value=False,
                info="Enable Vision(Input highlighted screenshot into LLM)",
                interactive=True
            )

            planner_ollama_num_ctx = gr.Slider(
                minimum=2 ** 8,
                maximum=2 ** 16,
                value=16000,
                step=1,
                label="Ollama Context Length",
                info="Controls max context length model needs to handle (less = faster)",
                visible=False,
                interactive=True
            )

        with gr.Row():
            planner_llm_base_url = gr.Textbox(
                label="Base URL",
                value="",
                info="API endpoint URL (if required)"
            )
            planner_llm_api_key = gr.Textbox(
                label="API Key",
                type="password",
                value="",
                info="Your API key (leave blank to use .env)"
            )

    with gr.Row():
        max_steps = gr.Slider(
            minimum=1,
            maximum=1000,
            value=100,
            step=1,
            label="Max Run Steps",
            info="Maximum number of steps the agent will take",
            interactive=True
        )
        max_actions = gr.Slider(
            minimum=1,
            maximum=100,
            value=10,
            step=1,
            label="Max Number of Actions",
            info="Maximum number of actions the agent will take per step",
            interactive=True
        )

    with gr.Row():
        max_input_tokens = gr.Number(
            label="Max Input Tokens",
            value=128000,
            precision=0,
            interactive=True
        )
        tool_calling_method = gr.Dropdown(
            label="Tool Calling Method",
            value="auto",
            interactive=True,
            allow_custom_value=True,
            choices=['function_calling', 'json_mode', 'raw', 'auto', 'tools', "None"],
            visible=True
        )
    tab_components.update(dict(
        override_system_prompt=override_system_prompt,
        extend_system_prompt=extend_system_prompt,
        llm_provider=llm_provider,
        llm_model_name=llm_model_name,
        llm_temperature=llm_temperature,
        use_vision=use_vision,
        ollama_num_ctx=ollama_num_ctx,
        llm_base_url=llm_base_url,
        llm_api_key=llm_api_key,
        planner_llm_provider=planner_llm_provider,
        planner_llm_model_name=planner_llm_model_name,
        planner_llm_temperature=planner_llm_temperature,
        planner_use_vision=planner_use_vision,
        planner_ollama_num_ctx=planner_ollama_num_ctx,
        planner_llm_base_url=planner_llm_base_url,
        planner_llm_api_key=planner_llm_api_key,
        max_steps=max_steps,
        max_actions=max_actions,
        max_input_tokens=max_input_tokens,
        tool_calling_method=tool_calling_method,
        mcp_json_file=mcp_json_file,
        mcp_server_config=mcp_server_config,
    ))
    webui_manager.add_components("agent_settings", tab_components)

    llm_provider.change(
        fn=lambda x: gr.update(visible=x == "ollama"),
        inputs=llm_provider,
        outputs=ollama_num_ctx
    )
    llm_provider.change(
        lambda provider: update_model_dropdown(provider),
        inputs=[llm_provider],
        outputs=[llm_model_name]
    )
    planner_llm_provider.change(
        fn=lambda x: gr.update(visible=x == "ollama"),
        inputs=[planner_llm_provider],
        outputs=[planner_ollama_num_ctx]
    )
    planner_llm_provider.change(
        lambda provider: update_model_dropdown(provider),
        inputs=[planner_llm_provider],
        outputs=[planner_llm_model_name]
    )

    async def update_wrapper(mcp_file):
        """Wrapper around update_mcp_server for the file-change event."""
        update_dict = await update_mcp_server(mcp_file, webui_manager)
        yield update_dict

    mcp_json_file.change(
        update_wrapper,
        inputs=[mcp_json_file],
        outputs=[mcp_server_config, mcp_server_config]
    )
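
`update_model_dropdown` is what keeps the model list in sync with the provider choice: known providers get their predefined list with the first entry preselected, while unknown ones fall back to an empty dropdown that allows custom values. A behavior sketch with a hypothetical provider name (not part of this commit):

# Usage sketch: 'acme-llm' is a hypothetical provider absent from config.model_names.
from src.webui.components.agent_settings_tab import update_model_dropdown

dd_known = update_model_dropdown("openai")      # preloaded with config.model_names["openai"]
dd_unknown = update_model_dropdown("acme-llm")  # empty choices, allow_custom_value=True
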
src/webui/components/browser_settings_tab.py
ADDED
@@ -0,0 +1,161 @@
import os
from distutils.util import strtobool
import gradio as gr
import logging
from gradio.components import Component

from src.webui.webui_manager import WebuiManager
from src.utils import config

logger = logging.getLogger(__name__)


async def close_browser(webui_manager: WebuiManager):
    """
    Close browser
    """
    if webui_manager.bu_current_task and not webui_manager.bu_current_task.done():
        webui_manager.bu_current_task.cancel()
        webui_manager.bu_current_task = None

    if webui_manager.bu_browser_context:
        logger.info("⚠️ Closing browser context when changing browser config.")
        await webui_manager.bu_browser_context.close()
        webui_manager.bu_browser_context = None

    if webui_manager.bu_browser:
        logger.info("⚠️ Closing browser when changing browser config.")
        await webui_manager.bu_browser.close()
        webui_manager.bu_browser = None


def create_browser_settings_tab(webui_manager: WebuiManager):
    """
    Creates a browser settings tab.
    """
    input_components = set(webui_manager.get_components())
    tab_components = {}

    with gr.Group():
        with gr.Row():
            browser_binary_path = gr.Textbox(
                label="Browser Binary Path",
                lines=1,
                interactive=True,
                placeholder="e.g. '/Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome'"
            )
            browser_user_data_dir = gr.Textbox(
                label="Browser User Data Dir",
                lines=1,
                interactive=True,
                placeholder="Leave it empty if you use your default user data",
            )
    with gr.Group():
        with gr.Row():
            use_own_browser = gr.Checkbox(
                label="Use Own Browser",
                value=bool(strtobool(os.getenv("USE_OWN_BROWSER", "false"))),
                info="Use your existing browser instance",
                interactive=True
            )
            keep_browser_open = gr.Checkbox(
                label="Keep Browser Open",
                value=bool(strtobool(os.getenv("KEEP_BROWSER_OPEN", "true"))),
                info="Keep Browser Open between Tasks",
                interactive=True
            )
            headless = gr.Checkbox(
                label="Headless Mode",
                value=False,
                info="Run browser without GUI",
                interactive=True
            )
            disable_security = gr.Checkbox(
                label="Disable Security",
                value=False,
                info="Disable browser security",
                interactive=True
            )

    with gr.Group():
        with gr.Row():
            window_w = gr.Number(
                label="Window Width",
                value=1280,
                info="Browser window width",
                interactive=True
            )
            window_h = gr.Number(
                label="Window Height",
                value=1100,
                info="Browser window height",
                interactive=True
            )
    with gr.Group():
        with gr.Row():
            cdp_url = gr.Textbox(
                label="CDP URL",
                value=os.getenv("BROWSER_CDP", None),
                info="CDP URL for browser remote debugging",
                interactive=True,
            )
            wss_url = gr.Textbox(
                label="WSS URL",
                info="WSS URL for browser remote debugging",
                interactive=True,
            )
    with gr.Group():
        with gr.Row():
            save_recording_path = gr.Textbox(
                label="Recording Path",
                placeholder="e.g. ./tmp/record_videos",
                info="Path to save browser recordings",
                interactive=True,
            )

            save_trace_path = gr.Textbox(
                label="Trace Path",
                placeholder="e.g. ./tmp/traces",
                info="Path to save Agent traces",
                interactive=True,
            )

        with gr.Row():
            save_agent_history_path = gr.Textbox(
                label="Agent History Save Path",
                value="./tmp/agent_history",
                info="Specify the directory where agent history should be saved.",
                interactive=True,
            )
            save_download_path = gr.Textbox(
                label="Save Directory for browser downloads",
                value="./tmp/downloads",
                info="Specify the directory where downloaded files should be saved.",
                interactive=True,
            )
    tab_components.update(
        dict(
            browser_binary_path=browser_binary_path,
            browser_user_data_dir=browser_user_data_dir,
            use_own_browser=use_own_browser,
            keep_browser_open=keep_browser_open,
            headless=headless,
            disable_security=disable_security,
            save_recording_path=save_recording_path,
            save_trace_path=save_trace_path,
            save_agent_history_path=save_agent_history_path,
            save_download_path=save_download_path,
            cdp_url=cdp_url,
            wss_url=wss_url,
            window_h=window_h,
            window_w=window_w,
        )
    )
    webui_manager.add_components("browser_settings", tab_components)

    async def close_wrapper():
        """Wrapper for close_browser, run when a browser config option changes."""
        await close_browser(webui_manager)

    headless.change(close_wrapper)
    keep_browser_open.change(close_wrapper)
    disable_security.change(close_wrapper)
    use_own_browser.change(close_wrapper)
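One portability note on the file above: `distutils.util.strtobool` was removed along with `distutils` in Python 3.12, so the import at the top fails on newer interpreters. A drop-in local replacement is small; this sketch (the helper is mine, not part of the repo) preserves the original's contract:

def strtobool(value: str) -> int:
    """Local stand-in for distutils.util.strtobool (removed in Python 3.12).

    Returns 1 for truthy strings, 0 for falsy ones, and raises ValueError
    otherwise -- matching the original function's behavior.
    """
    value = value.lower()
    if value in ("y", "yes", "t", "true", "on", "1"):
        return 1
    if value in ("n", "no", "f", "false", "off", "0"):
        return 0
    raise ValueError(f"invalid truth value {value!r}")


# Usage mirrors the checkboxes above:
# bool(strtobool(os.getenv("USE_OWN_BROWSER", "false")))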
src/webui/components/browser_use_agent_tab.py
ADDED
@@ -0,0 +1,1083 @@
import asyncio
import json
import logging
import os
import uuid
from typing import Any, AsyncGenerator, Dict, Optional

import gradio as gr

# from browser_use.agent.service import Agent
from browser_use.agent.views import (
    AgentHistoryList,
    AgentOutput,
)
from browser_use.browser.browser import BrowserConfig
from browser_use.browser.context import BrowserContext, BrowserContextConfig
from browser_use.browser.views import BrowserState
from gradio.components import Component
from langchain_core.language_models.chat_models import BaseChatModel

from src.agent.browser_use.browser_use_agent import BrowserUseAgent
from src.browser.custom_browser import CustomBrowser
from src.controller.custom_controller import CustomController
from src.utils import llm_provider
from src.webui.webui_manager import WebuiManager

logger = logging.getLogger(__name__)


# --- Helper Functions --- (Defined at module level)


async def _initialize_llm(
    provider: Optional[str],
    model_name: Optional[str],
    temperature: float,
    base_url: Optional[str],
    api_key: Optional[str],
    num_ctx: Optional[int] = None,
) -> Optional[BaseChatModel]:
    """Initializes the LLM based on settings. Returns None if provider/model is missing."""
    if not provider or not model_name:
        logger.info("LLM Provider or Model Name not specified, LLM will be None.")
        return None
    try:
        # Use your actual LLM provider logic here
        logger.info(
            f"Initializing LLM: Provider={provider}, Model={model_name}, Temp={temperature}"
        )
        # Example using a placeholder function
        llm = llm_provider.get_llm_model(
            provider=provider,
            model_name=model_name,
            temperature=temperature,
            base_url=base_url or None,
            api_key=api_key or None,
            # Add other relevant params like num_ctx for ollama
            num_ctx=num_ctx if provider == "ollama" else None,
        )
        return llm
    except Exception as e:
        logger.error(f"Failed to initialize LLM: {e}", exc_info=True)
        gr.Warning(
            f"Failed to initialize LLM '{model_name}' for provider '{provider}'. Please check settings. Error: {e}"
        )
        return None


def _get_config_value(
    webui_manager: WebuiManager,
    comp_dict: Dict[gr.components.Component, Any],
    comp_id_suffix: str,
    default: Any = None,
) -> Any:
    """Safely get value from component dictionary using its ID suffix relative to the tab."""
    # Assumes component ID format is "tab_name.comp_name"
    tab_name = "browser_use_agent"  # Hardcode or derive if needed
    comp_id = f"{tab_name}.{comp_id_suffix}"
    # Need to find the component object first using the ID from the manager
    try:
        comp = webui_manager.get_component_by_id(comp_id)
        return comp_dict.get(comp, default)
    except KeyError:
        # Try accessing settings tabs as well
        for prefix in ["agent_settings", "browser_settings"]:
            try:
                comp_id = f"{prefix}.{comp_id_suffix}"
                comp = webui_manager.get_component_by_id(comp_id)
                return comp_dict.get(comp, default)
            except KeyError:
                continue
        logger.warning(
            f"Component with suffix '{comp_id_suffix}' not found in manager for value lookup."
        )
        return default


def _format_agent_output(model_output: AgentOutput) -> str:
    """Formats AgentOutput for display in the chatbot using JSON."""
    content = ""
    if model_output:
        try:
            # Directly use model_dump if actions and current_state are Pydantic models
            action_dump = [
                action.model_dump(exclude_none=True) for action in model_output.action
            ]

            state_dump = model_output.current_state.model_dump(exclude_none=True)
            model_output_dump = {
                "current_state": state_dump,
                "action": action_dump,
            }
            # Dump to JSON string with indentation
            json_string = json.dumps(model_output_dump, indent=4, ensure_ascii=False)
            # Wrap in <pre><code> for proper display in HTML
            content = f"<pre><code class='language-json'>{json_string}</code></pre>"

        except AttributeError as ae:
            logger.error(
                f"AttributeError during model dump: {ae}. Check if 'action' or 'current_state' or their items support 'model_dump'."
            )
            content = f"<pre><code>Error: Could not format agent output (AttributeError: {ae}).\nRaw output: {str(model_output)}</code></pre>"
        except Exception as e:
            logger.error(f"Error formatting agent output: {e}", exc_info=True)
            # Fallback to simple string representation on error
            content = f"<pre><code>Error formatting agent output.\nRaw output:\n{str(model_output)}</code></pre>"

    return content.strip()

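`_format_agent_output` above assumes the `action` items and `current_state` are Pydantic v2 models, so `model_dump(exclude_none=True)` yields JSON-ready dicts. A self-contained illustration of that round trip, using stand-in models rather than the real browser-use types:

import json
from typing import Optional
from pydantic import BaseModel


class Action(BaseModel):  # stand-in for a browser-use action model
    name: str
    value: Optional[str] = None


class State(BaseModel):  # stand-in for the agent's current_state model
    evaluation: str
    next_goal: str


actions = [Action(name="click_element"), Action(name="done", value="finished")]
state = State(evaluation="ok", next_goal="finish")

# exclude_none=True drops null fields, keeping the rendered JSON compact
dump = {
    "current_state": state.model_dump(exclude_none=True),
    "action": [a.model_dump(exclude_none=True) for a in actions],
}
json_string = json.dumps(dump, indent=4, ensure_ascii=False)
print(f"<pre><code class='language-json'>{json_string}</code></pre>")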

# --- Updated Callback Implementation ---


async def _handle_new_step(
    webui_manager: WebuiManager, state: BrowserState, output: AgentOutput, step_num: int
):
    """Callback for each step taken by the agent, including screenshot display."""

    # Use the correct chat history attribute name from the user's code
    if not hasattr(webui_manager, "bu_chat_history"):
        logger.error(
            "Attribute 'bu_chat_history' not found in webui_manager! Cannot add chat message."
        )
        # Initialize it maybe? Or raise an error? For now, log and potentially skip chat update.
        webui_manager.bu_chat_history = []  # Initialize if missing (consider if this is the right place)
        # return # Or stop if this is critical
    step_num -= 1
    logger.info(f"Step {step_num} completed.")

    # --- Screenshot Handling ---
    screenshot_html = ""
    # Ensure state.screenshot exists and is not empty before proceeding
    # Use getattr for safer access
    screenshot_data = getattr(state, "screenshot", None)
    if screenshot_data:
        try:
            # Basic validation: check if it looks like base64
            if (
                isinstance(screenshot_data, str) and len(screenshot_data) > 100
            ):  # Arbitrary length check
                # *** UPDATED STYLE: Removed centering, adjusted width ***
                img_tag = f'<img src="data:image/jpeg;base64,{screenshot_data}" alt="Step {step_num} Screenshot" style="max-width: 800px; max-height: 600px; object-fit:contain;" />'
                screenshot_html = (
                    img_tag + "<br/>"
                )  # Use <br/> for line break after inline-block image
            else:
                logger.warning(
                    f"Screenshot for step {step_num} seems invalid (type: {type(screenshot_data)}, len: {len(screenshot_data) if isinstance(screenshot_data, str) else 'N/A'})."
                )
                screenshot_html = "**[Invalid screenshot data]**<br/>"

        except Exception as e:
            logger.error(
                f"Error processing or formatting screenshot for step {step_num}: {e}",
                exc_info=True,
            )
            screenshot_html = "**[Error displaying screenshot]**<br/>"
    else:
        logger.debug(f"No screenshot available for step {step_num}.")

    # --- Format Agent Output ---
    formatted_output = _format_agent_output(output)  # Use the updated function

    # --- Combine and Append to Chat ---
    step_header = f"--- **Step {step_num}** ---"
    # Combine header, image (with line break), and JSON block
    final_content = step_header + "<br/>" + screenshot_html + formatted_output

    chat_message = {
        "role": "assistant",
        "content": final_content.strip(),  # Remove leading/trailing whitespace
    }

    # Append to the correct chat history list
    webui_manager.bu_chat_history.append(chat_message)

    await asyncio.sleep(0.05)


def _handle_done(webui_manager: WebuiManager, history: AgentHistoryList):
    """Callback when the agent finishes the task (success or failure)."""
    logger.info(
        f"Agent task finished. Duration: {history.total_duration_seconds():.2f}s, Tokens: {history.total_input_tokens()}"
    )
    final_summary = "**Task Completed**\n"
    final_summary += f"- Duration: {history.total_duration_seconds():.2f} seconds\n"
    final_summary += f"- Total Input Tokens: {history.total_input_tokens()}\n"  # Or total tokens if available

    final_result = history.final_result()
    if final_result:
        final_summary += f"- Final Result: {final_result}\n"

    errors = history.errors()
    if errors and any(errors):
        final_summary += f"- **Errors:**\n```\n{errors}\n```\n"
    else:
        final_summary += "- Status: Success\n"

    webui_manager.bu_chat_history.append(
        {"role": "assistant", "content": final_summary}
    )


async def _ask_assistant_callback(
    webui_manager: WebuiManager, query: str, browser_context: BrowserContext
) -> Dict[str, Any]:
    """Callback triggered by the agent's ask_for_assistant action."""
    logger.info("Agent requires assistance. Waiting for user input.")

    if not hasattr(webui_manager, "bu_chat_history"):
        logger.error("Chat history not found in webui_manager during ask_assistant!")
        return {"response": "Internal Error: Cannot display help request."}

    webui_manager.bu_chat_history.append(
        {
            "role": "assistant",
            "content": f"**Need Help:** {query}\nPlease provide information or perform the required action in the browser, then type your response/confirmation below and click 'Submit Response'.",
        }
    )

    # Use state stored in webui_manager
    webui_manager.bu_response_event = asyncio.Event()
    webui_manager.bu_user_help_response = None  # Reset previous response

    try:
        logger.info("Waiting for user response event...")
        await asyncio.wait_for(
            webui_manager.bu_response_event.wait(), timeout=3600.0
        )  # Long timeout
        logger.info("User response event received.")
    except asyncio.TimeoutError:
        logger.warning("Timeout waiting for user assistance.")
        webui_manager.bu_chat_history.append(
            {
                "role": "assistant",
                "content": "**Timeout:** No response received. Trying to proceed.",
            }
        )
        webui_manager.bu_response_event = None  # Clear the event
        return {"response": "Timeout: User did not respond."}  # Inform the agent

    response = webui_manager.bu_user_help_response
    webui_manager.bu_chat_history.append(
        {"role": "user", "content": response}
    )  # Show user response in chat
    webui_manager.bu_response_event = (
        None  # Clear the event for the next potential request
    )
    return {"response": response}

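`_ask_assistant_callback` is one half of an asyncio handshake: the agent coroutine parks on `bu_response_event` while `handle_submit` (further down) stores the user's text and sets the event. The same handshake in isolation, as a runnable sketch with illustrative names:

import asyncio
from typing import Optional


class HelpBridge:
    """Minimal sketch of the Event-based request/response handshake."""

    def __init__(self) -> None:
        self.response_event: Optional[asyncio.Event] = None
        self.user_response: Optional[str] = None

    async def ask(self, query: str, timeout: float = 5.0) -> str:
        # Agent side: publish the question, then block on the event.
        print(f"Need help: {query}")
        self.response_event = asyncio.Event()
        self.user_response = None
        try:
            await asyncio.wait_for(self.response_event.wait(), timeout=timeout)
        except asyncio.TimeoutError:
            return "Timeout: no response."
        finally:
            self.response_event = None  # ready for the next request
        return self.user_response or "User provided no text response."

    def submit(self, text: str) -> None:
        # UI side: store the answer and release the waiting coroutine.
        self.user_response = text
        if self.response_event:
            self.response_event.set()


async def main() -> None:
    bridge = HelpBridge()
    ask_task = asyncio.create_task(bridge.ask("Which account should I use?"))
    await asyncio.sleep(0.1)          # simulate the user typing
    bridge.submit("Use the test account.")
    print(await ask_task)             # -> "Use the test account."


asyncio.run(main())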

# --- Core Agent Execution Logic --- (Needs access to webui_manager)


async def run_agent_task(
    webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
) -> AsyncGenerator[Dict[gr.components.Component, Any], None]:
    """Handles the entire lifecycle of initializing and running the agent."""

    # --- Get Components ---
    # Need handles to specific UI components to update them
    user_input_comp = webui_manager.get_component_by_id("browser_use_agent.user_input")
    run_button_comp = webui_manager.get_component_by_id("browser_use_agent.run_button")
    stop_button_comp = webui_manager.get_component_by_id(
        "browser_use_agent.stop_button"
    )
    pause_resume_button_comp = webui_manager.get_component_by_id(
        "browser_use_agent.pause_resume_button"
    )
    clear_button_comp = webui_manager.get_component_by_id(
        "browser_use_agent.clear_button"
    )
    chatbot_comp = webui_manager.get_component_by_id("browser_use_agent.chatbot")
    history_file_comp = webui_manager.get_component_by_id(
        "browser_use_agent.agent_history_file"
    )
    gif_comp = webui_manager.get_component_by_id("browser_use_agent.recording_gif")
    browser_view_comp = webui_manager.get_component_by_id(
        "browser_use_agent.browser_view"
    )

    # --- 1. Get Task and Initial UI Update ---
    task = components.get(user_input_comp, "").strip()
    if not task:
        gr.Warning("Please enter a task.")
        yield {run_button_comp: gr.update(interactive=True)}
        return

    # Set running state indirectly via _current_task
    webui_manager.bu_chat_history.append({"role": "user", "content": task})

    yield {
        user_input_comp: gr.Textbox(
            value="", interactive=False, placeholder="Agent is running..."
        ),
        run_button_comp: gr.Button(value="⏳ Running...", interactive=False),
        stop_button_comp: gr.Button(interactive=True),
        pause_resume_button_comp: gr.Button(value="⏸️ Pause", interactive=True),
        clear_button_comp: gr.Button(interactive=False),
        chatbot_comp: gr.update(value=webui_manager.bu_chat_history),
        history_file_comp: gr.update(value=None),
        gif_comp: gr.update(value=None),
    }

    # --- Agent Settings ---
    # Access settings values via components dict, getting IDs from webui_manager
    def get_setting(key, default=None):
        comp = webui_manager.id_to_component.get(f"agent_settings.{key}")
        return components.get(comp, default) if comp else default

    override_system_prompt = get_setting("override_system_prompt") or None
    extend_system_prompt = get_setting("extend_system_prompt") or None
    llm_provider_name = get_setting(
        "llm_provider", None
    )  # Default to None if not found
    llm_model_name = get_setting("llm_model_name", None)
    llm_temperature = get_setting("llm_temperature", 0.6)
    use_vision = get_setting("use_vision", True)
    ollama_num_ctx = get_setting("ollama_num_ctx", 16000)
    llm_base_url = get_setting("llm_base_url") or None
    llm_api_key = get_setting("llm_api_key") or None
    max_steps = get_setting("max_steps", 100)
    max_actions = get_setting("max_actions", 10)
    max_input_tokens = get_setting("max_input_tokens", 128000)
    tool_calling_str = get_setting("tool_calling_method", "auto")
    tool_calling_method = tool_calling_str if tool_calling_str != "None" else None
    mcp_server_config_comp = webui_manager.id_to_component.get(
        "agent_settings.mcp_server_config"
    )
    mcp_server_config_str = (
        components.get(mcp_server_config_comp) if mcp_server_config_comp else None
    )
    mcp_server_config = (
        json.loads(mcp_server_config_str) if mcp_server_config_str else None
    )

    # Planner LLM Settings (Optional)
    planner_llm_provider_name = get_setting("planner_llm_provider") or None
    planner_llm = None
    planner_use_vision = False
    if planner_llm_provider_name:
        planner_llm_model_name = get_setting("planner_llm_model_name")
        planner_llm_temperature = get_setting("planner_llm_temperature", 0.6)
        planner_ollama_num_ctx = get_setting("planner_ollama_num_ctx", 16000)
        planner_llm_base_url = get_setting("planner_llm_base_url") or None
        planner_llm_api_key = get_setting("planner_llm_api_key") or None
        planner_use_vision = get_setting("planner_use_vision", False)

        planner_llm = await _initialize_llm(
            planner_llm_provider_name,
            planner_llm_model_name,
            planner_llm_temperature,
            planner_llm_base_url,
            planner_llm_api_key,
            planner_ollama_num_ctx if planner_llm_provider_name == "ollama" else None,
        )

    # --- Browser Settings ---
    def get_browser_setting(key, default=None):
        comp = webui_manager.id_to_component.get(f"browser_settings.{key}")
        return components.get(comp, default) if comp else default

    browser_binary_path = get_browser_setting("browser_binary_path") or None
    browser_user_data_dir = get_browser_setting("browser_user_data_dir") or None
    use_own_browser = get_browser_setting(
        "use_own_browser", False
    )  # Logic handled by CDP/WSS presence
    keep_browser_open = get_browser_setting("keep_browser_open", False)
    headless = get_browser_setting("headless", False)
    disable_security = get_browser_setting("disable_security", False)
    window_w = int(get_browser_setting("window_w", 1280))
    window_h = int(get_browser_setting("window_h", 1100))
    cdp_url = get_browser_setting("cdp_url") or None
    wss_url = get_browser_setting("wss_url") or None
    save_recording_path = get_browser_setting("save_recording_path") or None
    save_trace_path = get_browser_setting("save_trace_path") or None
    save_agent_history_path = get_browser_setting(
        "save_agent_history_path", "./tmp/agent_history"
    )
    save_download_path = get_browser_setting("save_download_path", "./tmp/downloads")

    stream_vw = 70
    stream_vh = int(70 * window_h // window_w)

    os.makedirs(save_agent_history_path, exist_ok=True)
    if save_recording_path:
        os.makedirs(save_recording_path, exist_ok=True)
    if save_trace_path:
        os.makedirs(save_trace_path, exist_ok=True)
    if save_download_path:
        os.makedirs(save_download_path, exist_ok=True)

    # --- 2. Initialize LLM ---
    main_llm = await _initialize_llm(
        llm_provider_name,
        llm_model_name,
        llm_temperature,
        llm_base_url,
        llm_api_key,
        ollama_num_ctx if llm_provider_name == "ollama" else None,
    )

    # Pass the webui_manager instance to the callback when wrapping it
    async def ask_callback_wrapper(
        query: str, browser_context: BrowserContext
    ) -> Dict[str, Any]:
        return await _ask_assistant_callback(webui_manager, query, browser_context)

    if not webui_manager.bu_controller:
        webui_manager.bu_controller = CustomController(
            ask_assistant_callback=ask_callback_wrapper
        )
        await webui_manager.bu_controller.setup_mcp_client(mcp_server_config)

    # --- 4. Initialize Browser and Context ---
    should_close_browser_on_finish = not keep_browser_open

    try:
        # Close existing resources if not keeping open
        if not keep_browser_open:
            if webui_manager.bu_browser_context:
                logger.info("Closing previous browser context.")
                await webui_manager.bu_browser_context.close()
                webui_manager.bu_browser_context = None
            if webui_manager.bu_browser:
                logger.info("Closing previous browser.")
                await webui_manager.bu_browser.close()
                webui_manager.bu_browser = None

        # Create Browser if needed
        if not webui_manager.bu_browser:
            logger.info("Launching new browser instance.")
            extra_args = []
            if use_own_browser:
                browser_binary_path = os.getenv("BROWSER_PATH", None) or browser_binary_path
                if browser_binary_path == "":
                    browser_binary_path = None
                browser_user_data = browser_user_data_dir or os.getenv("BROWSER_USER_DATA", None)
                if browser_user_data:
                    extra_args += [f"--user-data-dir={browser_user_data}"]
            else:
                browser_binary_path = None

            webui_manager.bu_browser = CustomBrowser(
                config=BrowserConfig(
                    headless=headless,
                    disable_security=disable_security,
                    browser_binary_path=browser_binary_path,
                    extra_browser_args=extra_args,
                    wss_url=wss_url,
                    cdp_url=cdp_url,
                    new_context_config=BrowserContextConfig(
                        window_width=window_w,
                        window_height=window_h,
                    )
                )
            )

        # Create Context if needed
        if not webui_manager.bu_browser_context:
            logger.info("Creating new browser context.")
            context_config = BrowserContextConfig(
                trace_path=save_trace_path if save_trace_path else None,
                save_recording_path=save_recording_path
                if save_recording_path
                else None,
                save_downloads_path=save_download_path if save_download_path else None,
                window_height=window_h,
                window_width=window_w,
            )
            if not webui_manager.bu_browser:
                raise ValueError("Browser not initialized, cannot create context.")
            webui_manager.bu_browser_context = (
                await webui_manager.bu_browser.new_context(config=context_config)
            )

        # --- 5. Initialize or Update Agent ---
        webui_manager.bu_agent_task_id = str(uuid.uuid4())  # New ID for this task run
        os.makedirs(
            os.path.join(save_agent_history_path, webui_manager.bu_agent_task_id),
            exist_ok=True,
        )
        history_file = os.path.join(
            save_agent_history_path,
            webui_manager.bu_agent_task_id,
            f"{webui_manager.bu_agent_task_id}.json",
        )
        gif_path = os.path.join(
            save_agent_history_path,
            webui_manager.bu_agent_task_id,
            f"{webui_manager.bu_agent_task_id}.gif",
        )

        # Pass the webui_manager to callbacks when wrapping them
        async def step_callback_wrapper(
            state: BrowserState, output: AgentOutput, step_num: int
        ):
            await _handle_new_step(webui_manager, state, output, step_num)

        def done_callback_wrapper(history: AgentHistoryList):
            _handle_done(webui_manager, history)

        if not webui_manager.bu_agent:
            logger.info(f"Initializing new agent for task: {task}")
            if not webui_manager.bu_browser or not webui_manager.bu_browser_context:
                raise ValueError(
                    "Browser or Context not initialized, cannot create agent."
                )
            webui_manager.bu_agent = BrowserUseAgent(
                task=task,
                llm=main_llm,
                browser=webui_manager.bu_browser,
                browser_context=webui_manager.bu_browser_context,
                controller=webui_manager.bu_controller,
                register_new_step_callback=step_callback_wrapper,
                register_done_callback=done_callback_wrapper,
                use_vision=use_vision,
                override_system_message=override_system_prompt,
                extend_system_message=extend_system_prompt,
                max_input_tokens=max_input_tokens,
                max_actions_per_step=max_actions,
                tool_calling_method=tool_calling_method,
                planner_llm=planner_llm,
                use_vision_for_planner=planner_use_vision if planner_llm else False,
                source="webui",
            )
            webui_manager.bu_agent.state.agent_id = webui_manager.bu_agent_task_id
            webui_manager.bu_agent.settings.generate_gif = gif_path
        else:
            webui_manager.bu_agent.state.agent_id = webui_manager.bu_agent_task_id
            webui_manager.bu_agent.add_new_task(task)
            webui_manager.bu_agent.settings.generate_gif = gif_path
            webui_manager.bu_agent.browser = webui_manager.bu_browser
            webui_manager.bu_agent.browser_context = webui_manager.bu_browser_context
            webui_manager.bu_agent.controller = webui_manager.bu_controller

        # --- 6. Run Agent Task and Stream Updates ---
        agent_run_coro = webui_manager.bu_agent.run(max_steps=max_steps)
        agent_task = asyncio.create_task(agent_run_coro)
        webui_manager.bu_current_task = agent_task  # Store the task

        last_chat_len = len(webui_manager.bu_chat_history)
        while not agent_task.done():
            is_paused = webui_manager.bu_agent.state.paused
            is_stopped = webui_manager.bu_agent.state.stopped

            # Check for pause state
            if is_paused:
                yield {
                    pause_resume_button_comp: gr.update(
                        value="▶️ Resume", interactive=True
                    ),
                    stop_button_comp: gr.update(interactive=True),
                }
                # Wait until pause is released or task is stopped/done
                while is_paused and not agent_task.done():
                    # Re-check agent state in loop
                    is_paused = webui_manager.bu_agent.state.paused
                    is_stopped = webui_manager.bu_agent.state.stopped
                    if is_stopped:  # Stop signal received while paused
                        break
                    await asyncio.sleep(0.2)

                if (
                    agent_task.done() or is_stopped
                ):  # If stopped or task finished while paused
                    break

                # If resumed, yield UI update
                yield {
                    pause_resume_button_comp: gr.update(
                        value="⏸️ Pause", interactive=True
                    ),
                    run_button_comp: gr.update(
                        value="⏳ Running...", interactive=False
                    ),
                }

            # Check if agent stopped itself or stop button was pressed (which sets agent.state.stopped)
            if is_stopped:
                logger.info("Agent has stopped (internally or via stop button).")
                if not agent_task.done():
                    # Ensure the task coroutine finishes if agent just set flag
                    try:
                        await asyncio.wait_for(
                            agent_task, timeout=1.0
                        )  # Give it a moment to exit run()
                    except asyncio.TimeoutError:
                        logger.warning(
                            "Agent task did not finish quickly after stop signal, cancelling."
                        )
                        agent_task.cancel()
                    except Exception:  # Catch task exceptions if it errors on stop
                        pass
                break  # Exit the streaming loop

            # Check if agent is asking for help (via response_event)
            update_dict = {}
            if webui_manager.bu_response_event is not None:
                update_dict = {
                    user_input_comp: gr.update(
                        placeholder="Agent needs help. Enter response and submit.",
                        interactive=True,
                    ),
                    run_button_comp: gr.update(
                        value="✔️ Submit Response", interactive=True
                    ),
                    pause_resume_button_comp: gr.update(interactive=False),
                    stop_button_comp: gr.update(interactive=False),
                    chatbot_comp: gr.update(value=webui_manager.bu_chat_history),
                }
                last_chat_len = len(webui_manager.bu_chat_history)
                yield update_dict
                # Wait until response is submitted or task finishes
                while (
                    webui_manager.bu_response_event is not None
                    and not agent_task.done()
                ):
                    await asyncio.sleep(0.2)
                # Restore UI after response submitted or if task ended unexpectedly
                if not agent_task.done():
                    yield {
                        user_input_comp: gr.update(
                            placeholder="Agent is running...", interactive=False
                        ),
                        run_button_comp: gr.update(
                            value="⏳ Running...", interactive=False
                        ),
                        pause_resume_button_comp: gr.update(interactive=True),
                        stop_button_comp: gr.update(interactive=True),
                    }
                else:
                    break  # Task finished while waiting for response

            # Update Chatbot if new messages arrived via callbacks
            if len(webui_manager.bu_chat_history) > last_chat_len:
                update_dict[chatbot_comp] = gr.update(
                    value=webui_manager.bu_chat_history
                )
                last_chat_len = len(webui_manager.bu_chat_history)

            # Update Browser View
            if headless and webui_manager.bu_browser_context:
                try:
                    screenshot_b64 = (
                        await webui_manager.bu_browser_context.take_screenshot()
                    )
                    if screenshot_b64:
                        html_content = f'<img src="data:image/jpeg;base64,{screenshot_b64}" style="width:{stream_vw}vw; height:{stream_vh}vh ; border:1px solid #ccc;">'
                        update_dict[browser_view_comp] = gr.update(
                            value=html_content, visible=True
                        )
                    else:
                        html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>"
                        update_dict[browser_view_comp] = gr.update(
                            value=html_content, visible=True
                        )
                except Exception as e:
                    logger.debug(f"Failed to capture screenshot: {e}")
                    update_dict[browser_view_comp] = gr.update(
                        value="<div style='...'>Error loading view...</div>",
                        visible=True,
                    )
            else:
                update_dict[browser_view_comp] = gr.update(visible=False)

            # Yield accumulated updates
            if update_dict:
                yield update_dict

            await asyncio.sleep(0.1)  # Polling interval

        # --- 7. Task Finalization ---
        webui_manager.bu_agent.state.paused = False
        webui_manager.bu_agent.state.stopped = False
        final_update = {}
        try:
            logger.info("Agent task completing...")
            # Await the task to ensure completion and catch exceptions if not already caught
            if not agent_task.done():
                await agent_task  # Retrieve result/exception
            elif agent_task.exception():  # Check if task finished with exception
                agent_task.result()  # Raise the exception to be caught below
            logger.info("Agent task completed processing.")

            logger.info(f"Explicitly saving agent history to: {history_file}")
            webui_manager.bu_agent.save_history(history_file)

            if os.path.exists(history_file):
                final_update[history_file_comp] = gr.File(value=history_file)

            if gif_path and os.path.exists(gif_path):
                logger.info(f"GIF found at: {gif_path}")
                final_update[gif_comp] = gr.Image(value=gif_path)

        except asyncio.CancelledError:
            logger.info("Agent task was cancelled.")
            if not any(
                "Cancelled" in msg.get("content", "")
                for msg in webui_manager.bu_chat_history
                if msg.get("role") == "assistant"
            ):
                webui_manager.bu_chat_history.append(
                    {"role": "assistant", "content": "**Task Cancelled**."}
                )
            final_update[chatbot_comp] = gr.update(value=webui_manager.bu_chat_history)
        except Exception as e:
            logger.error(f"Error during agent execution: {e}", exc_info=True)
            error_message = (
                f"**Agent Execution Error:**\n```\n{type(e).__name__}: {e}\n```"
            )
            if not any(
                error_message in msg.get("content", "")
                for msg in webui_manager.bu_chat_history
                if msg.get("role") == "assistant"
            ):
                webui_manager.bu_chat_history.append(
                    {"role": "assistant", "content": error_message}
                )
            final_update[chatbot_comp] = gr.update(value=webui_manager.bu_chat_history)
            gr.Error(f"Agent execution failed: {e}")

        finally:
            webui_manager.bu_current_task = None  # Clear the task reference

            # Close browser/context if requested
            if should_close_browser_on_finish:
                if webui_manager.bu_browser_context:
                    logger.info("Closing browser context after task.")
                    await webui_manager.bu_browser_context.close()
                    webui_manager.bu_browser_context = None
                if webui_manager.bu_browser:
                    logger.info("Closing browser after task.")
                    await webui_manager.bu_browser.close()
                    webui_manager.bu_browser = None

            # --- 8. Final UI Update ---
            final_update.update(
                {
                    user_input_comp: gr.update(
                        value="",
                        interactive=True,
                        placeholder="Enter your next task...",
                    ),
                    run_button_comp: gr.update(value="▶️ Submit Task", interactive=True),
                    stop_button_comp: gr.update(value="⏹️ Stop", interactive=False),
                    pause_resume_button_comp: gr.update(
                        value="⏸️ Pause", interactive=False
                    ),
                    clear_button_comp: gr.update(interactive=True),
                    # Ensure final chat history is shown
                    chatbot_comp: gr.update(value=webui_manager.bu_chat_history),
                }
            )
            yield final_update

    except Exception as e:
        # Catch errors during setup (before agent run starts)
        logger.error(f"Error setting up agent task: {e}", exc_info=True)
        webui_manager.bu_current_task = None  # Ensure state is reset
        yield {
            user_input_comp: gr.update(
                interactive=True, placeholder="Error during setup. Enter task..."
            ),
            run_button_comp: gr.update(value="▶️ Submit Task", interactive=True),
            stop_button_comp: gr.update(value="⏹️ Stop", interactive=False),
            pause_resume_button_comp: gr.update(value="⏸️ Pause", interactive=False),
            clear_button_comp: gr.update(interactive=True),
            chatbot_comp: gr.update(
                value=webui_manager.bu_chat_history
                + [{"role": "assistant", "content": f"**Setup Error:** {e}"}]
            ),
        }

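`run_agent_task` is an async generator handler: each `yield` delivers a partial `{component: gr.update(...)}` dict to Gradio while the agent task runs in the background, polled every 0.1 s. A stripped-down sketch of that poll-and-yield shape, assuming only stock Gradio (the fake `work` task stands in for the agent run):

import asyncio

import gradio as gr


async def run_job(n_steps):
    """Async generator handler: poll a background task, yield partial updates."""

    async def work():
        for _ in range(int(n_steps)):
            await asyncio.sleep(0.5)  # stand-in for one agent step

    task = asyncio.create_task(work())
    loop = asyncio.get_running_loop()
    started = loop.time()
    while not task.done():
        # Each yield pushes an incremental update to the output component.
        yield gr.update(value=f"Running... {loop.time() - started:.1f}s")
        await asyncio.sleep(0.1)  # polling interval, as in run_agent_task
    yield gr.update(value="Done")


with gr.Blocks() as demo:
    n_steps = gr.Number(label="Steps", value=3)
    status = gr.Textbox(label="Status")
    gr.Button("Run").click(run_job, inputs=n_steps, outputs=status)

if __name__ == "__main__":
    demo.launch()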

# --- Button Click Handlers --- (Need access to webui_manager)


async def handle_submit(
    webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
):
    """Handles clicks on the main 'Submit' button."""
    user_input_comp = webui_manager.get_component_by_id("browser_use_agent.user_input")
    user_input_value = components.get(user_input_comp, "").strip()

    # Check if waiting for user assistance
    if webui_manager.bu_response_event and not webui_manager.bu_response_event.is_set():
        logger.info(f"User submitted assistance: {user_input_value}")
        webui_manager.bu_user_help_response = (
            user_input_value if user_input_value else "User provided no text response."
        )
        webui_manager.bu_response_event.set()
        # UI updates handled by the main loop reacting to the event being set
        yield {
            user_input_comp: gr.update(
                value="",
                interactive=False,
                placeholder="Waiting for agent to continue...",
            ),
            webui_manager.get_component_by_id(
                "browser_use_agent.run_button"
            ): gr.update(value="⏳ Running...", interactive=False),
        }
    # Check if a task is currently running (using _current_task)
    elif webui_manager.bu_current_task and not webui_manager.bu_current_task.done():
        logger.warning(
            "Submit button clicked while agent is already running and not asking for help."
        )
        gr.Info("Agent is currently running. Please wait or use Stop/Pause.")
        yield {}  # No change
    else:
        # Handle submission for a new task
        logger.info("Submit button clicked for new task.")
        # Use async generator to stream updates from run_agent_task
        async for update in run_agent_task(webui_manager, components):
            yield update


async def handle_stop(webui_manager: WebuiManager):
    """Handles clicks on the 'Stop' button."""
    logger.info("Stop button clicked.")
    agent = webui_manager.bu_agent
    task = webui_manager.bu_current_task

    if agent and task and not task.done():
        # Signal the agent to stop by setting its internal flag
        agent.state.stopped = True
        agent.state.paused = False  # Ensure not paused if stopped
        return {
            webui_manager.get_component_by_id(
                "browser_use_agent.stop_button"
            ): gr.update(interactive=False, value="⏹️ Stopping..."),
            webui_manager.get_component_by_id(
                "browser_use_agent.pause_resume_button"
            ): gr.update(interactive=False),
            webui_manager.get_component_by_id(
                "browser_use_agent.run_button"
            ): gr.update(interactive=False),
        }
    else:
        logger.warning("Stop clicked but agent is not running or task is already done.")
        # Reset UI just in case it's stuck
        return {
            webui_manager.get_component_by_id(
                "browser_use_agent.run_button"
            ): gr.update(interactive=True),
            webui_manager.get_component_by_id(
                "browser_use_agent.stop_button"
            ): gr.update(interactive=False),
            webui_manager.get_component_by_id(
                "browser_use_agent.pause_resume_button"
            ): gr.update(interactive=False),
            webui_manager.get_component_by_id(
                "browser_use_agent.clear_button"
            ): gr.update(interactive=True),
        }


async def handle_pause_resume(webui_manager: WebuiManager):
    """Handles clicks on the 'Pause/Resume' button."""
    agent = webui_manager.bu_agent
    task = webui_manager.bu_current_task

    if agent and task and not task.done():
        if agent.state.paused:
            logger.info("Resume button clicked.")
            agent.resume()
            # UI update happens in main loop
            return {
                webui_manager.get_component_by_id(
                    "browser_use_agent.pause_resume_button"
                ): gr.update(value="⏸️ Pause", interactive=True)
            }  # Optimistic update
        else:
            logger.info("Pause button clicked.")
            agent.pause()
            return {
                webui_manager.get_component_by_id(
                    "browser_use_agent.pause_resume_button"
                ): gr.update(value="▶️ Resume", interactive=True)
            }  # Optimistic update
    else:
        logger.warning(
            "Pause/Resume clicked but agent is not running or doesn't support state."
        )
        return {}  # No change


async def handle_clear(webui_manager: WebuiManager):
    """Handles clicks on the 'Clear' button."""
    logger.info("Clear button clicked.")

    # Stop any running task first
    task = webui_manager.bu_current_task
    if task and not task.done():
        logger.info("Clearing requires stopping the current task.")
        webui_manager.bu_agent.stop()
        task.cancel()
        try:
            await asyncio.wait_for(task, timeout=2.0)  # Wait briefly
        except (asyncio.CancelledError, asyncio.TimeoutError):
            pass
        except Exception as e:
            logger.warning(f"Error stopping task on clear: {e}")
    webui_manager.bu_current_task = None

    if webui_manager.bu_controller:
        await webui_manager.bu_controller.close_mcp_client()
        webui_manager.bu_controller = None
    webui_manager.bu_agent = None

    # Reset state stored in manager
    webui_manager.bu_chat_history = []
    webui_manager.bu_response_event = None
    webui_manager.bu_user_help_response = None
    webui_manager.bu_agent_task_id = None

    logger.info("Agent state and browser resources cleared.")

    # Reset UI components
    return {
        webui_manager.get_component_by_id("browser_use_agent.chatbot"): gr.update(
            value=[]
        ),
        webui_manager.get_component_by_id("browser_use_agent.user_input"): gr.update(
            value="", placeholder="Enter your task here..."
        ),
        webui_manager.get_component_by_id(
            "browser_use_agent.agent_history_file"
        ): gr.update(value=None),
        webui_manager.get_component_by_id("browser_use_agent.recording_gif"): gr.update(
            value=None
        ),
        webui_manager.get_component_by_id("browser_use_agent.browser_view"): gr.update(
            value="<div style='...'>Browser Cleared</div>"
        ),
        webui_manager.get_component_by_id("browser_use_agent.run_button"): gr.update(
            value="▶️ Submit Task", interactive=True
        ),
        webui_manager.get_component_by_id("browser_use_agent.stop_button"): gr.update(
            interactive=False
        ),
        webui_manager.get_component_by_id(
            "browser_use_agent.pause_resume_button"
        ): gr.update(value="⏸️ Pause", interactive=False),
        webui_manager.get_component_by_id("browser_use_agent.clear_button"): gr.update(
            interactive=True
        ),
    }

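`handle_clear` tears down the in-flight task with the usual signal-cancel-await sequence, treating `CancelledError` as the expected outcome rather than a failure. The pattern in isolation (the helper name is mine, not part of the repo):

import asyncio


async def cancel_and_wait(task: asyncio.Task, timeout: float = 2.0) -> None:
    """Cancel a running task and wait briefly for it to unwind."""
    if task.done():
        return
    task.cancel()
    try:
        # CancelledError is the normal outcome here, not an error.
        await asyncio.wait_for(task, timeout=timeout)
    except (asyncio.CancelledError, asyncio.TimeoutError):
        pass


async def main() -> None:
    async def forever():
        while True:
            await asyncio.sleep(1)

    task = asyncio.create_task(forever())
    await asyncio.sleep(0.1)
    await cancel_and_wait(task)
    print(f"cancelled: {task.cancelled()}")  # -> cancelled: True


asyncio.run(main())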

# --- Tab Creation Function ---


def create_browser_use_agent_tab(webui_manager: WebuiManager):
    """
    Create the run agent tab, defining UI, state, and handlers.
    """
    webui_manager.init_browser_use_agent()

    # --- Define UI Components ---
    tab_components = {}
    with gr.Column():
        chatbot = gr.Chatbot(
            lambda: webui_manager.bu_chat_history,  # Load history dynamically
            elem_id="browser_use_chatbot",
            label="Agent Interaction",
            type="messages",
            height=600,
            show_copy_button=True,
        )
        user_input = gr.Textbox(
            label="Your Task or Response",
            placeholder="Enter your task here or provide assistance when asked.",
            lines=3,
            interactive=True,
            elem_id="user_input",
        )
        with gr.Row():
            stop_button = gr.Button(
                "⏹️ Stop", interactive=False, variant="stop", scale=2
            )
            pause_resume_button = gr.Button(
                "⏸️ Pause", interactive=False, variant="secondary", scale=2, visible=True
            )
            clear_button = gr.Button(
                "🗑️ Clear", interactive=True, variant="secondary", scale=2
            )
            run_button = gr.Button("▶️ Submit Task", variant="primary", scale=3)

        browser_view = gr.HTML(
            value="<div style='width:100%; height:50vh; display:flex; justify-content:center; align-items:center; border:1px solid #ccc; background-color:#f0f0f0;'><p>Browser View (Requires Headless=True)</p></div>",
            label="Browser Live View",
            elem_id="browser_view",
            visible=False,
        )
    with gr.Column():
        gr.Markdown("### Task Outputs")
        agent_history_file = gr.File(label="Agent History JSON", interactive=False)
        recording_gif = gr.Image(
            label="Task Recording GIF",
            format="gif",
            interactive=False,
            type="filepath",
        )

    # --- Store Components in Manager ---
    tab_components.update(
        dict(
            chatbot=chatbot,
            user_input=user_input,
            clear_button=clear_button,
            run_button=run_button,
            stop_button=stop_button,
            pause_resume_button=pause_resume_button,
            agent_history_file=agent_history_file,
            recording_gif=recording_gif,
            browser_view=browser_view,
        )
    )
    webui_manager.add_components(
        "browser_use_agent", tab_components
    )  # Use "browser_use_agent" as tab_name prefix

    all_managed_components = set(
        webui_manager.get_components()
    )  # Get all components known to manager
    run_tab_outputs = list(tab_components.values())

    async def submit_wrapper(
        components_dict: Dict[Component, Any],
    ) -> AsyncGenerator[Dict[Component, Any], None]:
        """Wrapper for handle_submit that yields its results."""
        async for update in handle_submit(webui_manager, components_dict):
            yield update

    async def stop_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
        """Wrapper for handle_stop."""
|
| 1059 |
+
update_dict = await handle_stop(webui_manager)
|
| 1060 |
+
yield update_dict
|
| 1061 |
+
|
| 1062 |
+
async def pause_resume_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
|
| 1063 |
+
"""Wrapper for handle_pause_resume."""
|
| 1064 |
+
update_dict = await handle_pause_resume(webui_manager)
|
| 1065 |
+
yield update_dict
|
| 1066 |
+
|
| 1067 |
+
async def clear_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
|
| 1068 |
+
"""Wrapper for handle_clear."""
|
| 1069 |
+
update_dict = await handle_clear(webui_manager)
|
| 1070 |
+
yield update_dict
|
| 1071 |
+
|
| 1072 |
+
# --- Connect Event Handlers using the Wrappers --
|
| 1073 |
+
run_button.click(
|
| 1074 |
+
fn=submit_wrapper, inputs=all_managed_components, outputs=run_tab_outputs
|
| 1075 |
+
)
|
| 1076 |
+
user_input.submit(
|
| 1077 |
+
fn=submit_wrapper, inputs=all_managed_components, outputs=run_tab_outputs
|
| 1078 |
+
)
|
| 1079 |
+
stop_button.click(fn=stop_wrapper, inputs=None, outputs=run_tab_outputs)
|
| 1080 |
+
pause_resume_button.click(
|
| 1081 |
+
fn=pause_resume_wrapper, inputs=None, outputs=run_tab_outputs
|
| 1082 |
+
)
|
| 1083 |
+
clear_button.click(fn=clear_wrapper, inputs=None, outputs=run_tab_outputs)
|
src/webui/components/deep_research_agent_tab.py
ADDED
@@ -0,0 +1,457 @@
import gradio as gr
from gradio.components import Component
from functools import partial

from src.webui.webui_manager import WebuiManager
from src.utils import config
import logging
import os
from typing import Any, Dict, AsyncGenerator, Optional, Tuple, Union
import asyncio
import json
from src.agent.deep_research.deep_research_agent import DeepResearchAgent
from src.utils import llm_provider

logger = logging.getLogger(__name__)


async def _initialize_llm(provider: Optional[str], model_name: Optional[str], temperature: float,
                          base_url: Optional[str], api_key: Optional[str], num_ctx: Optional[int] = None):
    """Initializes the LLM based on settings. Returns None if provider/model is missing."""
    if not provider or not model_name:
        logger.info("LLM Provider or Model Name not specified, LLM will be None.")
        return None
    try:
        logger.info(f"Initializing LLM: Provider={provider}, Model={model_name}, Temp={temperature}")
        # Use your actual LLM provider logic here
        llm = llm_provider.get_llm_model(
            provider=provider,
            model_name=model_name,
            temperature=temperature,
            base_url=base_url or None,
            api_key=api_key or None,
            num_ctx=num_ctx if provider == "ollama" else None
        )
        return llm
    except Exception as e:
        logger.error(f"Failed to initialize LLM: {e}", exc_info=True)
        gr.Warning(
            f"Failed to initialize LLM '{model_name}' for provider '{provider}'. Please check settings. Error: {e}")
        return None


def _read_file_safe(file_path: str) -> Optional[str]:
    """Safely read a file, returning None if it doesn't exist or on error."""
    if not os.path.exists(file_path):
        return None
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    except Exception as e:
        logger.error(f"Error reading file {file_path}: {e}")
        return None


# --- Deep Research Agent Specific Logic ---

async def run_deep_research(webui_manager: WebuiManager, components: Dict[Component, Any]) -> AsyncGenerator[
    Dict[Component, Any], None]:
    """Handles initializing and running the DeepResearchAgent."""

    # --- Get Components ---
    research_task_comp = webui_manager.get_component_by_id("deep_research_agent.research_task")
    resume_task_id_comp = webui_manager.get_component_by_id("deep_research_agent.resume_task_id")
    parallel_num_comp = webui_manager.get_component_by_id("deep_research_agent.parallel_num")
    save_dir_comp = webui_manager.get_component_by_id(
        "deep_research_agent.max_query")  # Note: component ID seems misnamed in original code
    start_button_comp = webui_manager.get_component_by_id("deep_research_agent.start_button")
    stop_button_comp = webui_manager.get_component_by_id("deep_research_agent.stop_button")
    markdown_display_comp = webui_manager.get_component_by_id("deep_research_agent.markdown_display")
    markdown_download_comp = webui_manager.get_component_by_id("deep_research_agent.markdown_download")
    mcp_server_config_comp = webui_manager.get_component_by_id("deep_research_agent.mcp_server_config")

    # --- 1. Get Task and Settings ---
    task_topic = components.get(research_task_comp, "").strip()
    task_id_to_resume = components.get(resume_task_id_comp, "").strip() or None
    max_parallel_agents = int(components.get(parallel_num_comp, 1))
    base_save_dir = components.get(save_dir_comp, "./tmp/deep_research").strip()
    safe_root_dir = "./tmp/deep_research"
    normalized_base_save_dir = os.path.abspath(os.path.normpath(base_save_dir))
    if os.path.commonpath([normalized_base_save_dir, os.path.abspath(safe_root_dir)]) != os.path.abspath(safe_root_dir):
        logger.warning(f"Unsafe base_save_dir detected: {base_save_dir}. Using default directory.")
        normalized_base_save_dir = os.path.abspath(safe_root_dir)
    base_save_dir = normalized_base_save_dir
    mcp_server_config_str = components.get(mcp_server_config_comp)
    mcp_config = json.loads(mcp_server_config_str) if mcp_server_config_str else None

    if not task_topic:
        gr.Warning("Please enter a research task.")
        yield {start_button_comp: gr.update(interactive=True)}  # Re-enable start button
        return

    # Store base save dir for stop handler
    webui_manager.dr_save_dir = base_save_dir
    os.makedirs(base_save_dir, exist_ok=True)

    # --- 2. Initial UI Update ---
    yield {
        start_button_comp: gr.update(value="⏳ Running...", interactive=False),
        stop_button_comp: gr.update(interactive=True),
        research_task_comp: gr.update(interactive=False),
        resume_task_id_comp: gr.update(interactive=False),
        parallel_num_comp: gr.update(interactive=False),
        save_dir_comp: gr.update(interactive=False),
        markdown_display_comp: gr.update(value="Starting research..."),
        markdown_download_comp: gr.update(value=None, interactive=False)
    }

    agent_task = None
    running_task_id = None
    plan_file_path = None
    report_file_path = None
    last_plan_content = None
    last_plan_mtime = 0

    try:
        # --- 3. Get LLM and Browser Config from other tabs ---
        # Access settings values via components dict, getting IDs from webui_manager
        def get_setting(tab: str, key: str, default: Any = None):
            comp = webui_manager.id_to_component.get(f"{tab}.{key}")
            return components.get(comp, default) if comp else default

        # LLM Config (from agent_settings tab)
        llm_provider_name = get_setting("agent_settings", "llm_provider")
        llm_model_name = get_setting("agent_settings", "llm_model_name")
        llm_temperature = max(get_setting("agent_settings", "llm_temperature", 0.5), 0.5)
        llm_base_url = get_setting("agent_settings", "llm_base_url")
        llm_api_key = get_setting("agent_settings", "llm_api_key")
        ollama_num_ctx = get_setting("agent_settings", "ollama_num_ctx")

        llm = await _initialize_llm(
            llm_provider_name, llm_model_name, llm_temperature, llm_base_url, llm_api_key,
            ollama_num_ctx if llm_provider_name == "ollama" else None
        )
        if not llm:
            raise ValueError("LLM Initialization failed. Please check Agent Settings.")

        # Browser Config (from browser_settings tab)
        # Note: DeepResearchAgent constructor takes a dict, not full Browser/Context objects
        browser_config_dict = {
            "headless": get_setting("browser_settings", "headless", False),
            "disable_security": get_setting("browser_settings", "disable_security", False),
            "browser_binary_path": get_setting("browser_settings", "browser_binary_path"),
            "user_data_dir": get_setting("browser_settings", "browser_user_data_dir"),
            "window_width": int(get_setting("browser_settings", "window_w", 1280)),
            "window_height": int(get_setting("browser_settings", "window_h", 1100)),
            # Add other relevant fields if DeepResearchAgent accepts them
        }

        # --- 4. Initialize or Get Agent ---
        if not webui_manager.dr_agent:
            webui_manager.dr_agent = DeepResearchAgent(
                llm=llm,
                browser_config=browser_config_dict,
                mcp_server_config=mcp_config
            )
            logger.info("DeepResearchAgent initialized.")

        # --- 5. Start Agent Run ---
        agent_run_coro = webui_manager.dr_agent.run(
            topic=task_topic,
            task_id=task_id_to_resume,
            save_dir=base_save_dir,
            max_parallel_browsers=max_parallel_agents
        )
        agent_task = asyncio.create_task(agent_run_coro)
        webui_manager.dr_current_task = agent_task

        # Wait briefly for the agent to start and potentially create the task ID/folder
        await asyncio.sleep(1.0)

        # Determine the actual task ID being used (agent sets this)
        running_task_id = webui_manager.dr_agent.current_task_id
        if not running_task_id:
            # Agent might not have set it yet, try to get from result later? Risky.
            # Or derive from resume_task_id if provided?
            running_task_id = task_id_to_resume
            if not running_task_id:
                logger.warning("Could not determine running task ID immediately.")
                # We can still monitor, but might miss initial plan if ID needed for path
            else:
                logger.info(f"Assuming task ID based on resume ID: {running_task_id}")
        else:
            logger.info(f"Agent started with Task ID: {running_task_id}")

        webui_manager.dr_task_id = running_task_id  # Store for stop handler

        # --- 6. Monitor Progress via research_plan.md ---
        if running_task_id:
            task_specific_dir = os.path.join(base_save_dir, str(running_task_id))
            plan_file_path = os.path.join(task_specific_dir, "research_plan.md")
            report_file_path = os.path.join(task_specific_dir, "report.md")
            logger.info(f"Monitoring plan file: {plan_file_path}")
        else:
            logger.warning("Cannot monitor plan file: Task ID unknown.")
            plan_file_path = None

        last_plan_content = None
        while not agent_task.done():
            update_dict = {}
            update_dict[resume_task_id_comp] = gr.update(value=running_task_id)
            agent_stopped = getattr(webui_manager.dr_agent, 'stopped', False)
            if agent_stopped:
                logger.info("Stop signal detected from agent state.")
                break  # Exit monitoring loop

            # Check and update research plan display
            if plan_file_path:
                try:
                    current_mtime = os.path.getmtime(plan_file_path) if os.path.exists(plan_file_path) else 0
                    if current_mtime > last_plan_mtime:
                        logger.info(f"Detected change in {plan_file_path}")
                        plan_content = _read_file_safe(plan_file_path)
                        if last_plan_content is None or (
                                plan_content is not None and plan_content != last_plan_content):
                            update_dict[markdown_display_comp] = gr.update(value=plan_content)
                            last_plan_content = plan_content
                            last_plan_mtime = current_mtime
                        elif plan_content is None:
                            # File might have been deleted or became unreadable
                            last_plan_mtime = 0  # Reset to force re-read attempt later
                except Exception as e:
                    logger.warning(f"Error checking/reading plan file {plan_file_path}: {e}")
                    # Avoid continuous logging for the same error
                    await asyncio.sleep(2.0)

            # Yield updates if any
            if update_dict:
                yield update_dict

            await asyncio.sleep(1.0)  # Check file changes every second

        # --- 7. Task Finalization ---
        logger.info("Agent task processing finished. Awaiting final result...")
        final_result_dict = await agent_task  # Get result or raise exception
        logger.info(f"Agent run completed. Result keys: {final_result_dict.keys() if final_result_dict else 'None'}")

        # Try to get task ID from result if not known before
        if not running_task_id and final_result_dict and 'task_id' in final_result_dict:
            running_task_id = final_result_dict['task_id']
            webui_manager.dr_task_id = running_task_id
            task_specific_dir = os.path.join(base_save_dir, str(running_task_id))
            report_file_path = os.path.join(task_specific_dir, "report.md")
            logger.info(f"Task ID confirmed from result: {running_task_id}")

        final_ui_update = {}
        if report_file_path and os.path.exists(report_file_path):
            logger.info(f"Loading final report from: {report_file_path}")
            report_content = _read_file_safe(report_file_path)
            if report_content:
                final_ui_update[markdown_display_comp] = gr.update(value=report_content)
                final_ui_update[markdown_download_comp] = gr.File(value=report_file_path,
                                                                  label=f"Report ({running_task_id}.md)",
                                                                  interactive=True)
            else:
                final_ui_update[markdown_display_comp] = gr.update(
                    value="# Research Complete\n\n*Error reading final report file.*")
        elif final_result_dict and 'report' in final_result_dict:
            logger.info("Using report content directly from agent result.")
            # If agent directly returns report content
            final_ui_update[markdown_display_comp] = gr.update(value=final_result_dict['report'])
            # Cannot offer download if only content is available
            final_ui_update[markdown_download_comp] = gr.update(value=None, label="Download Research Report",
                                                                interactive=False)
        else:
            logger.warning("Final report file not found and not in result dict.")
            final_ui_update[markdown_display_comp] = gr.update(value="# Research Complete\n\n*Final report not found.*")

        yield final_ui_update

    except Exception as e:
        logger.error(f"Error during Deep Research Agent execution: {e}", exc_info=True)
        gr.Error(f"Research failed: {e}")
        yield {markdown_display_comp: gr.update(value=f"# Research Failed\n\n**Error:**\n```\n{e}\n```")}

    finally:
        # --- 8. Final UI Reset ---
        webui_manager.dr_current_task = None  # Clear task reference
        webui_manager.dr_task_id = None  # Clear running task ID

        yield {
            start_button_comp: gr.update(value="▶️ Run", interactive=True),
            stop_button_comp: gr.update(interactive=False),
            research_task_comp: gr.update(interactive=True),
            resume_task_id_comp: gr.update(value="", interactive=True),
            parallel_num_comp: gr.update(interactive=True),
            save_dir_comp: gr.update(interactive=True),
            # Keep download button enabled if file exists
            markdown_download_comp: gr.update() if report_file_path and os.path.exists(report_file_path) else gr.update(
                interactive=False)
        }


async def stop_deep_research(webui_manager: WebuiManager) -> Dict[Component, Any]:
    """Handles the Stop button click."""
    logger.info("Stop button clicked for Deep Research.")
    agent = webui_manager.dr_agent
    task = webui_manager.dr_current_task
    task_id = webui_manager.dr_task_id
    base_save_dir = webui_manager.dr_save_dir

    stop_button_comp = webui_manager.get_component_by_id("deep_research_agent.stop_button")
    start_button_comp = webui_manager.get_component_by_id("deep_research_agent.start_button")
    markdown_display_comp = webui_manager.get_component_by_id("deep_research_agent.markdown_display")
    markdown_download_comp = webui_manager.get_component_by_id("deep_research_agent.markdown_download")

    final_update = {
        stop_button_comp: gr.update(interactive=False, value="⏹️ Stopping...")
    }

    if agent and task and not task.done():
        logger.info("Signalling DeepResearchAgent to stop.")
        try:
            # Assuming stop is synchronous or sets a flag quickly
            await agent.stop()
        except Exception as e:
            logger.error(f"Error calling agent.stop(): {e}")

        # The run_deep_research loop should detect the stop and exit.
        # We yield an intermediate "Stopping..." state. The final reset is done by run_deep_research.

        # Try to show the final report if available after stopping
        await asyncio.sleep(1.5)  # Give agent a moment to write final files potentially
        report_file_path = None
        if task_id and base_save_dir:
            report_file_path = os.path.join(base_save_dir, str(task_id), "report.md")

        if report_file_path and os.path.exists(report_file_path):
            report_content = _read_file_safe(report_file_path)
            if report_content:
                final_update[markdown_display_comp] = gr.update(
                    value=report_content + "\n\n---\n*Research stopped by user.*")
                final_update[markdown_download_comp] = gr.File(value=report_file_path, label=f"Report ({task_id}.md)",
                                                               interactive=True)
            else:
                final_update[markdown_display_comp] = gr.update(
                    value="# Research Stopped\n\n*Error reading final report file after stop.*")
        else:
            final_update[markdown_display_comp] = gr.update(value="# Research Stopped by User")

        # Keep start button disabled, run_deep_research finally block will re-enable it.
        final_update[start_button_comp] = gr.update(interactive=False)

    else:
        logger.warning("Stop clicked but no active research task found.")
        # Reset UI state just in case
        final_update = {
            start_button_comp: gr.update(interactive=True),
            stop_button_comp: gr.update(interactive=False),
            webui_manager.get_component_by_id("deep_research_agent.research_task"): gr.update(interactive=True),
            webui_manager.get_component_by_id("deep_research_agent.resume_task_id"): gr.update(interactive=True),
            webui_manager.get_component_by_id("deep_research_agent.max_iteration"): gr.update(interactive=True),
            webui_manager.get_component_by_id("deep_research_agent.max_query"): gr.update(interactive=True),
        }

    return final_update


async def update_mcp_server(mcp_file: str, webui_manager: WebuiManager):
    """
    Update the MCP server.
    """
    if hasattr(webui_manager, "dr_agent") and webui_manager.dr_agent:
        logger.warning("⚠️ Close controller because mcp file has changed!")
        await webui_manager.dr_agent.close_mcp_client()

    if not mcp_file or not os.path.exists(mcp_file) or not mcp_file.endswith('.json'):
        logger.warning(f"{mcp_file} is not a valid MCP file.")
        return None, gr.update(visible=False)

    with open(mcp_file, 'r') as f:
        mcp_server = json.load(f)

    return json.dumps(mcp_server, indent=2), gr.update(visible=True)


def create_deep_research_agent_tab(webui_manager: WebuiManager):
    """
    Creates a deep research agent tab
    """
    input_components = set(webui_manager.get_components())
    tab_components = {}

    with gr.Group():
        with gr.Row():
            mcp_json_file = gr.File(label="MCP server json", interactive=True, file_types=[".json"])
            mcp_server_config = gr.Textbox(label="MCP server", lines=6, interactive=True, visible=False)

    with gr.Group():
        research_task = gr.Textbox(label="Research Task", lines=5,
                                   value="Give me a detailed travel plan to Switzerland from June 1st to 10th.",
                                   interactive=True)
        with gr.Row():
            resume_task_id = gr.Textbox(label="Resume Task ID", value="",
                                        interactive=True)
            parallel_num = gr.Number(label="Parallel Agent Num", value=1,
                                     precision=0,
                                     interactive=True)
            max_query = gr.Textbox(label="Research Save Dir", value="./tmp/deep_research",
                                   interactive=True)
    with gr.Row():
        stop_button = gr.Button("⏹️ Stop", variant="stop", scale=2)
        start_button = gr.Button("▶️ Run", variant="primary", scale=3)
    with gr.Group():
        markdown_display = gr.Markdown(label="Research Report")
        markdown_download = gr.File(label="Download Research Report", interactive=False)
    tab_components.update(
        dict(
            research_task=research_task,
            parallel_num=parallel_num,
            max_query=max_query,
            start_button=start_button,
            stop_button=stop_button,
            markdown_display=markdown_display,
            markdown_download=markdown_download,
            resume_task_id=resume_task_id,
            mcp_json_file=mcp_json_file,
            mcp_server_config=mcp_server_config,
        )
    )
    webui_manager.add_components("deep_research_agent", tab_components)
    webui_manager.init_deep_research_agent()

    async def update_wrapper(mcp_file):
        """Wrapper for update_mcp_server."""
        update_dict = await update_mcp_server(mcp_file, webui_manager)
        yield update_dict

    mcp_json_file.change(
        update_wrapper,
        inputs=[mcp_json_file],
        outputs=[mcp_server_config, mcp_server_config]
    )

    dr_tab_outputs = list(tab_components.values())
    all_managed_inputs = set(webui_manager.get_components())

    # --- Define Event Handler Wrappers ---
    async def start_wrapper(comps: Dict[Component, Any]) -> AsyncGenerator[Dict[Component, Any], None]:
        async for update in run_deep_research(webui_manager, comps):
            yield update

    async def stop_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
        update_dict = await stop_deep_research(webui_manager)
        yield update_dict

    # --- Connect Handlers ---
    start_button.click(
        fn=start_wrapper,
        inputs=all_managed_inputs,
        outputs=dr_tab_outputs
    )

    stop_button.click(
        fn=stop_wrapper,
        inputs=None,
        outputs=dr_tab_outputs
    )
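
The save-directory check in run_deep_research above relies on os.path.commonpath to keep a user-supplied path inside the safe root. An equivalent standalone sketch of that containment test (paths here are illustrative):

import os

def is_within(base: str, candidate: str) -> bool:
    # Both paths are normalized to absolute form before comparison, so
    # "../" segments cannot escape the base directory unnoticed.
    base = os.path.abspath(base)
    candidate = os.path.abspath(os.path.normpath(candidate))
    return os.path.commonpath([candidate, base]) == base

print(is_within("./tmp/deep_research", "./tmp/deep_research/task-1"))       # True
print(is_within("./tmp/deep_research", "./tmp/deep_research/../../etc"))    # False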
src/webui/components/load_save_config_tab.py
ADDED
@@ -0,0 +1,50 @@
import gradio as gr
from gradio.components import Component

from src.webui.webui_manager import WebuiManager
from src.utils import config


def create_load_save_config_tab(webui_manager: WebuiManager):
    """
    Creates a load and save config tab.
    """
    input_components = set(webui_manager.get_components())
    tab_components = {}

    config_file = gr.File(
        label="Load UI Settings from json",
        file_types=[".json"],
        interactive=True
    )
    with gr.Row():
        load_config_button = gr.Button("Load Config", variant="primary")
        save_config_button = gr.Button("Save UI Settings", variant="primary")

    config_status = gr.Textbox(
        label="Status",
        lines=2,
        interactive=False
    )

    tab_components.update(dict(
        load_config_button=load_config_button,
        save_config_button=save_config_button,
        config_status=config_status,
        config_file=config_file,
    ))

    webui_manager.add_components("load_save_config", tab_components)

    save_config_button.click(
        fn=webui_manager.save_config,
        inputs=set(webui_manager.get_components()),
        outputs=[config_status]
    )

    load_config_button.click(
        fn=webui_manager.load_config,
        inputs=[config_file],
        outputs=webui_manager.get_components(),
    )
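
Both buttons delegate to WebuiManager (defined later in this commit), which serializes component values keyed by "tab.component" ids into a timestamped JSON file. A rough illustration of the round-trip those handlers perform; the file name and keys below are made up for the example:

import json
import os

settings = {
    "agent_settings.llm_provider": "openai",
    "browser_settings.headless": True,
}
os.makedirs("./tmp/webui_settings", exist_ok=True)
path = "./tmp/webui_settings/example.json"  # hypothetical file name
with open(path, "w") as fw:
    json.dump(settings, fw, indent=4)   # save_config writes this shape
with open(path, "r") as fr:
    print(json.load(fr))                # load_config reads it back by component id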
src/webui/interface.py
ADDED
@@ -0,0 +1,95 @@
import gradio as gr

from src.webui.webui_manager import WebuiManager
from src.webui.components.agent_settings_tab import create_agent_settings_tab
from src.webui.components.browser_settings_tab import create_browser_settings_tab
from src.webui.components.browser_use_agent_tab import create_browser_use_agent_tab
from src.webui.components.deep_research_agent_tab import create_deep_research_agent_tab
from src.webui.components.load_save_config_tab import create_load_save_config_tab

theme_map = {
    "Default": gr.themes.Default(),
    "Soft": gr.themes.Soft(),
    "Monochrome": gr.themes.Monochrome(),
    "Glass": gr.themes.Glass(),
    "Origin": gr.themes.Origin(),
    "Citrus": gr.themes.Citrus(),
    "Ocean": gr.themes.Ocean(),
    "Base": gr.themes.Base()
}


def create_ui(theme_name="Ocean"):
    css = """
    .gradio-container {
        width: 70vw !important;
        max-width: 70% !important;
        margin-left: auto !important;
        margin-right: auto !important;
        padding-top: 10px !important;
    }
    .header-text {
        text-align: center;
        margin-bottom: 20px;
    }
    .tab-header-text {
        text-align: center;
    }
    .theme-section {
        margin-bottom: 10px;
        padding: 15px;
        border-radius: 10px;
    }
    """

    # dark mode by default
    js_func = """
    function refresh() {
        const url = new URL(window.location);

        if (url.searchParams.get('__theme') !== 'dark') {
            url.searchParams.set('__theme', 'dark');
            window.location.href = url.href;
        }
    }
    """

    ui_manager = WebuiManager()

    with gr.Blocks(
            title="Browser Use WebUI", theme=theme_map[theme_name], css=css, js=js_func,
    ) as demo:
        with gr.Row():
            gr.Markdown(
                """
                # 🌐 Browser Use WebUI
                ### Control your browser with AI assistance
                """,
                elem_classes=["header-text"],
            )

        with gr.Tabs() as tabs:
            with gr.TabItem("⚙️ Agent Settings"):
                create_agent_settings_tab(ui_manager)

            with gr.TabItem("🌐 Browser Settings"):
                create_browser_settings_tab(ui_manager)

            with gr.TabItem("🤖 Run Agent"):
                create_browser_use_agent_tab(ui_manager)

            with gr.TabItem("🎁 Agent Marketplace"):
                gr.Markdown(
                    """
                    ### Agents built on Browser-Use
                    """,
                    elem_classes=["tab-header-text"],
                )
                with gr.Tabs():
                    with gr.TabItem("Deep Research"):
                        create_deep_research_agent_tab(ui_manager)

            with gr.TabItem("📁 Load & Save Config"):
                create_load_save_config_tab(ui_manager)

    return demo
src/webui/webui_manager.py
ADDED
@@ -0,0 +1,122 @@
import json
from collections.abc import Generator
from typing import TYPE_CHECKING
import os
import gradio as gr
from datetime import datetime
from typing import Optional, Dict, List
import uuid
import asyncio
import time

from gradio.components import Component
from browser_use.browser.browser import Browser
from browser_use.browser.context import BrowserContext
from browser_use.agent.service import Agent
from src.browser.custom_browser import CustomBrowser
from src.browser.custom_context import CustomBrowserContext
from src.controller.custom_controller import CustomController
from src.agent.deep_research.deep_research_agent import DeepResearchAgent


class WebuiManager:
    def __init__(self, settings_save_dir: str = "./tmp/webui_settings"):
        self.id_to_component: dict[str, Component] = {}
        self.component_to_id: dict[Component, str] = {}

        self.settings_save_dir = settings_save_dir
        os.makedirs(self.settings_save_dir, exist_ok=True)

    def init_browser_use_agent(self) -> None:
        """
        init browser use agent
        """
        self.bu_agent: Optional[Agent] = None
        self.bu_browser: Optional[CustomBrowser] = None
        self.bu_browser_context: Optional[CustomBrowserContext] = None
        self.bu_controller: Optional[CustomController] = None
        self.bu_chat_history: List[Dict[str, Optional[str]]] = []
        self.bu_response_event: Optional[asyncio.Event] = None
        self.bu_user_help_response: Optional[str] = None
        self.bu_current_task: Optional[asyncio.Task] = None
        self.bu_agent_task_id: Optional[str] = None

    def init_deep_research_agent(self) -> None:
        """
        init deep research agent
        """
        self.dr_agent: Optional[DeepResearchAgent] = None
        self.dr_current_task = None
        self.dr_agent_task_id: Optional[str] = None
        self.dr_save_dir: Optional[str] = None

    def add_components(self, tab_name: str, components_dict: dict[str, "Component"]) -> None:
        """
        Add tab components
        """
        for comp_name, component in components_dict.items():
            comp_id = f"{tab_name}.{comp_name}"
            self.id_to_component[comp_id] = component
            self.component_to_id[component] = comp_id

    def get_components(self) -> list["Component"]:
        """
        Get all components
        """
        return list(self.id_to_component.values())

    def get_component_by_id(self, comp_id: str) -> "Component":
        """
        Get component by id
        """
        return self.id_to_component[comp_id]

    def get_id_by_component(self, comp: "Component") -> str:
        """
        Get id by component
        """
        return self.component_to_id[comp]

    def save_config(self, components: Dict["Component", str]) -> str:
        """
        Save config
        """
        cur_settings = {}
        for comp in components:
            if not isinstance(comp, gr.Button) and not isinstance(comp, gr.File) and str(
                    getattr(comp, "interactive", True)).lower() != "false":
                comp_id = self.get_id_by_component(comp)
                cur_settings[comp_id] = components[comp]

        config_name = datetime.now().strftime("%Y%m%d-%H%M%S")
        with open(os.path.join(self.settings_save_dir, f"{config_name}.json"), "w") as fw:
            json.dump(cur_settings, fw, indent=4)

        return os.path.join(self.settings_save_dir, f"{config_name}.json")

    def load_config(self, config_path: str):
        """
        Load config
        """
        with open(config_path, "r") as fr:
            ui_settings = json.load(fr)

        update_components = {}
        for comp_id, comp_val in ui_settings.items():
            if comp_id in self.id_to_component:
                comp = self.id_to_component[comp_id]
                if comp.__class__.__name__ == "Chatbot":
                    update_components[comp] = comp.__class__(value=comp_val, type="messages")
                else:
                    update_components[comp] = comp.__class__(value=comp_val)
                if comp_id == "agent_settings.planner_llm_provider":
                    yield update_components  # yield provider, let callback run
                    time.sleep(0.1)  # wait for Gradio UI callback

        config_status = self.id_to_component["load_save_config.config_status"]
        update_components.update(
            {
                config_status: config_status.__class__(value=f"Successfully loaded config: {config_path}")
            }
        )
        yield update_components
supervisord.conf
ADDED
@@ -0,0 +1,80 @@
[supervisord]
user=root
nodaemon=true
logfile=/dev/stdout
logfile_maxbytes=0
loglevel=error

[program:xvfb]
command=Xvfb :99 -screen 0 %(ENV_RESOLUTION)s -ac +extension GLX +render -noreset
autorestart=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
priority=100
startsecs=3
stopsignal=TERM
stopwaitsecs=10

[program:vnc_setup]
command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd"
autorestart=false
startsecs=0
priority=150
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0

[program:x11vnc]
command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && chmod 666 /var/log/x11vnc.log && sleep 5 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -rfbport 5901 -o /var/log/x11vnc.log"
autorestart=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
priority=200
startretries=10
startsecs=10
stopsignal=TERM
stopwaitsecs=10
depends_on=vnc_setup,xvfb

[program:x11vnc_log]
command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && tail -f /var/log/x11vnc.log"
autorestart=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
priority=250
stopsignal=TERM
stopwaitsecs=5
depends_on=x11vnc

[program:novnc]
command=bash -c "sleep 5 && cd /opt/novnc && ./utils/novnc_proxy --vnc localhost:5901 --listen 0.0.0.0:6080 --web /opt/novnc"
autorestart=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
priority=300
startretries=5
startsecs=3
depends_on=x11vnc

[program:webui]
command=python webui.py --ip 0.0.0.0 --port 7788
directory=/app
autorestart=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
priority=400
startretries=3
startsecs=3
stopsignal=TERM
stopwaitsecs=10
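
The priority values and depends_on entries above bring the container stack up in order: Xvfb on display :99, the VNC password setup, x11vnc on port 5901, the noVNC proxy on port 6080, and finally the web UI on port 7788. With this file in place the stack is typically started with supervisord -c /path/to/supervisord.conf; the Dockerfile in this commit presumably invokes it as the container entrypoint.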
tests/test_agents.py
ADDED
@@ -0,0 +1,400 @@
| 1 |
+
import pdb
|
| 2 |
+
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
load_dotenv()
|
| 6 |
+
import sys
|
| 7 |
+
|
| 8 |
+
sys.path.append(".")
|
| 9 |
+
import asyncio
|
| 10 |
+
import os
|
| 11 |
+
import sys
|
| 12 |
+
from pprint import pprint
|
| 13 |
+
|
| 14 |
+
from browser_use import Agent
|
| 15 |
+
from browser_use.agent.views import AgentHistoryList
|
| 16 |
+
|
| 17 |
+
from src.utils import utils
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
async def test_browser_use_agent():
|
| 21 |
+
from browser_use.browser.browser import Browser, BrowserConfig
|
| 22 |
+
from browser_use.browser.context import (
|
| 23 |
+
BrowserContextConfig
|
| 24 |
+
)
|
| 25 |
+
from browser_use.agent.service import Agent
|
| 26 |
+
|
| 27 |
+
from src.browser.custom_browser import CustomBrowser
|
| 28 |
+
from src.controller.custom_controller import CustomController
|
| 29 |
+
from src.utils import llm_provider
|
| 30 |
+
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
|
| 31 |
+
|
| 32 |
+
llm = llm_provider.get_llm_model(
|
| 33 |
+
provider="openai",
|
| 34 |
+
model_name="gpt-4o",
|
| 35 |
+
temperature=0.8,
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
# llm = llm_provider.get_llm_model(
|
| 39 |
+
# provider="google",
|
| 40 |
+
# model_name="gemini-2.0-flash",
|
| 41 |
+
# temperature=0.6,
|
| 42 |
+
# api_key=os.getenv("GOOGLE_API_KEY", "")
|
| 43 |
+
# )
|
| 44 |
+
|
| 45 |
+
# llm = utils.get_llm_model(
|
| 46 |
+
# provider="deepseek",
|
| 47 |
+
# model_name="deepseek-reasoner",
|
| 48 |
+
# temperature=0.8
|
| 49 |
+
# )
|
| 50 |
+
|
| 51 |
+
# llm = utils.get_llm_model(
|
| 52 |
+
# provider="deepseek",
|
| 53 |
+
# model_name="deepseek-chat",
|
| 54 |
+
# temperature=0.8
|
| 55 |
+
# )
|
| 56 |
+
|
| 57 |
+
# llm = utils.get_llm_model(
|
| 58 |
+
# provider="ollama", model_name="qwen2.5:7b", temperature=0.5
|
| 59 |
+
# )
|
| 60 |
+
|
| 61 |
+
# llm = utils.get_llm_model(
|
| 62 |
+
# provider="ollama", model_name="deepseek-r1:14b", temperature=0.5
|
| 63 |
+
# )
|
| 64 |
+
|
| 65 |
+
window_w, window_h = 1280, 1100
|
| 66 |
+
|
| 67 |
+
# llm = llm_provider.get_llm_model(
|
| 68 |
+
# provider="azure_openai",
|
| 69 |
+
# model_name="gpt-4o",
|
| 70 |
+
# temperature=0.5,
|
| 71 |
+
# base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
| 72 |
+
# api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
| 73 |
+
# )
|
| 74 |
+
|
| 75 |
+
mcp_server_config = {
|
| 76 |
+
"mcpServers": {
|
| 77 |
+
# "markitdown": {
|
| 78 |
+
# "command": "docker",
|
| 79 |
+
# "args": [
|
| 80 |
+
# "run",
|
| 81 |
+
# "--rm",
|
| 82 |
+
# "-i",
|
| 83 |
+
# "markitdown-mcp:latest"
|
| 84 |
+
# ]
|
| 85 |
+
# },
|
| 86 |
+
"desktop-commander": {
|
| 87 |
+
"command": "npx",
|
| 88 |
+
"args": [
|
| 89 |
+
"-y",
|
| 90 |
+
"@wonderwhy-er/desktop-commander"
|
| 91 |
+
]
|
| 92 |
+
},
|
| 93 |
+
}
|
| 94 |
+
}
|
| 95 |
+
controller = CustomController()
|
| 96 |
+
await controller.setup_mcp_client(mcp_server_config)
|
| 97 |
+
use_own_browser = True
|
| 98 |
+
use_vision = True # Set to False when using DeepSeek
|
| 99 |
+
|
| 100 |
+
max_actions_per_step = 10
|
| 101 |
+
browser = None
|
| 102 |
+
browser_context = None
|
| 103 |
+
|
| 104 |
+
try:
|
| 105 |
+
extra_browser_args = []
|
| 106 |
+
if use_own_browser:
|
| 107 |
+
browser_binary_path = os.getenv("BROWSER_PATH", None)
|
| 108 |
+
if browser_binary_path == "":
|
| 109 |
+
browser_binary_path = None
|
| 110 |
+
browser_user_data = os.getenv("BROWSER_USER_DATA", None)
|
| 111 |
+
if browser_user_data:
|
| 112 |
+
extra_browser_args += [f"--user-data-dir={browser_user_data}"]
|
| 113 |
+
else:
|
| 114 |
+
browser_binary_path = None
|
| 115 |
+
browser = CustomBrowser(
|
| 116 |
+
config=BrowserConfig(
|
| 117 |
+
headless=False,
|
| 118 |
+
browser_binary_path=browser_binary_path,
|
| 119 |
+
extra_browser_args=extra_browser_args,
|
| 120 |
+
new_context_config=BrowserContextConfig(
|
| 121 |
+
window_width=window_w,
|
| 122 |
+
window_height=window_h,
|
| 123 |
+
)
|
| 124 |
+
)
|
| 125 |
+
)
|
| 126 |
+
browser_context = await browser.new_context(
|
| 127 |
+
config=BrowserContextConfig(
|
| 128 |
+
trace_path=None,
|
| 129 |
+
save_recording_path=None,
|
| 130 |
+
save_downloads_path="./tmp/downloads",
|
| 131 |
+
window_height=window_h,
|
| 132 |
+
window_width=window_w,
|
| 133 |
+
)
|
| 134 |
+
)
|
| 135 |
+
agent = BrowserUseAgent(
|
| 136 |
+
# task="download pdf from https://arxiv.org/pdf/2311.16498 and rename this pdf to 'mcp-test.pdf'",
|
| 137 |
+
task="give me nvidia stock price",
|
| 138 |
+
llm=llm,
|
| 139 |
+
browser=browser,
|
| 140 |
+
browser_context=browser_context,
|
| 141 |
+
controller=controller,
|
| 142 |
+
use_vision=use_vision,
|
| 143 |
+
max_actions_per_step=max_actions_per_step,
|
| 144 |
+
generate_gif=True
|
| 145 |
+
)
|
| 146 |
+
history: AgentHistoryList = await agent.run(max_steps=100)
|
| 147 |
+
|
| 148 |
+
print("Final Result:")
|
| 149 |
+
pprint(history.final_result(), indent=4)
|
| 150 |
+
|
| 151 |
+
print("\nErrors:")
|
| 152 |
+
pprint(history.errors(), indent=4)
|
| 153 |
+
|
| 154 |
+
except Exception:
|
| 155 |
+
import traceback
|
| 156 |
+
traceback.print_exc()
|
| 157 |
+
finally:
|
| 158 |
+
if browser_context:
|
| 159 |
+
await browser_context.close()
|
| 160 |
+
if browser:
|
| 161 |
+
await browser.close()
|
| 162 |
+
if controller:
|
| 163 |
+
await controller.close_mcp_client()
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
async def test_browser_use_parallel():
|
| 167 |
+
from browser_use.browser.browser import Browser, BrowserConfig
|
| 168 |
+
from browser_use.browser.context import (
|
| 169 |
+
BrowserContextConfig,
|
| 170 |
+
)
|
| 171 |
+
from browser_use.agent.service import Agent
|
| 172 |
+
|
| 173 |
+
from src.browser.custom_browser import CustomBrowser
|
| 174 |
+
from src.controller.custom_controller import CustomController
|
| 175 |
+
from src.utils import llm_provider
|
| 176 |
+
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
|
| 177 |
+
|
| 178 |
+
# llm = utils.get_llm_model(
|
| 179 |
+
# provider="openai",
|
| 180 |
+
# model_name="gpt-4o",
|
| 181 |
+
# temperature=0.8,
|
| 182 |
+
# base_url=os.getenv("OPENAI_ENDPOINT", ""),
|
| 183 |
+
# api_key=os.getenv("OPENAI_API_KEY", ""),
|
| 184 |
+
# )
|
| 185 |
+
|
| 186 |
+
# llm = utils.get_llm_model(
|
| 187 |
+
# provider="google",
|
| 188 |
+
# model_name="gemini-2.0-flash",
|
| 189 |
+
# temperature=0.6,
|
| 190 |
+
# api_key=os.getenv("GOOGLE_API_KEY", "")
|
| 191 |
+
# )
|
| 192 |
+
|
| 193 |
+
# llm = utils.get_llm_model(
|
| 194 |
+
# provider="deepseek",
|
| 195 |
+
# model_name="deepseek-reasoner",
|
| 196 |
+
# temperature=0.8
|
| 197 |
+
# )
|
| 198 |
+
|
| 199 |
+
# llm = utils.get_llm_model(
|
| 200 |
+
# provider="deepseek",
|
| 201 |
+
# model_name="deepseek-chat",
|
| 202 |
+
# temperature=0.8
|
| 203 |
+
# )
|
| 204 |
+
|
| 205 |
+
# llm = utils.get_llm_model(
|
| 206 |
+
# provider="ollama", model_name="qwen2.5:7b", temperature=0.5
|
| 207 |
+
# )
|
| 208 |
+
|
| 209 |
+
# llm = utils.get_llm_model(
|
| 210 |
+
# provider="ollama", model_name="deepseek-r1:14b", temperature=0.5
|
| 211 |
+
# )
|
| 212 |
+
|
| 213 |
+
window_w, window_h = 1280, 1100
|
| 214 |
+
|
| 215 |
+
llm = llm_provider.get_llm_model(
|
| 216 |
+
provider="azure_openai",
|
| 217 |
+
model_name="gpt-4o",
|
| 218 |
+
temperature=0.5,
|
| 219 |
+
base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
| 220 |
+
api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
| 221 |
+
)
|
| 222 |
+
|
| 223 |
+
    mcp_server_config = {
        "mcpServers": {
            # "markitdown": {
            #     "command": "docker",
            #     "args": [
            #         "run",
            #         "--rm",
            #         "-i",
            #         "markitdown-mcp:latest"
            #     ]
            # },
            "desktop-commander": {
                "command": "npx",
                "args": [
                    "-y",
                    "@wonderwhy-er/desktop-commander"
                ]
            },
            # "filesystem": {
            #     "command": "npx",
            #     "args": [
            #         "-y",
            #         "@modelcontextprotocol/server-filesystem",
            #         "/Users/xxx/ai_workspace",
            #     ]
            # },
        }
    }
    controller = CustomController()
    await controller.setup_mcp_client(mcp_server_config)
    use_own_browser = True
    use_vision = True  # Set to False when using DeepSeek

    max_actions_per_step = 10
    browser = None
    browser_context = None

    try:
        extra_browser_args = []
        if use_own_browser:
            browser_binary_path = os.getenv("BROWSER_PATH", None)
            if browser_binary_path == "":
                browser_binary_path = None
            browser_user_data = os.getenv("BROWSER_USER_DATA", None)
            if browser_user_data:
                extra_browser_args += [f"--user-data-dir={browser_user_data}"]
        else:
            browser_binary_path = None
        browser = CustomBrowser(
            config=BrowserConfig(
                headless=False,
                browser_binary_path=browser_binary_path,
                extra_browser_args=extra_browser_args,
                new_context_config=BrowserContextConfig(
                    window_width=window_w,
                    window_height=window_h,
                )
            )
        )
        browser_context = await browser.new_context(
            config=BrowserContextConfig(
                trace_path=None,
                save_recording_path=None,
                save_downloads_path="./tmp/downloads",
                window_height=window_h,
                window_width=window_w,
                force_new_context=True
            )
        )
        agents = [
            BrowserUseAgent(task=task, llm=llm, browser=browser, controller=controller)
            for task in [
                'Search Google for weather in Tokyo',
                # 'Check Reddit front page title',
                # 'Find NASA image of the day',
                # 'Check top story on CNN',
                # 'Search latest SpaceX launch date',
                # 'Look up population of Paris',
                'Find current time in Sydney',
                'Check who won last Super Bowl',
                # 'Search trending topics on Twitter',
            ]
        ]

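        # Run all agents concurrently; gather returns one AgentHistoryList per
        # task, in the same order as the task list.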
        histories = await asyncio.gather(*[agent.run() for agent in agents])
        for history in histories:
            print("Final Result:")
            pprint(history.final_result(), indent=4)

            print("\nErrors:")
            pprint(history.errors(), indent=4)

        pdb.set_trace()

    except Exception:
        import traceback

        traceback.print_exc()
    finally:
        if browser_context:
            await browser_context.close()
        if browser:
            await browser.close()
        if controller:
            await controller.close_mcp_client()


async def test_deep_research_agent():
    from src.agent.deep_research.deep_research_agent import DeepResearchAgent, PLAN_FILENAME, REPORT_FILENAME
    from src.utils import llm_provider

    llm = llm_provider.get_llm_model(
        provider="openai",
        model_name="gpt-4o",
        temperature=0.5
    )

    # llm = llm_provider.get_llm_model(
    #     provider="bedrock",
    # )

    mcp_server_config = {
        "mcpServers": {
            "desktop-commander": {
                "command": "npx",
                "args": [
                    "-y",
                    "@wonderwhy-er/desktop-commander"
                ]
            },
        }
    }

    browser_config = {"headless": False, "window_width": 1280, "window_height": 1100, "use_own_browser": False}
    agent = DeepResearchAgent(llm=llm, browser_config=browser_config, mcp_server_config=mcp_server_config)
    research_topic = "Give me investment advice on Nvidia and Tesla."
    task_id_to_resume = ""  # Set this to a previous task ID to resume it

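    # A non-empty task_id resumes a previous run from the state saved under
    # save_dir for that task instead of starting from scratch.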
print(f"Starting research on: {research_topic}")
|
| 361 |
+
|
| 362 |
+
try:
|
| 363 |
+
# Call run and wait for the final result dictionary
|
| 364 |
+
result = await agent.run(research_topic,
|
| 365 |
+
task_id=task_id_to_resume,
|
| 366 |
+
save_dir="./tmp/deep_research",
|
| 367 |
+
max_parallel_browsers=1,
|
| 368 |
+
)
|
| 369 |
+
|
| 370 |
+
print("\n--- Research Process Ended ---")
|
| 371 |
+
print(f"Status: {result.get('status')}")
|
| 372 |
+
print(f"Message: {result.get('message')}")
|
| 373 |
+
print(f"Task ID: {result.get('task_id')}")
|
| 374 |
+
|
| 375 |
+
# Check the final state for the report
|
| 376 |
+
final_state = result.get('final_state', {})
|
| 377 |
+
if final_state:
|
| 378 |
+
print("\n--- Final State Summary ---")
|
| 379 |
+
print(
|
| 380 |
+
f" Plan Steps Completed: {sum(1 for item in final_state.get('research_plan', []) if item.get('status') == 'completed')}")
|
| 381 |
+
print(f" Total Search Results Logged: {len(final_state.get('search_results', []))}")
|
| 382 |
+
if final_state.get("final_report"):
|
| 383 |
+
print(" Final Report: Generated (content omitted). You can find it in the output directory.")
|
| 384 |
+
# print("\n--- Final Report ---") # Optionally print report
|
| 385 |
+
# print(final_state["final_report"])
|
| 386 |
+
else:
|
| 387 |
+
print(" Final Report: Not generated.")
|
| 388 |
+
else:
|
| 389 |
+
print("Final state information not available.")
|
| 390 |
+
|
| 391 |
+
|
| 392 |
+
except Exception as e:
|
| 393 |
+
print(f"\n--- An unhandled error occurred outside the agent run ---")
|
| 394 |
+
print(e)
|
| 395 |
+
|
| 396 |
+
|
| 397 |
+
if __name__ == "__main__":
|
| 398 |
+
asyncio.run(test_browser_use_agent())
|
| 399 |
+
# asyncio.run(test_browser_use_parallel())
|
| 400 |
+
# asyncio.run(test_deep_research_agent())
|
tests/test_controller.py
ADDED
@@ -0,0 +1,131 @@
import asyncio
import pdb
import sys

sys.path.append(".")

from dotenv import load_dotenv

load_dotenv()


async def test_mcp_client():
    from src.utils.mcp_client import setup_mcp_client_and_tools, create_tool_param_model

    test_server_config = {
        "mcpServers": {
            # "markitdown": {
            #     "command": "docker",
            #     "args": [
            #         "run",
            #         "--rm",
            #         "-i",
            #         "markitdown-mcp:latest"
            #     ]
            # },
            "desktop-commander": {
                "command": "npx",
                "args": [
                    "-y",
                    "@wonderwhy-er/desktop-commander"
                ]
            },
            # "filesystem": {
            #     "command": "npx",
            #     "args": [
            #         "-y",
            #         "@modelcontextprotocol/server-filesystem",
            #         "/Users/xxx/ai_workspace",
            #     ]
            # },
        }
    }

    mcp_tools, mcp_client = await setup_mcp_client_and_tools(test_server_config)

    for tool in mcp_tools:
        tool_param_model = create_tool_param_model(tool)
        print(tool.name)
        print(tool.description)
        print(tool_param_model.model_json_schema())
    pdb.set_trace()


async def test_controller_with_mcp():
    import os
    from src.controller.custom_controller import CustomController
    from browser_use.controller.registry.views import ActionModel

    mcp_server_config = {
        "mcpServers": {
            # "markitdown": {
            #     "command": "docker",
            #     "args": [
            #         "run",
            #         "--rm",
            #         "-i",
            #         "markitdown-mcp:latest"
            #     ]
            # },
            "desktop-commander": {
                "command": "npx",
                "args": [
                    "-y",
                    "@wonderwhy-er/desktop-commander"
                ]
            },
            # "filesystem": {
            #     "command": "npx",
            #     "args": [
            #         "-y",
            #         "@modelcontextprotocol/server-filesystem",
            #         "/Users/xxx/ai_workspace",
            #     ]
            # },
        }
    }

    controller = CustomController()
    await controller.setup_mcp_client(mcp_server_config)
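    # MCP tools are registered on the controller as actions named
    # "mcp.<server>.<tool>"; look one up, validate params against its schema,
    # and wrap them in an ActionModel to execute it.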
action_name = "mcp.desktop-commander.execute_command"
|
| 92 |
+
action_info = controller.registry.registry.actions[action_name]
|
| 93 |
+
param_model = action_info.param_model
|
| 94 |
+
print(param_model.model_json_schema())
|
| 95 |
+
params = {"command": f"python ./tmp/test.py"
|
| 96 |
+
}
|
| 97 |
+
validated_params = param_model(**params)
|
| 98 |
+
ActionModel_ = controller.registry.create_action_model()
|
| 99 |
+
# Create ActionModel instance with the validated parameters
|
| 100 |
+
action_model = ActionModel_(**{action_name: validated_params})
|
| 101 |
+
result = await controller.act(action_model)
|
| 102 |
+
result = result.extracted_content
|
| 103 |
+
print(result)
|
| 104 |
+
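    # desktop-commander reports long-running commands as "still running" and puts
    # the PID on the first line of its reply; poll read_output with that PID
    # until the command produces output.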
    if result and "Command is still running. Use read_output to get more output." in result and "PID" in \
            result.split("\n")[0]:
        pid = int(result.split("\n")[0].split("PID")[-1].strip())
        action_name = "mcp.desktop-commander.read_output"
        action_info = controller.registry.registry.actions[action_name]
        param_model = action_info.param_model
        print(param_model.model_json_schema())
        params = {"pid": pid}
        validated_params = param_model(**params)
        action_model = ActionModel_(**{action_name: validated_params})
        output_result = ""
        while True:
            await asyncio.sleep(1)  # don't block the event loop while polling
            result = await controller.act(action_model)
            result = result.extracted_content
            if result:
                pdb.set_trace()
                output_result = result
                break
        print(output_result)
        pdb.set_trace()
    await controller.close_mcp_client()
    pdb.set_trace()


if __name__ == '__main__':
    # asyncio.run(test_mcp_client())
    asyncio.run(test_controller_with_mcp())
tests/test_llm_api.py
ADDED
@@ -0,0 +1,159 @@
import os
import pdb
from dataclasses import dataclass
from typing import Optional

from dotenv import load_dotenv
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_ollama import ChatOllama

load_dotenv()

import sys

sys.path.append(".")


@dataclass
class LLMConfig:
    provider: str
    model_name: str
    temperature: float = 0.8
    base_url: Optional[str] = None
    api_key: Optional[str] = None


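# Build LangChain multimodal message content: a text part plus, when an image
# path is given, a base64-encoded image part in OpenAI-style image_url format.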
def create_message_content(text, image_path=None):
    content = [{"type": "text", "text": text}]
    image_format = "png" if image_path and image_path.endswith(".png") else "jpeg"
    if image_path:
        from src.utils import utils
        image_data = utils.encode_image(image_path)
        content.append({
            "type": "image_url",
            "image_url": {"url": f"data:image/{image_format};base64,{image_data}"}
        })
    return content


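# Map each provider to the .env variables holding its API key and endpoint, so
# tests can fall back to the environment when the config omits them.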
def get_env_value(key, provider):
    env_mappings = {
        "openai": {"api_key": "OPENAI_API_KEY", "base_url": "OPENAI_ENDPOINT"},
        "azure_openai": {"api_key": "AZURE_OPENAI_API_KEY", "base_url": "AZURE_OPENAI_ENDPOINT"},
        "google": {"api_key": "GOOGLE_API_KEY"},
        "deepseek": {"api_key": "DEEPSEEK_API_KEY", "base_url": "DEEPSEEK_ENDPOINT"},
        "mistral": {"api_key": "MISTRAL_API_KEY", "base_url": "MISTRAL_ENDPOINT"},
        "alibaba": {"api_key": "ALIBABA_API_KEY", "base_url": "ALIBABA_ENDPOINT"},
        "moonshot": {"api_key": "MOONSHOT_API_KEY", "base_url": "MOONSHOT_ENDPOINT"},
        "ibm": {"api_key": "IBM_API_KEY", "base_url": "IBM_ENDPOINT"}
    }

    if provider in env_mappings and key in env_mappings[provider]:
        return os.getenv(env_mappings[provider][key], "")
    return ""


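# Generic smoke test: build an LLM from the config, send a (possibly multimodal)
# query, and print the response, including the reasoning trace when the model
# exposes one.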
def test_llm(config, query, image_path=None, system_message=None):
    from src.utils import utils, llm_provider

    # Special handling for Ollama-based models
    if config.provider == "ollama":
        if "deepseek-r1" in config.model_name:
            from src.utils.llm_provider import DeepSeekR1ChatOllama
            llm = DeepSeekR1ChatOllama(model=config.model_name)
        else:
            llm = ChatOllama(model=config.model_name)

        ai_msg = llm.invoke(query)
        print(ai_msg.content)
        if "deepseek-r1" in config.model_name:
            pdb.set_trace()
        return

    # For other providers, use the standard configuration
    llm = llm_provider.get_llm_model(
        provider=config.provider,
        model_name=config.model_name,
        temperature=config.temperature,
        base_url=config.base_url or get_env_value("base_url", config.provider),
        api_key=config.api_key or get_env_value("api_key", config.provider)
    )

    # Prepare messages for non-Ollama models
    messages = []
    if system_message:
        messages.append(SystemMessage(content=create_message_content(system_message)))
    messages.append(HumanMessage(content=create_message_content(query, image_path)))
    ai_msg = llm.invoke(messages)

    # Handle different response types
    if hasattr(ai_msg, "reasoning_content"):
        print(ai_msg.reasoning_content)
    print(ai_msg.content)


def test_openai_model():
    config = LLMConfig(provider="openai", model_name="gpt-4o")
    test_llm(config, "Describe this image", "assets/examples/test.png")


def test_google_model():
    # Enable your API key first if you haven't: https://ai.google.dev/palm_docs/oauth_quickstart
    config = LLMConfig(provider="google", model_name="gemini-2.0-flash-exp")
    test_llm(config, "Describe this image", "assets/examples/test.png")


def test_azure_openai_model():
    config = LLMConfig(provider="azure_openai", model_name="gpt-4o")
    test_llm(config, "Describe this image", "assets/examples/test.png")


def test_deepseek_model():
    config = LLMConfig(provider="deepseek", model_name="deepseek-chat")
    test_llm(config, "Who are you?")


def test_deepseek_r1_model():
    config = LLMConfig(provider="deepseek", model_name="deepseek-reasoner")
    test_llm(config, "Which is greater, 9.11 or 9.8?", system_message="You are a helpful AI assistant.")


def test_ollama_model():
    config = LLMConfig(provider="ollama", model_name="qwen2.5:7b")
    test_llm(config, "Sing a ballad of LangChain.")


def test_deepseek_r1_ollama_model():
    config = LLMConfig(provider="ollama", model_name="deepseek-r1:14b")
    test_llm(config, "How many 'r's are in the word 'strawberry'?")


def test_mistral_model():
    config = LLMConfig(provider="mistral", model_name="pixtral-large-latest")
    test_llm(config, "Describe this image", "assets/examples/test.png")


def test_moonshot_model():
    config = LLMConfig(provider="moonshot", model_name="moonshot-v1-32k-vision-preview")
    test_llm(config, "Describe this image", "assets/examples/test.png")


def test_ibm_model():
    config = LLMConfig(provider="ibm", model_name="meta-llama/llama-4-maverick-17b-128e-instruct-fp8")
    test_llm(config, "Describe this image", "assets/examples/test.png")


def test_qwen_model():
    config = LLMConfig(provider="alibaba", model_name="qwen-vl-max")
    test_llm(config, "How many 'r's are in the word 'strawberry'?")


if __name__ == "__main__":
    # test_openai_model()
    # test_google_model()
    test_azure_openai_model()
    # test_deepseek_model()
    # test_ollama_model()
    # test_deepseek_r1_model()
    # test_deepseek_r1_ollama_model()
    # test_mistral_model()
    # test_ibm_model()
    # test_qwen_model()
tests/test_playwright.py
ADDED
@@ -0,0 +1,31 @@
import pdb
from dotenv import load_dotenv

load_dotenv()


def test_connect_browser():
    import os
    from playwright.sync_api import sync_playwright

    chrome_exe = os.getenv("CHROME_PATH", "")
    chrome_user_data = os.getenv("CHROME_USER_DATA", "")

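    # launch_persistent_context reuses an existing Chrome profile, so pages open
    # with the user's cookies and sessions (needed for logged-in sites like Gmail).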
    with sync_playwright() as p:
        browser = p.chromium.launch_persistent_context(
            user_data_dir=chrome_user_data,
            executable_path=chrome_exe,
            headless=False  # Keep the browser window visible
        )

        page = browser.new_page()
        page.goto("https://mail.google.com/mail/u/0/#inbox")
        page.wait_for_load_state()

        input("Press the Enter key to close the browser...")

        browser.close()


if __name__ == '__main__':
    test_connect_browser()
webui.py
ADDED
@@ -0,0 +1,19 @@
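# Load .env before importing src modules so provider keys and endpoints are
# already set in the environment at import time.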
from dotenv import load_dotenv

load_dotenv()

import argparse
from src.webui.interface import theme_map, create_ui


def main():
    parser = argparse.ArgumentParser(description="Gradio WebUI for Browser Agent")
    parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
    parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
    parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
    args = parser.parse_args()

    demo = create_ui(theme_name=args.theme)
    demo.queue().launch(server_name=args.ip, server_port=args.port)


if __name__ == '__main__':
    main()