Spaces:
Sleeping
Sleeping
Merge branch 'master' into gui
Browse files- .github/workflows/CI.yml +17 -23
- .github/workflows/CI_Windows.yml +8 -14
- .github/workflows/CI_docker.yml +6 -12
- .github/workflows/CI_docker_large_nightly.yml +2 -2
- .github/workflows/CI_large_nightly.yml +3 -3
- .github/workflows/CI_mac.yml +7 -13
- .github/workflows/docker_deploy.yml +6 -6
- .github/workflows/update_backend.yml +0 -1
- .gitignore +2 -0
- .pre-commit-config.yaml +3 -3
- Dockerfile +2 -2
- README.md +1 -1
- benchmarks/hyperparamopt.py +1 -0
- benchmarks/print_best_model.py +1 -0
- docs/examples.md +3 -1
- docs/generate_papers.py +1 -0
- environment.yml +1 -2
- examples/pysr_demo.ipynb +1 -1
- pyproject.toml +16 -2
- pysr/denoising.py +18 -4
- pysr/deprecated.py +1 -0
- pysr/export_jax.py +4 -1
- pysr/export_latex.py +13 -0
- pysr/export_numpy.py +11 -2
- pysr/export_sympy.py +15 -6
- pysr/export_torch.py +9 -6
- pysr/feature_selection.py +20 -3
- pysr/julia_helpers.py +17 -5
- pysr/julia_import.py +9 -21
- pysr/juliapkg.json +1 -1
- pysr/param_groupings.yml +1 -0
- pysr/sklearn_monkeypatch.py +1 -2
- pysr/sr.py +381 -193
- pysr/test/__main__.py +1 -0
- pysr/test/params.py +1 -1
- pysr/test/test.py +220 -106
- pysr/test/test_jax.py +41 -10
- pysr/test/test_startup.py +3 -6
- pysr/test/test_torch.py +36 -2
- pysr/utils.py +22 -2
- requirements.txt +2 -3
.github/workflows/CI.yml
CHANGED
|
@@ -5,20 +5,14 @@ on:
|
|
| 5 |
branches:
|
| 6 |
- '**'
|
| 7 |
paths:
|
| 8 |
-
- '
|
| 9 |
-
- 'pysr/**'
|
| 10 |
-
- '.github/workflows/CI.yml'
|
| 11 |
-
- 'setup.py'
|
| 12 |
tags:
|
| 13 |
- 'v*.*.*'
|
| 14 |
pull_request:
|
| 15 |
branches:
|
| 16 |
-
- '
|
| 17 |
paths:
|
| 18 |
-
- '
|
| 19 |
-
- 'pysr/**'
|
| 20 |
-
- '.github/workflows/CI.yml'
|
| 21 |
-
- 'setup.py'
|
| 22 |
|
| 23 |
jobs:
|
| 24 |
test:
|
|
@@ -32,12 +26,12 @@ jobs:
|
|
| 32 |
strategy:
|
| 33 |
matrix:
|
| 34 |
julia-version: ['1']
|
| 35 |
-
python-version: ['3.
|
| 36 |
os: [ubuntu-latest]
|
| 37 |
test-id: [main]
|
| 38 |
include:
|
| 39 |
- julia-version: '1.6'
|
| 40 |
-
python-version: '3.
|
| 41 |
os: ubuntu-latest
|
| 42 |
test-id: include
|
| 43 |
- julia-version: '1'
|
|
@@ -48,11 +42,11 @@ jobs:
|
|
| 48 |
steps:
|
| 49 |
- uses: actions/checkout@v4
|
| 50 |
- name: "Set up Julia"
|
| 51 |
-
uses: julia-actions/setup-julia@
|
| 52 |
with:
|
| 53 |
version: ${{ matrix.julia-version }}
|
| 54 |
- name: "Cache Julia"
|
| 55 |
-
uses: julia-actions/cache@
|
| 56 |
with:
|
| 57 |
cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
|
| 58 |
cache-packages: false
|
|
@@ -90,7 +84,7 @@ jobs:
|
|
| 90 |
- name: "Coveralls"
|
| 91 |
env:
|
| 92 |
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
| 93 |
-
COVERALLS_FLAG_NAME: test-${{ matrix.julia-version }}-${{ matrix.python-version }}
|
| 94 |
COVERALLS_PARALLEL: true
|
| 95 |
run: coveralls --service=github
|
| 96 |
|
|
@@ -99,11 +93,11 @@ jobs:
|
|
| 99 |
strategy:
|
| 100 |
matrix:
|
| 101 |
os: ['ubuntu-latest']
|
| 102 |
-
python-version: ['3.
|
| 103 |
julia-version: ['1']
|
| 104 |
include:
|
| 105 |
- os: ubuntu-latest
|
| 106 |
-
python-version: '3.
|
| 107 |
julia-version: '1.6'
|
| 108 |
steps:
|
| 109 |
- uses: actions/checkout@v4
|
|
@@ -122,7 +116,7 @@ jobs:
|
|
| 122 |
shell: bash -l {0}
|
| 123 |
strategy:
|
| 124 |
matrix:
|
| 125 |
-
python-version: ['3.
|
| 126 |
os: ['ubuntu-latest']
|
| 127 |
|
| 128 |
steps:
|
|
@@ -144,7 +138,7 @@ jobs:
|
|
| 144 |
activate-environment: pysr-test
|
| 145 |
environment-file: environment.yml
|
| 146 |
- name: "Cache Julia"
|
| 147 |
-
uses: julia-actions/cache@
|
| 148 |
with:
|
| 149 |
cache-name: ${{ matrix.os }}-conda-${{ matrix.python-version }}
|
| 150 |
cache-packages: false
|
|
@@ -181,8 +175,8 @@ jobs:
|
|
| 181 |
strategy:
|
| 182 |
matrix:
|
| 183 |
python-version:
|
| 184 |
-
- '3.
|
| 185 |
-
- '3.
|
| 186 |
os: ['ubuntu-latest']
|
| 187 |
|
| 188 |
steps:
|
|
@@ -199,10 +193,10 @@ jobs:
|
|
| 199 |
pip install mypy
|
| 200 |
- name: "Install additional dependencies"
|
| 201 |
run: python -m pip install jax jaxlib torch
|
| 202 |
-
if: ${{ matrix.python-version != '3.
|
| 203 |
- name: "Run mypy"
|
| 204 |
run: python -m mypy --install-types --non-interactive pysr
|
| 205 |
-
if: ${{ matrix.python-version != '3.
|
| 206 |
- name: "Run compatible mypy"
|
| 207 |
run: python -m mypy --ignore-missing-imports pysr
|
| 208 |
-
if: ${{ matrix.python-version == '3.
|
|
|
|
| 5 |
branches:
|
| 6 |
- '**'
|
| 7 |
paths:
|
| 8 |
+
- '**'
|
|
|
|
|
|
|
|
|
|
| 9 |
tags:
|
| 10 |
- 'v*.*.*'
|
| 11 |
pull_request:
|
| 12 |
branches:
|
| 13 |
+
- 'master'
|
| 14 |
paths:
|
| 15 |
+
- '**'
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
jobs:
|
| 18 |
test:
|
|
|
|
| 26 |
strategy:
|
| 27 |
matrix:
|
| 28 |
julia-version: ['1']
|
| 29 |
+
python-version: ['3.12']
|
| 30 |
os: [ubuntu-latest]
|
| 31 |
test-id: [main]
|
| 32 |
include:
|
| 33 |
- julia-version: '1.6'
|
| 34 |
+
python-version: '3.8'
|
| 35 |
os: ubuntu-latest
|
| 36 |
test-id: include
|
| 37 |
- julia-version: '1'
|
|
|
|
| 42 |
steps:
|
| 43 |
- uses: actions/checkout@v4
|
| 44 |
- name: "Set up Julia"
|
| 45 |
+
uses: julia-actions/setup-julia@v2
|
| 46 |
with:
|
| 47 |
version: ${{ matrix.julia-version }}
|
| 48 |
- name: "Cache Julia"
|
| 49 |
+
uses: julia-actions/cache@v2
|
| 50 |
with:
|
| 51 |
cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
|
| 52 |
cache-packages: false
|
|
|
|
| 84 |
- name: "Coveralls"
|
| 85 |
env:
|
| 86 |
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
| 87 |
+
COVERALLS_FLAG_NAME: test-${{ matrix.julia-version }}-${{ matrix.python-version }}-${{ matrix.test-id }}
|
| 88 |
COVERALLS_PARALLEL: true
|
| 89 |
run: coveralls --service=github
|
| 90 |
|
|
|
|
| 93 |
strategy:
|
| 94 |
matrix:
|
| 95 |
os: ['ubuntu-latest']
|
| 96 |
+
python-version: ['3.12']
|
| 97 |
julia-version: ['1']
|
| 98 |
include:
|
| 99 |
- os: ubuntu-latest
|
| 100 |
+
python-version: '3.8'
|
| 101 |
julia-version: '1.6'
|
| 102 |
steps:
|
| 103 |
- uses: actions/checkout@v4
|
|
|
|
| 116 |
shell: bash -l {0}
|
| 117 |
strategy:
|
| 118 |
matrix:
|
| 119 |
+
python-version: ['3.12']
|
| 120 |
os: ['ubuntu-latest']
|
| 121 |
|
| 122 |
steps:
|
|
|
|
| 138 |
activate-environment: pysr-test
|
| 139 |
environment-file: environment.yml
|
| 140 |
- name: "Cache Julia"
|
| 141 |
+
uses: julia-actions/cache@v2
|
| 142 |
with:
|
| 143 |
cache-name: ${{ matrix.os }}-conda-${{ matrix.python-version }}
|
| 144 |
cache-packages: false
|
|
|
|
| 175 |
strategy:
|
| 176 |
matrix:
|
| 177 |
python-version:
|
| 178 |
+
- '3.12'
|
| 179 |
+
- '3.8'
|
| 180 |
os: ['ubuntu-latest']
|
| 181 |
|
| 182 |
steps:
|
|
|
|
| 193 |
pip install mypy
|
| 194 |
- name: "Install additional dependencies"
|
| 195 |
run: python -m pip install jax jaxlib torch
|
| 196 |
+
if: ${{ matrix.python-version != '3.8' }}
|
| 197 |
- name: "Run mypy"
|
| 198 |
run: python -m mypy --install-types --non-interactive pysr
|
| 199 |
+
if: ${{ matrix.python-version != '3.8' }}
|
| 200 |
- name: "Run compatible mypy"
|
| 201 |
run: python -m mypy --ignore-missing-imports pysr
|
| 202 |
+
if: ${{ matrix.python-version == '3.8' }}
|
.github/workflows/CI_Windows.yml
CHANGED
|
@@ -3,22 +3,16 @@ name: Windows
|
|
| 3 |
on:
|
| 4 |
push:
|
| 5 |
branches:
|
| 6 |
-
- '
|
| 7 |
paths:
|
| 8 |
-
- '
|
| 9 |
-
- 'pysr/**'
|
| 10 |
-
- '.github/workflows/CI_Windows.yml'
|
| 11 |
-
- 'setup.py'
|
| 12 |
tags:
|
| 13 |
- 'v*.*.*'
|
| 14 |
pull_request:
|
| 15 |
branches:
|
| 16 |
-
- '
|
| 17 |
paths:
|
| 18 |
-
- '
|
| 19 |
-
- 'pysr/**'
|
| 20 |
-
- '.github/workflows/CI_Windows.yml'
|
| 21 |
-
- 'setup.py'
|
| 22 |
|
| 23 |
jobs:
|
| 24 |
test:
|
|
@@ -30,17 +24,17 @@ jobs:
|
|
| 30 |
strategy:
|
| 31 |
matrix:
|
| 32 |
julia-version: ['1']
|
| 33 |
-
python-version: ['3.
|
| 34 |
os: [windows-latest]
|
| 35 |
|
| 36 |
steps:
|
| 37 |
- uses: actions/checkout@v4
|
| 38 |
- name: "Set up Julia"
|
| 39 |
-
uses: julia-actions/setup-julia@
|
| 40 |
with:
|
| 41 |
version: ${{ matrix.julia-version }}
|
| 42 |
- name: "Cache Julia"
|
| 43 |
-
uses: julia-actions/cache@
|
| 44 |
with:
|
| 45 |
cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
|
| 46 |
cache-packages: false
|
|
@@ -52,7 +46,7 @@ jobs:
|
|
| 52 |
- name: "Install PySR"
|
| 53 |
run: |
|
| 54 |
python -m pip install --upgrade pip
|
| 55 |
-
pip install pytest nbval
|
| 56 |
pip install .
|
| 57 |
python -c 'import pysr'
|
| 58 |
- name: "Run tests"
|
|
|
|
| 3 |
on:
|
| 4 |
push:
|
| 5 |
branches:
|
| 6 |
+
- 'master'
|
| 7 |
paths:
|
| 8 |
+
- '**'
|
|
|
|
|
|
|
|
|
|
| 9 |
tags:
|
| 10 |
- 'v*.*.*'
|
| 11 |
pull_request:
|
| 12 |
branches:
|
| 13 |
+
- 'master'
|
| 14 |
paths:
|
| 15 |
+
- '**'
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
jobs:
|
| 18 |
test:
|
|
|
|
| 24 |
strategy:
|
| 25 |
matrix:
|
| 26 |
julia-version: ['1']
|
| 27 |
+
python-version: ['3.12']
|
| 28 |
os: [windows-latest]
|
| 29 |
|
| 30 |
steps:
|
| 31 |
- uses: actions/checkout@v4
|
| 32 |
- name: "Set up Julia"
|
| 33 |
+
uses: julia-actions/setup-julia@v2
|
| 34 |
with:
|
| 35 |
version: ${{ matrix.julia-version }}
|
| 36 |
- name: "Cache Julia"
|
| 37 |
+
uses: julia-actions/cache@v2
|
| 38 |
with:
|
| 39 |
cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
|
| 40 |
cache-packages: false
|
|
|
|
| 46 |
- name: "Install PySR"
|
| 47 |
run: |
|
| 48 |
python -m pip install --upgrade pip
|
| 49 |
+
pip install pytest nbval "numpy<2.0.0"
|
| 50 |
pip install .
|
| 51 |
python -c 'import pysr'
|
| 52 |
- name: "Run tests"
|
.github/workflows/CI_docker.yml
CHANGED
|
@@ -3,22 +3,16 @@ name: Docker
|
|
| 3 |
on:
|
| 4 |
push:
|
| 5 |
branches:
|
| 6 |
-
- '
|
| 7 |
paths:
|
| 8 |
-
- '
|
| 9 |
-
|
| 10 |
-
- '
|
| 11 |
-
- 'setup.py'
|
| 12 |
-
- 'Dockerfile'
|
| 13 |
pull_request:
|
| 14 |
branches:
|
| 15 |
-
- '
|
| 16 |
paths:
|
| 17 |
-
- '
|
| 18 |
-
- 'pysr/**'
|
| 19 |
-
- '.github/workflows/CI_docker.yml'
|
| 20 |
-
- 'setup.py'
|
| 21 |
-
- 'Dockerfile'
|
| 22 |
|
| 23 |
jobs:
|
| 24 |
test:
|
|
|
|
| 3 |
on:
|
| 4 |
push:
|
| 5 |
branches:
|
| 6 |
+
- 'master'
|
| 7 |
paths:
|
| 8 |
+
- '**'
|
| 9 |
+
tags:
|
| 10 |
+
- 'v*.*.*'
|
|
|
|
|
|
|
| 11 |
pull_request:
|
| 12 |
branches:
|
| 13 |
+
- 'master'
|
| 14 |
paths:
|
| 15 |
+
- '**'
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
jobs:
|
| 18 |
test:
|
.github/workflows/CI_docker_large_nightly.yml
CHANGED
|
@@ -19,7 +19,7 @@ jobs:
|
|
| 19 |
fail-fast: false
|
| 20 |
matrix:
|
| 21 |
julia-version: ['1.6', '1']
|
| 22 |
-
python-version: ['3.
|
| 23 |
os: [ubuntu-latest]
|
| 24 |
arch: ['linux/amd64', 'linux/arm64']
|
| 25 |
|
|
@@ -27,7 +27,7 @@ jobs:
|
|
| 27 |
steps:
|
| 28 |
- uses: actions/checkout@v4
|
| 29 |
- name: Set up QEMU
|
| 30 |
-
uses: docker/setup-qemu-action@
|
| 31 |
with:
|
| 32 |
platforms: all
|
| 33 |
- name: Build docker
|
|
|
|
| 19 |
fail-fast: false
|
| 20 |
matrix:
|
| 21 |
julia-version: ['1.6', '1']
|
| 22 |
+
python-version: ['3.8', '3.12']
|
| 23 |
os: [ubuntu-latest]
|
| 24 |
arch: ['linux/amd64', 'linux/arm64']
|
| 25 |
|
|
|
|
| 27 |
steps:
|
| 28 |
- uses: actions/checkout@v4
|
| 29 |
- name: Set up QEMU
|
| 30 |
+
uses: docker/setup-qemu-action@v3
|
| 31 |
with:
|
| 32 |
platforms: all
|
| 33 |
- name: Build docker
|
.github/workflows/CI_large_nightly.yml
CHANGED
|
@@ -23,14 +23,14 @@ jobs:
|
|
| 23 |
strategy:
|
| 24 |
fail-fast: false
|
| 25 |
matrix:
|
| 26 |
-
julia-version: ['1.6', '1.8', '1.
|
| 27 |
-
python-version: ['3.
|
| 28 |
os: [ubuntu-latest, macos-latest, windows-latest]
|
| 29 |
|
| 30 |
steps:
|
| 31 |
- uses: actions/checkout@v4
|
| 32 |
- name: "Set up Julia"
|
| 33 |
-
uses: julia-actions/setup-julia@
|
| 34 |
with:
|
| 35 |
version: ${{ matrix.julia-version }}
|
| 36 |
- name: "Set up Python"
|
|
|
|
| 23 |
strategy:
|
| 24 |
fail-fast: false
|
| 25 |
matrix:
|
| 26 |
+
julia-version: ['1.6', '1.8', '1.10']
|
| 27 |
+
python-version: ['3.8', '3.10', '3.12']
|
| 28 |
os: [ubuntu-latest, macos-latest, windows-latest]
|
| 29 |
|
| 30 |
steps:
|
| 31 |
- uses: actions/checkout@v4
|
| 32 |
- name: "Set up Julia"
|
| 33 |
+
uses: julia-actions/setup-julia@v2
|
| 34 |
with:
|
| 35 |
version: ${{ matrix.julia-version }}
|
| 36 |
- name: "Set up Python"
|
.github/workflows/CI_mac.yml
CHANGED
|
@@ -3,22 +3,16 @@ name: macOS
|
|
| 3 |
on:
|
| 4 |
push:
|
| 5 |
branches:
|
| 6 |
-
- '
|
| 7 |
paths:
|
| 8 |
-
- '
|
| 9 |
-
- 'pysr/**'
|
| 10 |
-
- '.github/workflows/CI_mac.yml'
|
| 11 |
-
- 'setup.py'
|
| 12 |
tags:
|
| 13 |
- 'v*.*.*'
|
| 14 |
pull_request:
|
| 15 |
branches:
|
| 16 |
-
- '
|
| 17 |
paths:
|
| 18 |
-
- '
|
| 19 |
-
- 'pysr/**'
|
| 20 |
-
- '.github/workflows/CI_mac.yml'
|
| 21 |
-
- 'setup.py'
|
| 22 |
|
| 23 |
jobs:
|
| 24 |
test:
|
|
@@ -30,17 +24,17 @@ jobs:
|
|
| 30 |
strategy:
|
| 31 |
matrix:
|
| 32 |
julia-version: ['1']
|
| 33 |
-
python-version: ['3.
|
| 34 |
os: [macos-latest]
|
| 35 |
|
| 36 |
steps:
|
| 37 |
- uses: actions/checkout@v4
|
| 38 |
- name: "Set up Julia"
|
| 39 |
-
uses: julia-actions/setup-julia@
|
| 40 |
with:
|
| 41 |
version: ${{ matrix.julia-version }}
|
| 42 |
- name: "Cache Julia"
|
| 43 |
-
uses: julia-actions/cache@
|
| 44 |
with:
|
| 45 |
cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
|
| 46 |
cache-packages: false
|
|
|
|
| 3 |
on:
|
| 4 |
push:
|
| 5 |
branches:
|
| 6 |
+
- 'master'
|
| 7 |
paths:
|
| 8 |
+
- '**'
|
|
|
|
|
|
|
|
|
|
| 9 |
tags:
|
| 10 |
- 'v*.*.*'
|
| 11 |
pull_request:
|
| 12 |
branches:
|
| 13 |
+
- 'master'
|
| 14 |
paths:
|
| 15 |
+
- '**'
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
jobs:
|
| 18 |
test:
|
|
|
|
| 24 |
strategy:
|
| 25 |
matrix:
|
| 26 |
julia-version: ['1']
|
| 27 |
+
python-version: ['3.12']
|
| 28 |
os: [macos-latest]
|
| 29 |
|
| 30 |
steps:
|
| 31 |
- uses: actions/checkout@v4
|
| 32 |
- name: "Set up Julia"
|
| 33 |
+
uses: julia-actions/setup-julia@v2
|
| 34 |
with:
|
| 35 |
version: ${{ matrix.julia-version }}
|
| 36 |
- name: "Cache Julia"
|
| 37 |
+
uses: julia-actions/cache@v2
|
| 38 |
with:
|
| 39 |
cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
|
| 40 |
cache-packages: false
|
.github/workflows/docker_deploy.yml
CHANGED
|
@@ -18,19 +18,19 @@ jobs:
|
|
| 18 |
matrix:
|
| 19 |
os: [ubuntu-latest]
|
| 20 |
arch: [linux/amd64]
|
| 21 |
-
python-version: [3.
|
| 22 |
-
julia-version: [1.
|
| 23 |
steps:
|
| 24 |
- name: Checkout
|
| 25 |
uses: actions/checkout@v4
|
| 26 |
- name: Login to Docker Hub
|
| 27 |
-
uses: docker/login-action@
|
| 28 |
if: github.event_name != 'pull_request'
|
| 29 |
with:
|
| 30 |
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
| 31 |
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
| 32 |
- name: Login to GitHub registry
|
| 33 |
-
uses: docker/login-action@
|
| 34 |
if: github.event_name != 'pull_request'
|
| 35 |
with:
|
| 36 |
registry: ghcr.io
|
|
@@ -55,11 +55,11 @@ jobs:
|
|
| 55 |
type=sha
|
| 56 |
type=raw,value=latest,enable={{is_default_branch}}
|
| 57 |
- name: Set up QEMU
|
| 58 |
-
uses: docker/setup-qemu-action@
|
| 59 |
- name: Set up Docker Buildx
|
| 60 |
uses: docker/setup-buildx-action@v3
|
| 61 |
- name: Build and push
|
| 62 |
-
uses: docker/build-push-action@
|
| 63 |
with:
|
| 64 |
context: .
|
| 65 |
platforms: ${{ matrix.arch }}
|
|
|
|
| 18 |
matrix:
|
| 19 |
os: [ubuntu-latest]
|
| 20 |
arch: [linux/amd64]
|
| 21 |
+
python-version: [3.12.3]
|
| 22 |
+
julia-version: [1.10.3]
|
| 23 |
steps:
|
| 24 |
- name: Checkout
|
| 25 |
uses: actions/checkout@v4
|
| 26 |
- name: Login to Docker Hub
|
| 27 |
+
uses: docker/login-action@v3
|
| 28 |
if: github.event_name != 'pull_request'
|
| 29 |
with:
|
| 30 |
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
| 31 |
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
| 32 |
- name: Login to GitHub registry
|
| 33 |
+
uses: docker/login-action@v3
|
| 34 |
if: github.event_name != 'pull_request'
|
| 35 |
with:
|
| 36 |
registry: ghcr.io
|
|
|
|
| 55 |
type=sha
|
| 56 |
type=raw,value=latest,enable={{is_default_branch}}
|
| 57 |
- name: Set up QEMU
|
| 58 |
+
uses: docker/setup-qemu-action@v3
|
| 59 |
- name: Set up Docker Buildx
|
| 60 |
uses: docker/setup-buildx-action@v3
|
| 61 |
- name: Build and push
|
| 62 |
+
uses: docker/build-push-action@v6
|
| 63 |
with:
|
| 64 |
context: .
|
| 65 |
platforms: ${{ matrix.arch }}
|
.github/workflows/update_backend.yml
CHANGED
|
@@ -40,7 +40,6 @@ jobs:
|
|
| 40 |
- name: "Create PR if necessary"
|
| 41 |
uses: peter-evans/create-pull-request@v6
|
| 42 |
with:
|
| 43 |
-
token: ${{ secrets.REPO_SCOPED_TOKEN }}
|
| 44 |
title: "Automated update to backend: v${{ steps.get-latest.outputs.version }}"
|
| 45 |
body: |
|
| 46 |
This PR was automatically generated by the GitHub Action `.github/workflows/update-backend.yml`
|
|
|
|
| 40 |
- name: "Create PR if necessary"
|
| 41 |
uses: peter-evans/create-pull-request@v6
|
| 42 |
with:
|
|
|
|
| 43 |
title: "Automated update to backend: v${{ steps.get-latest.outputs.version }}"
|
| 44 |
body: |
|
| 45 |
This PR was automatically generated by the GitHub Action `.github/workflows/update-backend.yml`
|
.gitignore
CHANGED
|
@@ -23,3 +23,5 @@ site
|
|
| 23 |
**/*.code-workspace
|
| 24 |
**/*.tar.gz
|
| 25 |
venv
|
|
|
|
|
|
|
|
|
| 23 |
**/*.code-workspace
|
| 24 |
**/*.tar.gz
|
| 25 |
venv
|
| 26 |
+
requirements-dev.lock
|
| 27 |
+
requirements.lock
|
.pre-commit-config.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
repos:
|
| 2 |
# General linting
|
| 3 |
- repo: https://github.com/pre-commit/pre-commit-hooks
|
| 4 |
-
rev: v4.
|
| 5 |
hooks:
|
| 6 |
- id: trailing-whitespace
|
| 7 |
- id: end-of-file-fixer
|
|
@@ -9,14 +9,14 @@ repos:
|
|
| 9 |
- id: check-added-large-files
|
| 10 |
# General formatting
|
| 11 |
- repo: https://github.com/psf/black
|
| 12 |
-
rev:
|
| 13 |
hooks:
|
| 14 |
- id: black
|
| 15 |
- id: black-jupyter
|
| 16 |
exclude: pysr/test/test_nb.ipynb
|
| 17 |
# Stripping notebooks
|
| 18 |
- repo: https://github.com/kynan/nbstripout
|
| 19 |
-
rev: 0.
|
| 20 |
hooks:
|
| 21 |
- id: nbstripout
|
| 22 |
exclude: pysr/test/test_nb.ipynb
|
|
|
|
| 1 |
repos:
|
| 2 |
# General linting
|
| 3 |
- repo: https://github.com/pre-commit/pre-commit-hooks
|
| 4 |
+
rev: v4.6.0
|
| 5 |
hooks:
|
| 6 |
- id: trailing-whitespace
|
| 7 |
- id: end-of-file-fixer
|
|
|
|
| 9 |
- id: check-added-large-files
|
| 10 |
# General formatting
|
| 11 |
- repo: https://github.com/psf/black
|
| 12 |
+
rev: 24.4.2
|
| 13 |
hooks:
|
| 14 |
- id: black
|
| 15 |
- id: black-jupyter
|
| 16 |
exclude: pysr/test/test_nb.ipynb
|
| 17 |
# Stripping notebooks
|
| 18 |
- repo: https://github.com/kynan/nbstripout
|
| 19 |
+
rev: 0.7.1
|
| 20 |
hooks:
|
| 21 |
- id: nbstripout
|
| 22 |
exclude: pysr/test/test_nb.ipynb
|
Dockerfile
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
# This builds a dockerfile containing a working copy of PySR
|
| 2 |
# with all pre-requisites installed.
|
| 3 |
|
| 4 |
-
ARG JLVERSION=1.10.
|
| 5 |
-
ARG PYVERSION=3.
|
| 6 |
ARG BASE_IMAGE=bullseye
|
| 7 |
|
| 8 |
FROM julia:${JLVERSION}-${BASE_IMAGE} AS jl
|
|
|
|
| 1 |
# This builds a dockerfile containing a working copy of PySR
|
| 2 |
# with all pre-requisites installed.
|
| 3 |
|
| 4 |
+
ARG JLVERSION=1.10.4
|
| 5 |
+
ARG PYVERSION=3.12.2
|
| 6 |
ARG BASE_IMAGE=bullseye
|
| 7 |
|
| 8 |
FROM julia:${JLVERSION}-${BASE_IMAGE} AS jl
|
README.md
CHANGED
|
@@ -297,7 +297,7 @@ model = PySRRegressor(
|
|
| 297 |
# ^ Higher precision calculations.
|
| 298 |
warm_start=True,
|
| 299 |
# ^ Start from where left off.
|
| 300 |
-
|
| 301 |
# ^ Faster evaluation (experimental)
|
| 302 |
extra_sympy_mappings={"cos2": lambda x: sympy.cos(x)**2},
|
| 303 |
# extra_torch_mappings={sympy.cos: torch.cos},
|
|
|
|
| 297 |
# ^ Higher precision calculations.
|
| 298 |
warm_start=True,
|
| 299 |
# ^ Start from where left off.
|
| 300 |
+
turbo=True,
|
| 301 |
# ^ Faster evaluation (experimental)
|
| 302 |
extra_sympy_mappings={"cos2": lambda x: sympy.cos(x)**2},
|
| 303 |
# extra_torch_mappings={sympy.cos: torch.cos},
|
benchmarks/hyperparamopt.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
"""Start a hyperoptimization from a single node"""
|
|
|
|
| 2 |
import pickle as pkl
|
| 3 |
import sys
|
| 4 |
|
|
|
|
| 1 |
"""Start a hyperoptimization from a single node"""
|
| 2 |
+
|
| 3 |
import pickle as pkl
|
| 4 |
import sys
|
| 5 |
|
benchmarks/print_best_model.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
"""Print the best model parameters and loss"""
|
|
|
|
| 2 |
import pickle as pkl
|
| 3 |
from pprint import PrettyPrinter
|
| 4 |
|
|
|
|
| 1 |
"""Print the best model parameters and loss"""
|
| 2 |
+
|
| 3 |
import pickle as pkl
|
| 4 |
from pprint import PrettyPrinter
|
| 5 |
|
docs/examples.md
CHANGED
|
@@ -428,7 +428,7 @@ the evaluation, as we simply evaluated each argument and divided the result) int
|
|
| 428 |
`((2.3554819 + -0.3554746) - (x1 * (x0 * x0)))` and
|
| 429 |
`(-1.0000019 - (x2 * x2))`, meaning that our discovered equation is
|
| 430 |
equal to:
|
| 431 |
-
$\frac{x_0^2 x_1 - 2.0000073}{x_2^2
|
| 432 |
is nearly the same as the true equation!
|
| 433 |
|
| 434 |
## 10. Dimensional constraints
|
|
@@ -520,6 +520,8 @@ a constant `"2.6353e-22[m s⁻²]"`.
|
|
| 520 |
|
| 521 |
Note that this expression has a large dynamic range so may be difficult to find. Consider searching with a larger `niterations` if needed.
|
| 522 |
|
|
|
|
|
|
|
| 523 |
|
| 524 |
## 11. Additional features
|
| 525 |
|
|
|
|
| 428 |
`((2.3554819 + -0.3554746) - (x1 * (x0 * x0)))` and
|
| 429 |
`(-1.0000019 - (x2 * x2))`, meaning that our discovered equation is
|
| 430 |
equal to:
|
| 431 |
+
$\frac{x_0^2 x_1 - 2.0000073}{x_2^2 + 1.0000019}$, which
|
| 432 |
is nearly the same as the true equation!
|
| 433 |
|
| 434 |
## 10. Dimensional constraints
|
|
|
|
| 520 |
|
| 521 |
Note that this expression has a large dynamic range so may be difficult to find. Consider searching with a larger `niterations` if needed.
|
| 522 |
|
| 523 |
+
Note that you can also search for exclusively dimensionless constants by setting
|
| 524 |
+
`dimensionless_constants_only` to `true`.
|
| 525 |
|
| 526 |
## 11. Additional features
|
| 527 |
|
docs/generate_papers.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
"""This script generates the papers.md file from the papers.yml file."""
|
|
|
|
| 2 |
from pathlib import Path
|
| 3 |
|
| 4 |
import yaml
|
|
|
|
| 1 |
"""This script generates the papers.md file from the papers.yml file."""
|
| 2 |
+
|
| 3 |
from pathlib import Path
|
| 4 |
|
| 5 |
import yaml
|
environment.yml
CHANGED
|
@@ -2,11 +2,10 @@ name: test
|
|
| 2 |
channels:
|
| 3 |
- conda-forge
|
| 4 |
dependencies:
|
| 5 |
-
- python>=3.
|
| 6 |
- sympy>=1.0.0,<2.0.0
|
| 7 |
- pandas>=0.21.0,<3.0.0
|
| 8 |
- numpy>=1.13.0,<2.0.0
|
| 9 |
- scikit-learn>=1.0.0,<2.0.0
|
| 10 |
- pyjuliacall>=0.9.15,<0.10.0
|
| 11 |
- click>=7.0.0,<9.0.0
|
| 12 |
-
- typing_extensions>=4.0.0,<5.0.0
|
|
|
|
| 2 |
channels:
|
| 3 |
- conda-forge
|
| 4 |
dependencies:
|
| 5 |
+
- python>=3.8
|
| 6 |
- sympy>=1.0.0,<2.0.0
|
| 7 |
- pandas>=0.21.0,<3.0.0
|
| 8 |
- numpy>=1.13.0,<2.0.0
|
| 9 |
- scikit-learn>=1.0.0,<2.0.0
|
| 10 |
- pyjuliacall>=0.9.15,<0.10.0
|
| 11 |
- click>=7.0.0,<9.0.0
|
|
|
examples/pysr_demo.ipynb
CHANGED
|
@@ -396,7 +396,7 @@
|
|
| 396 |
"id": "wbWHyOjl2_kX"
|
| 397 |
},
|
| 398 |
"source": [
|
| 399 |
-
"Since `quart` is arguably more complex than the other operators, you can also give it a different complexity, using, e.g., `complexity_of_operators={\"quart\": 2}` to give it a complexity of 2 (instead of the default
|
| 400 |
"\n",
|
| 401 |
"\n",
|
| 402 |
"One can also add a binary operator, with, e.g., `\"myoperator(x, y) = x^2 * y\"`. All Julia operators that work on scalar 32-bit floating point values are available.\n",
|
|
|
|
| 396 |
"id": "wbWHyOjl2_kX"
|
| 397 |
},
|
| 398 |
"source": [
|
| 399 |
+
"Since `quart` is arguably more complex than the other operators, you can also give it a different complexity, using, e.g., `complexity_of_operators={\"quart\": 2}` to give it a complexity of 2 (instead of the default 1). You can also define custom complexities for variables and constants (`complexity_of_variables` and `complexity_of_constants`, respectively - both take a single number).\n",
|
| 400 |
"\n",
|
| 401 |
"\n",
|
| 402 |
"One can also add a binary operator, with, e.g., `\"myoperator(x, y) = x^2 * y\"`. All Julia operators that work on scalar 32-bit floating point values are available.\n",
|
pyproject.toml
CHANGED
|
@@ -4,14 +4,14 @@ build-backend = "setuptools.build_meta"
|
|
| 4 |
|
| 5 |
[project]
|
| 6 |
name = "pysr"
|
| 7 |
-
version = "0.
|
| 8 |
authors = [
|
| 9 |
{name = "Miles Cranmer", email = "miles.cranmer@gmail.com"},
|
| 10 |
]
|
| 11 |
description = "Simple and efficient symbolic regression"
|
| 12 |
readme = {file = "README.md", content-type = "text/markdown"}
|
| 13 |
license = {file = "LICENSE"}
|
| 14 |
-
requires-python = ">=3.
|
| 15 |
classifiers = [
|
| 16 |
"Programming Language :: Python :: 3",
|
| 17 |
"Operating System :: OS Independent",
|
|
@@ -29,3 +29,17 @@ dependencies = {file = "requirements.txt"}
|
|
| 29 |
|
| 30 |
[tool.isort]
|
| 31 |
profile = "black"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
[project]
|
| 6 |
name = "pysr"
|
| 7 |
+
version = "0.19.0"
|
| 8 |
authors = [
|
| 9 |
{name = "Miles Cranmer", email = "miles.cranmer@gmail.com"},
|
| 10 |
]
|
| 11 |
description = "Simple and efficient symbolic regression"
|
| 12 |
readme = {file = "README.md", content-type = "text/markdown"}
|
| 13 |
license = {file = "LICENSE"}
|
| 14 |
+
requires-python = ">=3.8"
|
| 15 |
classifiers = [
|
| 16 |
"Programming Language :: Python :: 3",
|
| 17 |
"Operating System :: OS Independent",
|
|
|
|
| 29 |
|
| 30 |
[tool.isort]
|
| 31 |
profile = "black"
|
| 32 |
+
|
| 33 |
+
[tool.rye]
|
| 34 |
+
dev-dependencies = [
|
| 35 |
+
"pre-commit>=3.7.0",
|
| 36 |
+
"ipython>=8.23.0",
|
| 37 |
+
"ipykernel>=6.29.4",
|
| 38 |
+
"mypy>=1.10.0",
|
| 39 |
+
"jax[cpu]>=0.4.26",
|
| 40 |
+
"torch>=2.3.0",
|
| 41 |
+
"pandas-stubs>=2.2.1.240316",
|
| 42 |
+
"types-pytz>=2024.1.0.20240417",
|
| 43 |
+
"types-openpyxl>=3.1.0.20240428",
|
| 44 |
+
"coverage>=7.5.3",
|
| 45 |
+
]
|
pysr/denoising.py
CHANGED
|
@@ -1,8 +1,17 @@
|
|
| 1 |
"""Functions for denoising data during preprocessing."""
|
|
|
|
|
|
|
|
|
|
| 2 |
import numpy as np
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
-
def denoise(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
"""Denoise the dataset using a Gaussian process."""
|
| 7 |
from sklearn.gaussian_process import GaussianProcessRegressor
|
| 8 |
from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel
|
|
@@ -14,12 +23,17 @@ def denoise(X, y, Xresampled=None, random_state=None):
|
|
| 14 |
gpr.fit(X, y)
|
| 15 |
|
| 16 |
if Xresampled is not None:
|
| 17 |
-
return Xresampled, gpr.predict(Xresampled)
|
| 18 |
|
| 19 |
-
return X, gpr.predict(X)
|
| 20 |
|
| 21 |
|
| 22 |
-
def multi_denoise(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
"""Perform `denoise` along each column of `y` independently."""
|
| 24 |
y = np.stack(
|
| 25 |
[
|
|
|
|
| 1 |
"""Functions for denoising data during preprocessing."""
|
| 2 |
+
|
| 3 |
+
from typing import Optional, Tuple, cast
|
| 4 |
+
|
| 5 |
import numpy as np
|
| 6 |
+
from numpy import ndarray
|
| 7 |
|
| 8 |
|
| 9 |
+
def denoise(
|
| 10 |
+
X: ndarray,
|
| 11 |
+
y: ndarray,
|
| 12 |
+
Xresampled: Optional[ndarray] = None,
|
| 13 |
+
random_state: Optional[np.random.RandomState] = None,
|
| 14 |
+
) -> Tuple[ndarray, ndarray]:
|
| 15 |
"""Denoise the dataset using a Gaussian process."""
|
| 16 |
from sklearn.gaussian_process import GaussianProcessRegressor
|
| 17 |
from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel
|
|
|
|
| 23 |
gpr.fit(X, y)
|
| 24 |
|
| 25 |
if Xresampled is not None:
|
| 26 |
+
return Xresampled, cast(ndarray, gpr.predict(Xresampled))
|
| 27 |
|
| 28 |
+
return X, cast(ndarray, gpr.predict(X))
|
| 29 |
|
| 30 |
|
| 31 |
+
def multi_denoise(
|
| 32 |
+
X: ndarray,
|
| 33 |
+
y: ndarray,
|
| 34 |
+
Xresampled: Optional[ndarray] = None,
|
| 35 |
+
random_state: Optional[np.random.RandomState] = None,
|
| 36 |
+
):
|
| 37 |
"""Perform `denoise` along each column of `y` independently."""
|
| 38 |
y = np.stack(
|
| 39 |
[
|
pysr/deprecated.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
"""Various functions to deprecate features."""
|
|
|
|
| 2 |
import warnings
|
| 3 |
|
| 4 |
from .julia_import import jl
|
|
|
|
| 1 |
"""Various functions to deprecate features."""
|
| 2 |
+
|
| 3 |
import warnings
|
| 4 |
|
| 5 |
from .julia_import import jl
|
pysr/export_jax.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import sympy
|
| 2 |
|
| 3 |
# Special since need to reduce arguments.
|
|
@@ -55,7 +56,9 @@ def sympy2jaxtext(expr, parameters, symbols_in, extra_jax_mappings=None):
|
|
| 55 |
if issubclass(expr.func, sympy.Float):
|
| 56 |
parameters.append(float(expr))
|
| 57 |
return f"parameters[{len(parameters) - 1}]"
|
| 58 |
-
elif issubclass(expr.func, sympy.Rational)
|
|
|
|
|
|
|
| 59 |
return f"{float(expr)}"
|
| 60 |
elif issubclass(expr.func, sympy.Integer):
|
| 61 |
return f"{int(expr)}"
|
|
|
|
| 1 |
+
import numpy as np # noqa: F401
|
| 2 |
import sympy
|
| 3 |
|
| 4 |
# Special since need to reduce arguments.
|
|
|
|
| 56 |
if issubclass(expr.func, sympy.Float):
|
| 57 |
parameters.append(float(expr))
|
| 58 |
return f"parameters[{len(parameters) - 1}]"
|
| 59 |
+
elif issubclass(expr.func, sympy.Rational) or issubclass(
|
| 60 |
+
expr.func, sympy.NumberSymbol
|
| 61 |
+
):
|
| 62 |
return f"{float(expr)}"
|
| 63 |
elif issubclass(expr.func, sympy.Integer):
|
| 64 |
return f"{int(expr)}"
|
pysr/export_latex.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
"""Functions to help export PySR equations to LaTeX."""
|
|
|
|
| 2 |
from typing import List, Optional, Tuple
|
| 3 |
|
| 4 |
import pandas as pd
|
|
@@ -152,3 +153,15 @@ def sympy2multilatextable(
|
|
| 152 |
]
|
| 153 |
|
| 154 |
return "\n\n".join(latex_tables)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""Functions to help export PySR equations to LaTeX."""
|
| 2 |
+
|
| 3 |
from typing import List, Optional, Tuple
|
| 4 |
|
| 5 |
import pandas as pd
|
|
|
|
| 153 |
]
|
| 154 |
|
| 155 |
return "\n\n".join(latex_tables)
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def with_preamble(table_string: str) -> str:
|
| 159 |
+
preamble_string = [
|
| 160 |
+
r"\usepackage{breqn}",
|
| 161 |
+
r"\usepackage{booktabs}",
|
| 162 |
+
"",
|
| 163 |
+
"...",
|
| 164 |
+
"",
|
| 165 |
+
table_string,
|
| 166 |
+
]
|
| 167 |
+
return "\n".join(preamble_string)
|
pysr/export_numpy.py
CHANGED
|
@@ -1,9 +1,12 @@
|
|
| 1 |
"""Code for exporting discovered expressions to numpy"""
|
|
|
|
| 2 |
import warnings
|
|
|
|
| 3 |
|
| 4 |
import numpy as np
|
| 5 |
import pandas as pd
|
| 6 |
-
from
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
def sympy2numpy(eqn, sympy_symbols, *, selection=None):
|
|
@@ -13,6 +16,10 @@ def sympy2numpy(eqn, sympy_symbols, *, selection=None):
|
|
| 13 |
class CallableEquation:
|
| 14 |
"""Simple wrapper for numpy lambda functions built with sympy"""
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
def __init__(self, eqn, sympy_symbols, selection=None):
|
| 17 |
self._sympy = eqn
|
| 18 |
self._sympy_symbols = sympy_symbols
|
|
@@ -28,8 +35,9 @@ class CallableEquation:
|
|
| 28 |
return self._lambda(
|
| 29 |
**{k: X[k].values for k in map(str, self._sympy_symbols)}
|
| 30 |
) * np.ones(expected_shape)
|
|
|
|
| 31 |
if self._selection is not None:
|
| 32 |
-
if X.shape[1] !=
|
| 33 |
warnings.warn(
|
| 34 |
"`X` should be of shape (n_samples, len(self._selection)). "
|
| 35 |
"Automatically filtering `X` to selection. "
|
|
@@ -37,6 +45,7 @@ class CallableEquation:
|
|
| 37 |
"this may lead to incorrect predictions and other errors."
|
| 38 |
)
|
| 39 |
X = X[:, self._selection]
|
|
|
|
| 40 |
return self._lambda(*X.T) * np.ones(expected_shape)
|
| 41 |
|
| 42 |
@property
|
|
|
|
| 1 |
"""Code for exporting discovered expressions to numpy"""
|
| 2 |
+
|
| 3 |
import warnings
|
| 4 |
+
from typing import List, Union
|
| 5 |
|
| 6 |
import numpy as np
|
| 7 |
import pandas as pd
|
| 8 |
+
from numpy.typing import NDArray
|
| 9 |
+
from sympy import Expr, Symbol, lambdify
|
| 10 |
|
| 11 |
|
| 12 |
def sympy2numpy(eqn, sympy_symbols, *, selection=None):
|
|
|
|
| 16 |
class CallableEquation:
|
| 17 |
"""Simple wrapper for numpy lambda functions built with sympy"""
|
| 18 |
|
| 19 |
+
_sympy: Expr
|
| 20 |
+
_sympy_symbols: List[Symbol]
|
| 21 |
+
_selection: Union[NDArray[np.bool_], None]
|
| 22 |
+
|
| 23 |
def __init__(self, eqn, sympy_symbols, selection=None):
|
| 24 |
self._sympy = eqn
|
| 25 |
self._sympy_symbols = sympy_symbols
|
|
|
|
| 35 |
return self._lambda(
|
| 36 |
**{k: X[k].values for k in map(str, self._sympy_symbols)}
|
| 37 |
) * np.ones(expected_shape)
|
| 38 |
+
|
| 39 |
if self._selection is not None:
|
| 40 |
+
if X.shape[1] != self._selection.sum():
|
| 41 |
warnings.warn(
|
| 42 |
"`X` should be of shape (n_samples, len(self._selection)). "
|
| 43 |
"Automatically filtering `X` to selection. "
|
|
|
|
| 45 |
"this may lead to incorrect predictions and other errors."
|
| 46 |
)
|
| 47 |
X = X[:, self._selection]
|
| 48 |
+
|
| 49 |
return self._lambda(*X.T) * np.ones(expected_shape)
|
| 50 |
|
| 51 |
@property
|
pysr/export_sympy.py
CHANGED
|
@@ -1,9 +1,12 @@
|
|
| 1 |
"""Define utilities to export to sympy"""
|
|
|
|
| 2 |
from typing import Callable, Dict, List, Optional
|
| 3 |
|
| 4 |
import sympy
|
| 5 |
from sympy import sympify
|
| 6 |
|
|
|
|
|
|
|
| 7 |
sympy_mappings = {
|
| 8 |
"div": lambda x, y: x / y,
|
| 9 |
"mult": lambda x, y: x * y,
|
|
@@ -29,8 +32,8 @@ sympy_mappings = {
|
|
| 29 |
"acosh": lambda x: sympy.acosh(x),
|
| 30 |
"acosh_abs": lambda x: sympy.acosh(abs(x) + 1),
|
| 31 |
"asinh": sympy.asinh,
|
| 32 |
-
"atanh": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - 1),
|
| 33 |
-
"atanh_clip": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - 1),
|
| 34 |
"abs": abs,
|
| 35 |
"mod": sympy.Mod,
|
| 36 |
"erf": sympy.erf,
|
|
@@ -50,6 +53,7 @@ sympy_mappings = {
|
|
| 50 |
"round": lambda x: sympy.ceiling(x - 0.5),
|
| 51 |
"max": lambda x, y: sympy.Piecewise((y, x < y), (x, True)),
|
| 52 |
"min": lambda x, y: sympy.Piecewise((x, x < y), (y, True)),
|
|
|
|
| 53 |
"cond": lambda x, y: sympy.Piecewise((y, x > 0), (0.0, True)),
|
| 54 |
"logical_or": lambda x, y: sympy.Piecewise((1.0, (x > 0) | (y > 0)), (0.0, True)),
|
| 55 |
"logical_and": lambda x, y: sympy.Piecewise((1.0, (x > 0) & (y > 0)), (0.0, True)),
|
|
@@ -58,13 +62,13 @@ sympy_mappings = {
|
|
| 58 |
|
| 59 |
|
| 60 |
def create_sympy_symbols_map(
|
| 61 |
-
feature_names_in:
|
| 62 |
) -> Dict[str, sympy.Symbol]:
|
| 63 |
return {variable: sympy.Symbol(variable) for variable in feature_names_in}
|
| 64 |
|
| 65 |
|
| 66 |
def create_sympy_symbols(
|
| 67 |
-
feature_names_in:
|
| 68 |
) -> List[sympy.Symbol]:
|
| 69 |
return [sympy.Symbol(variable) for variable in feature_names_in]
|
| 70 |
|
|
@@ -72,7 +76,7 @@ def create_sympy_symbols(
|
|
| 72 |
def pysr2sympy(
|
| 73 |
equation: str,
|
| 74 |
*,
|
| 75 |
-
feature_names_in: Optional[
|
| 76 |
extra_sympy_mappings: Optional[Dict[str, Callable]] = None,
|
| 77 |
):
|
| 78 |
if feature_names_in is None:
|
|
@@ -83,7 +87,12 @@ def pysr2sympy(
|
|
| 83 |
**sympy_mappings,
|
| 84 |
}
|
| 85 |
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
|
| 89 |
def assert_valid_sympy_symbol(var_name: str) -> None:
|
|
|
|
| 1 |
"""Define utilities to export to sympy"""
|
| 2 |
+
|
| 3 |
from typing import Callable, Dict, List, Optional
|
| 4 |
|
| 5 |
import sympy
|
| 6 |
from sympy import sympify
|
| 7 |
|
| 8 |
+
from .utils import ArrayLike
|
| 9 |
+
|
| 10 |
sympy_mappings = {
|
| 11 |
"div": lambda x, y: x / y,
|
| 12 |
"mult": lambda x, y: x * y,
|
|
|
|
| 32 |
"acosh": lambda x: sympy.acosh(x),
|
| 33 |
"acosh_abs": lambda x: sympy.acosh(abs(x) + 1),
|
| 34 |
"asinh": sympy.asinh,
|
| 35 |
+
"atanh": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - sympy.S(1)),
|
| 36 |
+
"atanh_clip": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - sympy.S(1)),
|
| 37 |
"abs": abs,
|
| 38 |
"mod": sympy.Mod,
|
| 39 |
"erf": sympy.erf,
|
|
|
|
| 53 |
"round": lambda x: sympy.ceiling(x - 0.5),
|
| 54 |
"max": lambda x, y: sympy.Piecewise((y, x < y), (x, True)),
|
| 55 |
"min": lambda x, y: sympy.Piecewise((x, x < y), (y, True)),
|
| 56 |
+
"greater": lambda x, y: sympy.Piecewise((1.0, x > y), (0.0, True)),
|
| 57 |
"cond": lambda x, y: sympy.Piecewise((y, x > 0), (0.0, True)),
|
| 58 |
"logical_or": lambda x, y: sympy.Piecewise((1.0, (x > 0) | (y > 0)), (0.0, True)),
|
| 59 |
"logical_and": lambda x, y: sympy.Piecewise((1.0, (x > 0) & (y > 0)), (0.0, True)),
|
|
|
|
| 62 |
|
| 63 |
|
| 64 |
def create_sympy_symbols_map(
|
| 65 |
+
feature_names_in: ArrayLike[str],
|
| 66 |
) -> Dict[str, sympy.Symbol]:
|
| 67 |
return {variable: sympy.Symbol(variable) for variable in feature_names_in}
|
| 68 |
|
| 69 |
|
| 70 |
def create_sympy_symbols(
|
| 71 |
+
feature_names_in: ArrayLike[str],
|
| 72 |
) -> List[sympy.Symbol]:
|
| 73 |
return [sympy.Symbol(variable) for variable in feature_names_in]
|
| 74 |
|
|
|
|
| 76 |
def pysr2sympy(
|
| 77 |
equation: str,
|
| 78 |
*,
|
| 79 |
+
feature_names_in: Optional[ArrayLike[str]] = None,
|
| 80 |
extra_sympy_mappings: Optional[Dict[str, Callable]] = None,
|
| 81 |
):
|
| 82 |
if feature_names_in is None:
|
|
|
|
| 87 |
**sympy_mappings,
|
| 88 |
}
|
| 89 |
|
| 90 |
+
try:
|
| 91 |
+
return sympify(equation, locals=local_sympy_mappings, evaluate=False)
|
| 92 |
+
except TypeError as e:
|
| 93 |
+
if "got an unexpected keyword argument 'evaluate'" in str(e):
|
| 94 |
+
return sympify(equation, locals=local_sympy_mappings)
|
| 95 |
+
raise TypeError(f"Error processing equation '{equation}'") from e
|
| 96 |
|
| 97 |
|
| 98 |
def assert_valid_sympy_symbol(var_name: str) -> None:
|
pysr/export_torch.py
CHANGED
|
@@ -1,11 +1,9 @@
|
|
| 1 |
-
|
| 2 |
-
# From https://github.com/patrick-kidger/sympytorch
|
| 3 |
-
# Copied here to allow PySR-specific tweaks
|
| 4 |
-
#####
|
| 5 |
|
| 6 |
import collections as co
|
| 7 |
import functools as ft
|
| 8 |
|
|
|
|
| 9 |
import sympy
|
| 10 |
|
| 11 |
|
|
@@ -84,7 +82,7 @@ def _initialize_torch():
|
|
| 84 |
}
|
| 85 |
|
| 86 |
class _Node(torch.nn.Module):
|
| 87 |
-
"""
|
| 88 |
|
| 89 |
def __init__(self, *, expr, _memodict, _func_lookup, **kwargs):
|
| 90 |
super().__init__(**kwargs)
|
|
@@ -116,6 +114,11 @@ def _initialize_torch():
|
|
| 116 |
self._value = int(expr)
|
| 117 |
self._torch_func = lambda: self._value
|
| 118 |
self._args = ()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
elif issubclass(expr.func, sympy.Symbol):
|
| 120 |
self._name = expr.name
|
| 121 |
self._torch_func = lambda value: value
|
|
@@ -156,7 +159,7 @@ def _initialize_torch():
|
|
| 156 |
return self._torch_func(*args)
|
| 157 |
|
| 158 |
class _SingleSymPyModule(torch.nn.Module):
|
| 159 |
-
"""
|
| 160 |
|
| 161 |
def __init__(
|
| 162 |
self, expression, symbols_in, selection=None, extra_funcs=None, **kwargs
|
|
|
|
| 1 |
+
# Fork of https://github.com/patrick-kidger/sympytorch
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
import collections as co
|
| 4 |
import functools as ft
|
| 5 |
|
| 6 |
+
import numpy as np # noqa: F401
|
| 7 |
import sympy
|
| 8 |
|
| 9 |
|
|
|
|
| 82 |
}
|
| 83 |
|
| 84 |
class _Node(torch.nn.Module):
|
| 85 |
+
"""Forked from https://github.com/patrick-kidger/sympytorch"""
|
| 86 |
|
| 87 |
def __init__(self, *, expr, _memodict, _func_lookup, **kwargs):
|
| 88 |
super().__init__(**kwargs)
|
|
|
|
| 114 |
self._value = int(expr)
|
| 115 |
self._torch_func = lambda: self._value
|
| 116 |
self._args = ()
|
| 117 |
+
elif issubclass(expr.func, sympy.NumberSymbol):
|
| 118 |
+
# Can get here from exp(1) or exact pi
|
| 119 |
+
self._value = float(expr)
|
| 120 |
+
self._torch_func = lambda: self._value
|
| 121 |
+
self._args = ()
|
| 122 |
elif issubclass(expr.func, sympy.Symbol):
|
| 123 |
self._name = expr.name
|
| 124 |
self._torch_func = lambda value: value
|
|
|
|
| 159 |
return self._torch_func(*args)
|
| 160 |
|
| 161 |
class _SingleSymPyModule(torch.nn.Module):
|
| 162 |
+
"""Forked from https://github.com/patrick-kidger/sympytorch"""
|
| 163 |
|
| 164 |
def __init__(
|
| 165 |
self, expression, symbols_in, selection=None, extra_funcs=None, **kwargs
|
pysr/feature_selection.py
CHANGED
|
@@ -1,8 +1,20 @@
|
|
| 1 |
"""Functions for doing feature selection during preprocessing."""
|
|
|
|
|
|
|
|
|
|
| 2 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
-
def run_feature_selection(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
"""
|
| 7 |
Find most important features.
|
| 8 |
|
|
@@ -20,11 +32,16 @@ def run_feature_selection(X, y, select_k_features, random_state=None):
|
|
| 20 |
selector = SelectFromModel(
|
| 21 |
clf, threshold=-np.inf, max_features=select_k_features, prefit=True
|
| 22 |
)
|
| 23 |
-
return selector.get_support(indices=
|
| 24 |
|
| 25 |
|
| 26 |
# Function has not been removed only due to usage in module tests
|
| 27 |
-
def _handle_feature_selection(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
if select_k_features is not None:
|
| 29 |
selection = run_feature_selection(X, y, select_k_features)
|
| 30 |
print(f"Using features {[variable_names[i] for i in selection]}")
|
|
|
|
| 1 |
"""Functions for doing feature selection during preprocessing."""
|
| 2 |
+
|
| 3 |
+
from typing import Optional, cast
|
| 4 |
+
|
| 5 |
import numpy as np
|
| 6 |
+
from numpy import ndarray
|
| 7 |
+
from numpy.typing import NDArray
|
| 8 |
+
|
| 9 |
+
from .utils import ArrayLike
|
| 10 |
|
| 11 |
|
| 12 |
+
def run_feature_selection(
|
| 13 |
+
X: ndarray,
|
| 14 |
+
y: ndarray,
|
| 15 |
+
select_k_features: int,
|
| 16 |
+
random_state: Optional[np.random.RandomState] = None,
|
| 17 |
+
) -> NDArray[np.bool_]:
|
| 18 |
"""
|
| 19 |
Find most important features.
|
| 20 |
|
|
|
|
| 32 |
selector = SelectFromModel(
|
| 33 |
clf, threshold=-np.inf, max_features=select_k_features, prefit=True
|
| 34 |
)
|
| 35 |
+
return cast(NDArray[np.bool_], selector.get_support(indices=False))
|
| 36 |
|
| 37 |
|
| 38 |
# Function has not been removed only due to usage in module tests
|
| 39 |
+
def _handle_feature_selection(
|
| 40 |
+
X: ndarray,
|
| 41 |
+
select_k_features: Optional[int],
|
| 42 |
+
y: ndarray,
|
| 43 |
+
variable_names: ArrayLike[str],
|
| 44 |
+
):
|
| 45 |
if select_k_features is not None:
|
| 46 |
selection = run_feature_selection(X, y, select_k_features)
|
| 47 |
print(f"Using features {[variable_names[i] for i in selection]}")
|
pysr/julia_helpers.py
CHANGED
|
@@ -1,11 +1,16 @@
|
|
| 1 |
"""Functions for initializing the Julia environment and installing deps."""
|
| 2 |
|
|
|
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
from juliacall import convert as jl_convert # type: ignore
|
|
|
|
| 5 |
|
| 6 |
from .deprecated import init_julia, install
|
| 7 |
from .julia_import import jl
|
| 8 |
|
|
|
|
|
|
|
| 9 |
jl.seval("using Serialization: Serialization")
|
| 10 |
jl.seval("using PythonCall: PythonCall")
|
| 11 |
|
|
@@ -22,24 +27,31 @@ def _escape_filename(filename):
|
|
| 22 |
return str_repr
|
| 23 |
|
| 24 |
|
| 25 |
-
def _load_cluster_manager(cluster_manager):
|
| 26 |
jl.seval(f"using ClusterManagers: addprocs_{cluster_manager}")
|
| 27 |
return jl.seval(f"addprocs_{cluster_manager}")
|
| 28 |
|
| 29 |
|
| 30 |
-
def jl_array(x):
|
| 31 |
if x is None:
|
| 32 |
return None
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
|
| 36 |
-
def jl_serialize(obj):
|
| 37 |
buf = jl.IOBuffer()
|
| 38 |
Serialization.serialize(buf, obj)
|
| 39 |
return np.array(jl.take_b(buf))
|
| 40 |
|
| 41 |
|
| 42 |
-
def jl_deserialize(s):
|
| 43 |
if s is None:
|
| 44 |
return s
|
| 45 |
buf = jl.IOBuffer()
|
|
|
|
| 1 |
"""Functions for initializing the Julia environment and installing deps."""
|
| 2 |
|
| 3 |
+
from typing import Any, Callable, Union, cast
|
| 4 |
+
|
| 5 |
import numpy as np
|
| 6 |
from juliacall import convert as jl_convert # type: ignore
|
| 7 |
+
from numpy.typing import NDArray
|
| 8 |
|
| 9 |
from .deprecated import init_julia, install
|
| 10 |
from .julia_import import jl
|
| 11 |
|
| 12 |
+
jl_convert = cast(Callable[[Any, Any], Any], jl_convert)
|
| 13 |
+
|
| 14 |
jl.seval("using Serialization: Serialization")
|
| 15 |
jl.seval("using PythonCall: PythonCall")
|
| 16 |
|
|
|
|
| 27 |
return str_repr
|
| 28 |
|
| 29 |
|
| 30 |
+
def _load_cluster_manager(cluster_manager: str):
|
| 31 |
jl.seval(f"using ClusterManagers: addprocs_{cluster_manager}")
|
| 32 |
return jl.seval(f"addprocs_{cluster_manager}")
|
| 33 |
|
| 34 |
|
| 35 |
+
def jl_array(x, dtype=None):
|
| 36 |
if x is None:
|
| 37 |
return None
|
| 38 |
+
elif dtype is None:
|
| 39 |
+
return jl_convert(jl.Array, x)
|
| 40 |
+
else:
|
| 41 |
+
return jl_convert(jl.Array[dtype], x)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def jl_is_function(f) -> bool:
|
| 45 |
+
return cast(bool, jl.seval("op -> op isa Function")(f))
|
| 46 |
|
| 47 |
|
| 48 |
+
def jl_serialize(obj: Any) -> NDArray[np.uint8]:
|
| 49 |
buf = jl.IOBuffer()
|
| 50 |
Serialization.serialize(buf, obj)
|
| 51 |
return np.array(jl.take_b(buf))
|
| 52 |
|
| 53 |
|
| 54 |
+
def jl_deserialize(s: Union[NDArray[np.uint8], None]):
|
| 55 |
if s is None:
|
| 56 |
return s
|
| 57 |
buf = jl.IOBuffer()
|
pysr/julia_import.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
import os
|
| 2 |
import sys
|
| 3 |
import warnings
|
|
|
|
|
|
|
| 4 |
|
| 5 |
# Check if JuliaCall is already loaded, and if so, warn the user
|
| 6 |
# about the relevant environment variables. If not loaded,
|
|
@@ -35,31 +37,17 @@ else:
|
|
| 35 |
os.environ[k] = os.environ.get(k, default)
|
| 36 |
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
| 41 |
|
| 42 |
-
|
| 43 |
-
autoload_extensions = os.environ.get("PYSR_AUTOLOAD_EXTENSIONS", "yes")
|
| 44 |
-
if autoload_extensions in {"yes", ""} and jl_version >= (1, 9, 0):
|
| 45 |
-
try:
|
| 46 |
-
get_ipython = sys.modules["IPython"].get_ipython
|
| 47 |
|
| 48 |
-
|
| 49 |
-
raise ImportError("console")
|
| 50 |
|
| 51 |
-
print(
|
| 52 |
-
"Detected Jupyter notebook. Loading juliacall extension. Set `PYSR_AUTOLOAD_EXTENSIONS=no` to disable."
|
| 53 |
-
)
|
| 54 |
|
| 55 |
-
|
| 56 |
-
get_ipython().run_line_magic("load_ext", "juliacall")
|
| 57 |
-
except Exception:
|
| 58 |
-
pass
|
| 59 |
-
elif autoload_extensions not in {"no", "yes", ""}:
|
| 60 |
-
warnings.warn(
|
| 61 |
-
"PYSR_AUTOLOAD_EXTENSIONS environment variable is set to something other than 'yes' or 'no' or ''."
|
| 62 |
-
)
|
| 63 |
|
| 64 |
jl.seval("using SymbolicRegression")
|
| 65 |
SymbolicRegression = jl.SymbolicRegression
|
|
|
|
| 1 |
import os
|
| 2 |
import sys
|
| 3 |
import warnings
|
| 4 |
+
from types import ModuleType
|
| 5 |
+
from typing import cast
|
| 6 |
|
| 7 |
# Check if JuliaCall is already loaded, and if so, warn the user
|
| 8 |
# about the relevant environment variables. If not loaded,
|
|
|
|
| 37 |
os.environ[k] = os.environ.get(k, default)
|
| 38 |
|
| 39 |
|
| 40 |
+
autoload_extensions = os.environ.get("PYSR_AUTOLOAD_EXTENSIONS")
|
| 41 |
+
if autoload_extensions is not None:
|
| 42 |
+
# Deprecated; so just pass to juliacall
|
| 43 |
+
os.environ["PYTHON_JULIACALL_AUTOLOAD_IPYTHON_EXTENSION"] = autoload_extensions
|
| 44 |
|
| 45 |
+
from juliacall import Main as jl # type: ignore
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
+
jl = cast(ModuleType, jl)
|
|
|
|
| 48 |
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
+
jl_version = (jl.VERSION.major, jl.VERSION.minor, jl.VERSION.patch)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
jl.seval("using SymbolicRegression")
|
| 53 |
SymbolicRegression = jl.SymbolicRegression
|
pysr/juliapkg.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
"packages": {
|
| 4 |
"SymbolicRegression": {
|
| 5 |
"uuid": "8254be44-1295-4e6a-a16d-46603ac705cb",
|
| 6 |
-
"version": "=0.24.
|
| 7 |
},
|
| 8 |
"Serialization": {
|
| 9 |
"uuid": "9e88b42a-f829-5b0c-bbe9-9e923198166b",
|
|
|
|
| 3 |
"packages": {
|
| 4 |
"SymbolicRegression": {
|
| 5 |
"uuid": "8254be44-1295-4e6a-a16d-46603ac705cb",
|
| 6 |
+
"version": "=0.24.5"
|
| 7 |
},
|
| 8 |
"Serialization": {
|
| 9 |
"uuid": "9e88b42a-f829-5b0c-bbe9-9e923198166b",
|
pysr/param_groupings.yml
CHANGED
|
@@ -14,6 +14,7 @@
|
|
| 14 |
- loss_function
|
| 15 |
- model_selection
|
| 16 |
- dimensional_constraint_penalty
|
|
|
|
| 17 |
- Working with Complexities:
|
| 18 |
- parsimony
|
| 19 |
- constraints
|
|
|
|
| 14 |
- loss_function
|
| 15 |
- model_selection
|
| 16 |
- dimensional_constraint_penalty
|
| 17 |
+
- dimensionless_constants_only
|
| 18 |
- Working with Complexities:
|
| 19 |
- parsimony
|
| 20 |
- constraints
|
pysr/sklearn_monkeypatch.py
CHANGED
|
@@ -3,8 +3,7 @@
|
|
| 3 |
from sklearn.utils import validation
|
| 4 |
|
| 5 |
|
| 6 |
-
def _ensure_no_complex_data(*args, **kwargs):
|
| 7 |
-
...
|
| 8 |
|
| 9 |
|
| 10 |
try:
|
|
|
|
| 3 |
from sklearn.utils import validation
|
| 4 |
|
| 5 |
|
| 6 |
+
def _ensure_no_complex_data(*args, **kwargs): ...
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
try:
|
pysr/sr.py
CHANGED
|
@@ -8,27 +8,31 @@ import shutil
|
|
| 8 |
import sys
|
| 9 |
import tempfile
|
| 10 |
import warnings
|
|
|
|
| 11 |
from datetime import datetime
|
| 12 |
from io import StringIO
|
| 13 |
from multiprocessing import cpu_count
|
| 14 |
from pathlib import Path
|
| 15 |
-
from typing import Callable, Dict, List, Optional, Tuple, Union
|
| 16 |
-
|
| 17 |
-
if sys.version_info >= (3, 8):
|
| 18 |
-
from typing import Literal
|
| 19 |
-
else:
|
| 20 |
-
from typing_extensions import Literal
|
| 21 |
|
| 22 |
import numpy as np
|
| 23 |
import pandas as pd
|
|
|
|
|
|
|
| 24 |
from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin
|
| 25 |
from sklearn.utils import check_array, check_consistent_length, check_random_state
|
| 26 |
-
from sklearn.utils.validation import _check_feature_names_in
|
|
|
|
| 27 |
|
| 28 |
from .denoising import denoise, multi_denoise
|
| 29 |
from .deprecated import DEPRECATED_KWARGS
|
| 30 |
from .export_jax import sympy2jax
|
| 31 |
-
from .export_latex import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
from .export_numpy import sympy2numpy
|
| 33 |
from .export_sympy import assert_valid_sympy_symbol, create_sympy_symbols, pysr2sympy
|
| 34 |
from .export_torch import sympy2torch
|
|
@@ -40,17 +44,21 @@ from .julia_helpers import (
|
|
| 40 |
_load_cluster_manager,
|
| 41 |
jl_array,
|
| 42 |
jl_deserialize,
|
|
|
|
| 43 |
jl_serialize,
|
| 44 |
)
|
| 45 |
from .julia_import import SymbolicRegression, jl
|
| 46 |
from .utils import (
|
|
|
|
|
|
|
| 47 |
_csv_filename_to_pkl_filename,
|
| 48 |
_preprocess_julia_floats,
|
| 49 |
_safe_check_feature_names_in,
|
| 50 |
_subscriptify,
|
|
|
|
| 51 |
)
|
| 52 |
|
| 53 |
-
|
| 54 |
|
| 55 |
|
| 56 |
def _process_constraints(binary_operators, unary_operators, constraints):
|
|
@@ -113,7 +121,7 @@ def _maybe_create_inline_operators(
|
|
| 113 |
"and underscores are allowed."
|
| 114 |
)
|
| 115 |
if (extra_sympy_mappings is None) or (
|
| 116 |
-
not
|
| 117 |
):
|
| 118 |
raise ValueError(
|
| 119 |
f"Custom function {function_name} is not defined in `extra_sympy_mappings`. "
|
|
@@ -130,6 +138,7 @@ def _check_assertions(
|
|
| 130 |
X,
|
| 131 |
use_custom_variable_names,
|
| 132 |
variable_names,
|
|
|
|
| 133 |
weights,
|
| 134 |
y,
|
| 135 |
X_units,
|
|
@@ -154,6 +163,13 @@ def _check_assertions(
|
|
| 154 |
"and underscores are allowed."
|
| 155 |
)
|
| 156 |
assert_valid_sympy_symbol(var_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
if X_units is not None and len(X_units) != X.shape[1]:
|
| 158 |
raise ValueError(
|
| 159 |
"The number of units in `X_units` must equal the number of features in `X`."
|
|
@@ -178,6 +194,21 @@ def _check_assertions(
|
|
| 178 |
VALID_OPTIMIZER_ALGORITHMS = ["BFGS", "NelderMead"]
|
| 179 |
|
| 180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
| 182 |
"""
|
| 183 |
High-performance symbolic regression algorithm.
|
|
@@ -309,7 +340,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 309 |
`idx` argument to the function, which is `nothing`
|
| 310 |
for non-batched, and a 1D array of indices for batched.
|
| 311 |
Default is `None`.
|
| 312 |
-
complexity_of_operators : dict[str, float]
|
| 313 |
If you would like to use a complexity other than 1 for an
|
| 314 |
operator, specify the complexity here. For example,
|
| 315 |
`{"sin": 2, "+": 1}` would give a complexity of 2 for each use
|
|
@@ -318,16 +349,22 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 318 |
numbers for a complexity, and the total complexity of a tree
|
| 319 |
will be rounded to the nearest integer after computing.
|
| 320 |
Default is `None`.
|
| 321 |
-
complexity_of_constants : float
|
| 322 |
Complexity of constants. Default is `1`.
|
| 323 |
-
complexity_of_variables : float
|
| 324 |
-
|
|
|
|
|
|
|
|
|
|
| 325 |
parsimony : float
|
| 326 |
Multiplicative factor for how much to punish complexity.
|
| 327 |
Default is `0.0032`.
|
| 328 |
dimensional_constraint_penalty : float
|
| 329 |
Additive penalty for if dimensional analysis of an expression fails.
|
| 330 |
By default, this is `1000.0`.
|
|
|
|
|
|
|
|
|
|
| 331 |
use_frequency : bool
|
| 332 |
Whether to measure the frequency of complexities, and use that
|
| 333 |
instead of parsimony to explore equation space. Will naturally
|
|
@@ -603,22 +640,17 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 603 |
Units of each variable in the training dataset, `y`.
|
| 604 |
nout_ : int
|
| 605 |
Number of output dimensions.
|
| 606 |
-
selection_mask_ :
|
| 607 |
-
|
| 608 |
-
`select_k_features` is set.
|
| 609 |
tempdir_ : Path
|
| 610 |
Path to the temporary equations directory.
|
| 611 |
-
equation_file_ : str
|
| 612 |
Output equation file name produced by the julia backend.
|
| 613 |
julia_state_stream_ : ndarray
|
| 614 |
The serialized state for the julia SymbolicRegression.jl backend (after fitting),
|
| 615 |
stored as an array of uint8, produced by Julia's Serialization.serialize function.
|
| 616 |
-
julia_state_
|
| 617 |
-
The deserialized state.
|
| 618 |
julia_options_stream_ : ndarray
|
| 619 |
The serialized julia options, stored as an array of uint8,
|
| 620 |
-
julia_options_
|
| 621 |
-
The deserialized julia options.
|
| 622 |
equation_file_contents_ : list[pandas.DataFrame]
|
| 623 |
Contents of the equation file output by the Julia backend.
|
| 624 |
show_pickle_warnings_ : bool
|
|
@@ -665,6 +697,22 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 665 |
```
|
| 666 |
"""
|
| 667 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 668 |
def __init__(
|
| 669 |
self,
|
| 670 |
model_selection: Literal["best", "accuracy", "score"] = "best",
|
|
@@ -685,9 +733,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 685 |
loss_function: Optional[str] = None,
|
| 686 |
complexity_of_operators: Optional[Dict[str, Union[int, float]]] = None,
|
| 687 |
complexity_of_constants: Union[int, float] = 1,
|
| 688 |
-
complexity_of_variables: Union[int, float] =
|
| 689 |
parsimony: float = 0.0032,
|
| 690 |
dimensional_constraint_penalty: Optional[float] = None,
|
|
|
|
| 691 |
use_frequency: bool = True,
|
| 692 |
use_frequency_in_tournament: bool = True,
|
| 693 |
adaptive_parsimony_scaling: float = 20.0,
|
|
@@ -783,6 +832,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 783 |
self.complexity_of_variables = complexity_of_variables
|
| 784 |
self.parsimony = parsimony
|
| 785 |
self.dimensional_constraint_penalty = dimensional_constraint_penalty
|
|
|
|
| 786 |
self.use_frequency = use_frequency
|
| 787 |
self.use_frequency_in_tournament = use_frequency_in_tournament
|
| 788 |
self.adaptive_parsimony_scaling = adaptive_parsimony_scaling
|
|
@@ -863,15 +913,15 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 863 |
updated_kwarg_name = DEPRECATED_KWARGS[k]
|
| 864 |
setattr(self, updated_kwarg_name, v)
|
| 865 |
warnings.warn(
|
| 866 |
-
f"{k} has been renamed to {updated_kwarg_name} in PySRRegressor. "
|
| 867 |
"Please use that instead.",
|
| 868 |
FutureWarning,
|
| 869 |
)
|
| 870 |
# Handle kwargs that have been moved to the fit method
|
| 871 |
elif k in ["weights", "variable_names", "Xresampled"]:
|
| 872 |
warnings.warn(
|
| 873 |
-
f"{k} is a data
|
| 874 |
-
f"Ignoring parameter; please pass {k} during the call to fit instead.",
|
| 875 |
FutureWarning,
|
| 876 |
)
|
| 877 |
elif k == "julia_project":
|
|
@@ -888,21 +938,25 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 888 |
FutureWarning,
|
| 889 |
)
|
| 890 |
else:
|
| 891 |
-
|
| 892 |
-
|
|
|
|
| 893 |
)
|
|
|
|
|
|
|
|
|
|
| 894 |
|
| 895 |
@classmethod
|
| 896 |
def from_file(
|
| 897 |
cls,
|
| 898 |
-
equation_file,
|
| 899 |
*,
|
| 900 |
-
binary_operators=None,
|
| 901 |
-
unary_operators=None,
|
| 902 |
-
n_features_in=None,
|
| 903 |
-
feature_names_in=None,
|
| 904 |
-
selection_mask=None,
|
| 905 |
-
nout=1,
|
| 906 |
verbosity=1,
|
| 907 |
**pysr_kwargs,
|
| 908 |
):
|
|
@@ -911,7 +965,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 911 |
|
| 912 |
Parameters
|
| 913 |
----------
|
| 914 |
-
equation_file : str
|
| 915 |
Path to a pickle file containing a saved model, or a csv file
|
| 916 |
containing equations.
|
| 917 |
binary_operators : list[str]
|
|
@@ -926,8 +980,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 926 |
feature_names_in : list[str]
|
| 927 |
Names of the features passed to the model.
|
| 928 |
Not needed if loading from a pickle file.
|
| 929 |
-
selection_mask :
|
| 930 |
-
If using select_k_features
|
| 931 |
Not needed if loading from a pickle file.
|
| 932 |
nout : int
|
| 933 |
Number of outputs of the model.
|
|
@@ -983,7 +1037,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 983 |
|
| 984 |
# TODO: copy .bkup file if exists.
|
| 985 |
model = cls(
|
| 986 |
-
equation_file=equation_file,
|
| 987 |
binary_operators=binary_operators,
|
| 988 |
unary_operators=unary_operators,
|
| 989 |
**pysr_kwargs,
|
|
@@ -1003,7 +1057,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1003 |
model.display_feature_names_in_ = feature_names_in
|
| 1004 |
|
| 1005 |
if selection_mask is None:
|
| 1006 |
-
model.selection_mask_ = np.ones(n_features_in, dtype=
|
| 1007 |
else:
|
| 1008 |
model.selection_mask_ = selection_mask
|
| 1009 |
|
|
@@ -1030,7 +1084,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1030 |
all_equations = equations
|
| 1031 |
|
| 1032 |
for i, equations in enumerate(all_equations):
|
| 1033 |
-
selected = [""
|
| 1034 |
chosen_row = idx_model_selection(equations, self.model_selection)
|
| 1035 |
selected[chosen_row] = ">>>>"
|
| 1036 |
repr_equations = pd.DataFrame(
|
|
@@ -1063,15 +1117,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1063 |
Handle pickle serialization for PySRRegressor.
|
| 1064 |
|
| 1065 |
The Scikit-learn standard requires estimators to be serializable via
|
| 1066 |
-
`pickle.dumps()`. However,
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
-
Thus, for `PySRRegressor` to support pickle serialization, the
|
| 1070 |
-
`julia_state_stream_` attribute must be hidden from pickle. This will
|
| 1071 |
-
prevent the `warm_start` of any model that is loaded via `pickle.loads()`,
|
| 1072 |
-
but does allow all other attributes of a fitted `PySRRegressor` estimator
|
| 1073 |
-
to be serialized. Note: Jax and Torch format equations are also removed
|
| 1074 |
-
from the pickled instance.
|
| 1075 |
"""
|
| 1076 |
state = self.__dict__
|
| 1077 |
show_pickle_warning = not (
|
|
@@ -1137,10 +1184,12 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1137 |
|
| 1138 |
@property
|
| 1139 |
def julia_options_(self):
|
|
|
|
| 1140 |
return jl_deserialize(self.julia_options_stream_)
|
| 1141 |
|
| 1142 |
@property
|
| 1143 |
def julia_state_(self):
|
|
|
|
| 1144 |
return jl_deserialize(self.julia_state_stream_)
|
| 1145 |
|
| 1146 |
@property
|
|
@@ -1153,7 +1202,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1153 |
)
|
| 1154 |
return self.julia_state_
|
| 1155 |
|
| 1156 |
-
def get_best(self, index=None):
|
| 1157 |
"""
|
| 1158 |
Get best equation using `model_selection`.
|
| 1159 |
|
|
@@ -1176,8 +1225,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1176 |
Raised when an invalid model selection strategy is provided.
|
| 1177 |
"""
|
| 1178 |
check_is_fitted(self, attributes=["equations_"])
|
| 1179 |
-
if self.equations_ is None:
|
| 1180 |
-
raise ValueError("No equations have been generated yet.")
|
| 1181 |
|
| 1182 |
if index is not None:
|
| 1183 |
if isinstance(self.equations_, list):
|
|
@@ -1185,16 +1232,21 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1185 |
index, list
|
| 1186 |
), "With multiple output features, index must be a list."
|
| 1187 |
return [eq.iloc[i] for eq, i in zip(self.equations_, index)]
|
| 1188 |
-
|
|
|
|
|
|
|
| 1189 |
|
| 1190 |
if isinstance(self.equations_, list):
|
| 1191 |
return [
|
| 1192 |
-
eq.
|
| 1193 |
for eq in self.equations_
|
| 1194 |
]
|
| 1195 |
-
|
| 1196 |
-
|
| 1197 |
-
|
|
|
|
|
|
|
|
|
|
| 1198 |
|
| 1199 |
def _setup_equation_file(self):
|
| 1200 |
"""
|
|
@@ -1219,7 +1271,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1219 |
self.equation_file_ = self.equation_file
|
| 1220 |
self.equation_file_contents_ = None
|
| 1221 |
|
| 1222 |
-
def
|
| 1223 |
"""
|
| 1224 |
Ensure parameters passed at initialization are valid.
|
| 1225 |
|
|
@@ -1277,59 +1329,57 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1277 |
f"PySR currently only supports the following optimizer algorithms: {VALID_OPTIMIZER_ALGORITHMS}"
|
| 1278 |
)
|
| 1279 |
|
| 1280 |
-
|
| 1281 |
-
|
| 1282 |
-
|
| 1283 |
-
|
| 1284 |
-
|
| 1285 |
-
|
| 1286 |
-
|
| 1287 |
-
|
| 1288 |
-
|
| 1289 |
-
|
| 1290 |
-
|
| 1291 |
-
|
| 1292 |
-
|
| 1293 |
-
|
| 1294 |
-
|
| 1295 |
-
|
| 1296 |
-
|
| 1297 |
-
parameter_value = default_value
|
| 1298 |
else:
|
| 1299 |
-
#
|
| 1300 |
-
|
| 1301 |
-
|
| 1302 |
-
)
|
| 1303 |
-
|
| 1304 |
-
elif parameter == "batch_size" and parameter_value < 1:
|
| 1305 |
-
warnings.warn(
|
| 1306 |
-
"Given `batch_size` must be greater than or equal to one. "
|
| 1307 |
-
"`batch_size` has been increased to equal one."
|
| 1308 |
-
)
|
| 1309 |
-
parameter_value = 1
|
| 1310 |
-
elif (
|
| 1311 |
-
parameter == "progress"
|
| 1312 |
-
and parameter_value
|
| 1313 |
-
and "buffer" not in sys.stdout.__dir__()
|
| 1314 |
-
):
|
| 1315 |
-
warnings.warn(
|
| 1316 |
-
"Note: it looks like you are running in Jupyter. "
|
| 1317 |
-
"The progress bar will be turned off."
|
| 1318 |
-
)
|
| 1319 |
-
parameter_value = False
|
| 1320 |
-
packed_modified_params[parameter] = parameter_value
|
| 1321 |
|
| 1322 |
assert (
|
| 1323 |
-
len(
|
| 1324 |
-
|
| 1325 |
-
|
| 1326 |
-
)
|
| 1327 |
|
| 1328 |
-
return
|
| 1329 |
|
| 1330 |
def _validate_and_set_fit_params(
|
| 1331 |
-
self,
|
| 1332 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1333 |
"""
|
| 1334 |
Validate the parameters passed to the :term`fit` method.
|
| 1335 |
|
|
@@ -1349,12 +1399,14 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1349 |
Weight array of the same shape as `y`.
|
| 1350 |
Each element is how to weight the mean-square-error loss
|
| 1351 |
for that particular element of y.
|
| 1352 |
-
variable_names :
|
| 1353 |
-
Names of each
|
|
|
|
|
|
|
| 1354 |
X_units : list[str] of length n_features
|
| 1355 |
-
Units of each
|
| 1356 |
y_units : str | list[str] of length n_out
|
| 1357 |
-
Units of each
|
| 1358 |
|
| 1359 |
Returns
|
| 1360 |
-------
|
|
@@ -1398,6 +1450,22 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1398 |
"Please use valid names instead."
|
| 1399 |
)
|
| 1400 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1401 |
# Data validation and feature name fetching via sklearn
|
| 1402 |
# This method sets the n_features_in_ attribute
|
| 1403 |
if Xresampled is not None:
|
|
@@ -1405,7 +1473,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1405 |
if weights is not None:
|
| 1406 |
weights = check_array(weights, ensure_2d=False)
|
| 1407 |
check_consistent_length(weights, y)
|
| 1408 |
-
X, y = self.
|
| 1409 |
self.feature_names_in_ = _safe_check_feature_names_in(
|
| 1410 |
self, variable_names, generate_names=False
|
| 1411 |
)
|
|
@@ -1415,10 +1483,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1415 |
self.display_feature_names_in_ = np.array(
|
| 1416 |
[f"x{_subscriptify(i)}" for i in range(X.shape[1])]
|
| 1417 |
)
|
|
|
|
| 1418 |
else:
|
| 1419 |
self.display_feature_names_in_ = self.feature_names_in_
|
| 1420 |
-
|
| 1421 |
-
variable_names = self.feature_names_in_
|
| 1422 |
|
| 1423 |
# Handle multioutput data
|
| 1424 |
if len(y.shape) == 1 or (len(y.shape) == 2 and y.shape[1] == 1):
|
|
@@ -1428,13 +1496,39 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1428 |
else:
|
| 1429 |
raise NotImplementedError("y shape not supported!")
|
| 1430 |
|
|
|
|
| 1431 |
self.X_units_ = copy.deepcopy(X_units)
|
| 1432 |
self.y_units_ = copy.deepcopy(y_units)
|
| 1433 |
|
| 1434 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1435 |
|
| 1436 |
def _pre_transform_training_data(
|
| 1437 |
-
self,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1438 |
):
|
| 1439 |
"""
|
| 1440 |
Transform the training data before fitting the symbolic regressor.
|
|
@@ -1443,17 +1537,19 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1443 |
|
| 1444 |
Parameters
|
| 1445 |
----------
|
| 1446 |
-
X : ndarray
|
| 1447 |
Training data of shape (n_samples, n_features).
|
| 1448 |
-
y : ndarray
|
| 1449 |
Target values of shape (n_samples,) or (n_samples, n_targets).
|
| 1450 |
Will be cast to X's dtype if necessary.
|
| 1451 |
-
Xresampled : ndarray |
|
| 1452 |
Resampled training data, of shape `(n_resampled, n_features)`,
|
| 1453 |
used for denoising.
|
| 1454 |
variable_names : list[str]
|
| 1455 |
Names of each variable in the training dataset, `X`.
|
| 1456 |
Of length `n_features`.
|
|
|
|
|
|
|
| 1457 |
X_units : list[str]
|
| 1458 |
Units of each variable in the training dataset, `X`.
|
| 1459 |
y_units : str | list[str]
|
|
@@ -1486,24 +1582,43 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1486 |
"""
|
| 1487 |
# Feature selection transformation
|
| 1488 |
if self.select_k_features:
|
| 1489 |
-
|
| 1490 |
X, y, self.select_k_features, random_state=random_state
|
| 1491 |
)
|
| 1492 |
-
X = X[:,
|
| 1493 |
|
| 1494 |
if Xresampled is not None:
|
| 1495 |
-
Xresampled = Xresampled[:,
|
| 1496 |
|
| 1497 |
# Reduce variable_names to selection
|
| 1498 |
-
variable_names =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1499 |
|
| 1500 |
if X_units is not None:
|
| 1501 |
-
X_units =
|
|
|
|
|
|
|
|
|
|
| 1502 |
self.X_units_ = copy.deepcopy(X_units)
|
| 1503 |
|
| 1504 |
# Re-perform data validation and feature name updating
|
| 1505 |
-
X, y = self.
|
| 1506 |
# Update feature names with selected variable names
|
|
|
|
| 1507 |
self.feature_names_in_ = _check_feature_names_in(self, variable_names)
|
| 1508 |
self.display_feature_names_in_ = self.feature_names_in_
|
| 1509 |
print(f"Using features {self.feature_names_in_}")
|
|
@@ -1517,22 +1632,29 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1517 |
else:
|
| 1518 |
X, y = denoise(X, y, Xresampled=Xresampled, random_state=random_state)
|
| 1519 |
|
| 1520 |
-
return X, y, variable_names, X_units, y_units
|
| 1521 |
|
| 1522 |
-
def _run(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1523 |
"""
|
| 1524 |
Run the symbolic regression fitting process on the julia backend.
|
| 1525 |
|
| 1526 |
Parameters
|
| 1527 |
----------
|
| 1528 |
-
X : ndarray
|
| 1529 |
Training data of shape `(n_samples, n_features)`.
|
| 1530 |
-
y : ndarray
|
| 1531 |
Target values of shape `(n_samples,)` or `(n_samples, n_targets)`.
|
| 1532 |
Will be cast to `X`'s dtype if necessary.
|
| 1533 |
-
|
| 1534 |
-
|
| 1535 |
-
weights : ndarray |
|
| 1536 |
Weight array of the same shape as `y`.
|
| 1537 |
Each element is how to weight the mean-square-error loss
|
| 1538 |
for that particular element of y.
|
|
@@ -1551,24 +1673,27 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1551 |
"""
|
| 1552 |
# Need to be global as we don't want to recreate/reinstate julia for
|
| 1553 |
# every new instance of PySRRegressor
|
| 1554 |
-
global
|
| 1555 |
|
| 1556 |
# These are the parameters which may be modified from the ones
|
| 1557 |
# specified in init, so we define them here locally:
|
| 1558 |
-
binary_operators =
|
| 1559 |
-
unary_operators =
|
| 1560 |
-
maxdepth =
|
| 1561 |
-
constraints =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1562 |
nested_constraints = self.nested_constraints
|
| 1563 |
complexity_of_operators = self.complexity_of_operators
|
| 1564 |
-
|
| 1565 |
cluster_manager = self.cluster_manager
|
| 1566 |
-
batch_size = mutated_params["batch_size"]
|
| 1567 |
-
update_verbosity = mutated_params["update_verbosity"]
|
| 1568 |
-
progress = mutated_params["progress"]
|
| 1569 |
|
| 1570 |
# Start julia backend processes
|
| 1571 |
-
if not
|
| 1572 |
print("Compiling Julia backend...")
|
| 1573 |
|
| 1574 |
if cluster_manager is not None:
|
|
@@ -1607,6 +1732,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1607 |
complexity_of_operators_str += f"({k}) => {v}, "
|
| 1608 |
complexity_of_operators_str += ")"
|
| 1609 |
complexity_of_operators = jl.seval(complexity_of_operators_str)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1610 |
|
| 1611 |
custom_loss = jl.seval(
|
| 1612 |
str(self.elementwise_loss)
|
|
@@ -1643,16 +1772,30 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1643 |
optimize=self.weight_optimize,
|
| 1644 |
)
|
| 1645 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1646 |
# Call to Julia backend.
|
| 1647 |
# See https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/OptionsStruct.jl
|
| 1648 |
options = SymbolicRegression.Options(
|
| 1649 |
-
binary_operators=
|
| 1650 |
-
unary_operators=
|
| 1651 |
bin_constraints=jl_array(bin_constraints),
|
| 1652 |
una_constraints=jl_array(una_constraints),
|
| 1653 |
complexity_of_operators=complexity_of_operators,
|
| 1654 |
complexity_of_constants=self.complexity_of_constants,
|
| 1655 |
-
complexity_of_variables=
|
| 1656 |
nested_constraints=nested_constraints,
|
| 1657 |
elementwise_loss=custom_loss,
|
| 1658 |
loss_function=custom_full_objective,
|
|
@@ -1667,6 +1810,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1667 |
# These have the same name:
|
| 1668 |
parsimony=self.parsimony,
|
| 1669 |
dimensional_constraint_penalty=self.dimensional_constraint_penalty,
|
|
|
|
| 1670 |
alpha=self.alpha,
|
| 1671 |
maxdepth=maxdepth,
|
| 1672 |
fast_cycle=self.fast_cycle,
|
|
@@ -1678,9 +1822,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1678 |
fraction_replaced_hof=self.fraction_replaced_hof,
|
| 1679 |
should_simplify=self.should_simplify,
|
| 1680 |
should_optimize_constants=self.should_optimize_constants,
|
| 1681 |
-
warmup_maxsize_by=
|
| 1682 |
-
0.0 if self.warmup_maxsize_by is None else self.warmup_maxsize_by
|
| 1683 |
-
),
|
| 1684 |
use_frequency=self.use_frequency,
|
| 1685 |
use_frequency_in_tournament=self.use_frequency_in_tournament,
|
| 1686 |
adaptive_parsimony_scaling=self.adaptive_parsimony_scaling,
|
|
@@ -1787,7 +1929,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1787 |
if self.delete_tempfiles:
|
| 1788 |
shutil.rmtree(self.tempdir_)
|
| 1789 |
|
| 1790 |
-
|
| 1791 |
|
| 1792 |
return self
|
| 1793 |
|
|
@@ -1797,9 +1939,12 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1797 |
y,
|
| 1798 |
Xresampled=None,
|
| 1799 |
weights=None,
|
| 1800 |
-
variable_names: Optional[
|
| 1801 |
-
|
| 1802 |
-
|
|
|
|
|
|
|
|
|
|
| 1803 |
) -> "PySRRegressor":
|
| 1804 |
"""
|
| 1805 |
Search for equations to fit the dataset and store them in `self.equations_`.
|
|
@@ -1858,15 +2003,13 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1858 |
self.selection_mask_ = None
|
| 1859 |
self.julia_state_stream_ = None
|
| 1860 |
self.julia_options_stream_ = None
|
|
|
|
| 1861 |
self.X_units_ = None
|
| 1862 |
self.y_units_ = None
|
| 1863 |
|
| 1864 |
-
random_state = check_random_state(self.random_state) # For np random
|
| 1865 |
-
seed = random_state.get_state()[1][0] # For julia random
|
| 1866 |
-
|
| 1867 |
self._setup_equation_file()
|
| 1868 |
|
| 1869 |
-
|
| 1870 |
|
| 1871 |
(
|
| 1872 |
X,
|
|
@@ -1874,10 +2017,18 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1874 |
Xresampled,
|
| 1875 |
weights,
|
| 1876 |
variable_names,
|
|
|
|
| 1877 |
X_units,
|
| 1878 |
y_units,
|
| 1879 |
) = self._validate_and_set_fit_params(
|
| 1880 |
-
X,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1881 |
)
|
| 1882 |
|
| 1883 |
if X.shape[0] > 10000 and not self.batching:
|
|
@@ -1891,9 +2042,21 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1891 |
"More datapoints will lower the search speed."
|
| 1892 |
)
|
| 1893 |
|
|
|
|
|
|
|
|
|
|
| 1894 |
# Pre transformations (feature selection and denoising)
|
| 1895 |
-
X, y, variable_names, X_units, y_units =
|
| 1896 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1897 |
)
|
| 1898 |
|
| 1899 |
# Warn about large feature counts (still warn if feature count is large
|
|
@@ -1903,13 +2066,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1903 |
"Note: you are running with 10 features or more. "
|
| 1904 |
"Genetic algorithms like used in PySR scale poorly with large numbers of features. "
|
| 1905 |
"You should run PySR for more `niterations` to ensure it can find "
|
| 1906 |
-
"the correct variables, "
|
| 1907 |
-
"or, alternatively, do a dimensionality reduction beforehand. "
|
| 1908 |
-
"For example, `X = PCA(n_components=6).fit_transform(X)`, "
|
| 1909 |
-
"using scikit-learn's `PCA` class, "
|
| 1910 |
-
"will reduce the number of features to 6 in an interpretable way, "
|
| 1911 |
-
"as each resultant feature "
|
| 1912 |
-
"will be a linear combination of the original features. "
|
| 1913 |
)
|
| 1914 |
|
| 1915 |
# Assertion checks
|
|
@@ -1920,6 +2077,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1920 |
X,
|
| 1921 |
use_custom_variable_names,
|
| 1922 |
variable_names,
|
|
|
|
| 1923 |
weights,
|
| 1924 |
y,
|
| 1925 |
X_units,
|
|
@@ -1932,7 +2090,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1932 |
self._checkpoint()
|
| 1933 |
|
| 1934 |
# Perform the search:
|
| 1935 |
-
self._run(X, y,
|
| 1936 |
|
| 1937 |
# Then, after fit, we save again, so the pickle file contains
|
| 1938 |
# the equations:
|
|
@@ -1941,7 +2099,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1941 |
|
| 1942 |
return self
|
| 1943 |
|
| 1944 |
-
def refresh(self, checkpoint_file=None):
|
| 1945 |
"""
|
| 1946 |
Update self.equations_ with any new options passed.
|
| 1947 |
|
|
@@ -1950,11 +2108,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1950 |
|
| 1951 |
Parameters
|
| 1952 |
----------
|
| 1953 |
-
checkpoint_file : str
|
| 1954 |
Path to checkpoint hall of fame file to be loaded.
|
| 1955 |
The default will use the set `equation_file_`.
|
| 1956 |
"""
|
| 1957 |
-
if checkpoint_file:
|
| 1958 |
self.equation_file_ = checkpoint_file
|
| 1959 |
self.equation_file_contents_ = None
|
| 1960 |
check_is_fitted(self, attributes=["equation_file_"])
|
|
@@ -2006,7 +2164,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 2006 |
if self.selection_mask_ is not None:
|
| 2007 |
# RangeIndex enforces column order allowing columns to
|
| 2008 |
# be correctly filtered with self.selection_mask_
|
| 2009 |
-
X = X.
|
| 2010 |
X.columns = self.feature_names_in_
|
| 2011 |
# Without feature information, CallableEquation/lambda_format equations
|
| 2012 |
# require that the column order of X matches that of the X used during
|
|
@@ -2016,14 +2174,16 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 2016 |
# reordered/reindexed to match those of the transformed (denoised and
|
| 2017 |
# feature selected) X in fit.
|
| 2018 |
X = X.reindex(columns=self.feature_names_in_)
|
| 2019 |
-
X = self.
|
| 2020 |
|
| 2021 |
try:
|
| 2022 |
-
if
|
|
|
|
| 2023 |
return np.stack(
|
| 2024 |
[eq["lambda_format"](X) for eq in best_equation], axis=1
|
| 2025 |
)
|
| 2026 |
-
|
|
|
|
| 2027 |
except Exception as error:
|
| 2028 |
raise ValueError(
|
| 2029 |
"Failed to evaluate the expression. "
|
|
@@ -2053,9 +2213,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 2053 |
"""
|
| 2054 |
self.refresh()
|
| 2055 |
best_equation = self.get_best(index=index)
|
| 2056 |
-
if
|
|
|
|
| 2057 |
return [eq["sympy_format"] for eq in best_equation]
|
| 2058 |
-
|
|
|
|
| 2059 |
|
| 2060 |
def latex(self, index=None, precision=3):
|
| 2061 |
"""
|
|
@@ -2115,9 +2277,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 2115 |
self.set_params(output_jax_format=True)
|
| 2116 |
self.refresh()
|
| 2117 |
best_equation = self.get_best(index=index)
|
| 2118 |
-
if
|
|
|
|
| 2119 |
return [eq["jax_format"] for eq in best_equation]
|
| 2120 |
-
|
|
|
|
| 2121 |
|
| 2122 |
def pytorch(self, index=None):
|
| 2123 |
"""
|
|
@@ -2145,9 +2309,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 2145 |
self.set_params(output_torch_format=True)
|
| 2146 |
self.refresh()
|
| 2147 |
best_equation = self.get_best(index=index)
|
| 2148 |
-
if
|
| 2149 |
return [eq["torch_format"] for eq in best_equation]
|
| 2150 |
-
|
|
|
|
| 2151 |
|
| 2152 |
def _read_equation_file(self):
|
| 2153 |
"""Read the hall of fame file created by `SymbolicRegression.jl`."""
|
|
@@ -2246,10 +2411,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 2246 |
lastComplexity = 0
|
| 2247 |
sympy_format = []
|
| 2248 |
lambda_format = []
|
| 2249 |
-
|
| 2250 |
-
|
| 2251 |
-
if self.output_torch_format:
|
| 2252 |
-
torch_format = []
|
| 2253 |
|
| 2254 |
for _, eqn_row in output.iterrows():
|
| 2255 |
eqn = pysr2sympy(
|
|
@@ -2361,7 +2524,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 2361 |
"""
|
| 2362 |
self.refresh()
|
| 2363 |
|
| 2364 |
-
if self.
|
| 2365 |
if indices is not None:
|
| 2366 |
assert isinstance(indices, list)
|
| 2367 |
assert isinstance(indices[0], list)
|
|
@@ -2370,7 +2533,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 2370 |
table_string = sympy2multilatextable(
|
| 2371 |
self.equations_, indices=indices, precision=precision, columns=columns
|
| 2372 |
)
|
| 2373 |
-
|
| 2374 |
if indices is not None:
|
| 2375 |
assert isinstance(indices, list)
|
| 2376 |
assert isinstance(indices[0], int)
|
|
@@ -2378,15 +2541,13 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 2378 |
table_string = sympy2latextable(
|
| 2379 |
self.equations_, indices=indices, precision=precision, columns=columns
|
| 2380 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2381 |
|
| 2382 |
-
|
| 2383 |
-
r"\usepackage{breqn}",
|
| 2384 |
-
r"\usepackage{booktabs}",
|
| 2385 |
-
"",
|
| 2386 |
-
"...",
|
| 2387 |
-
"",
|
| 2388 |
-
]
|
| 2389 |
-
return "\n".join(preamble_string + [table_string])
|
| 2390 |
|
| 2391 |
|
| 2392 |
def idx_model_selection(equations: pd.DataFrame, model_selection: str):
|
|
@@ -2404,3 +2565,30 @@ def idx_model_selection(equations: pd.DataFrame, model_selection: str):
|
|
| 2404 |
f"{model_selection} is not a valid model selection strategy."
|
| 2405 |
)
|
| 2406 |
return chosen_idx
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
import sys
|
| 9 |
import tempfile
|
| 10 |
import warnings
|
| 11 |
+
from dataclasses import dataclass, fields
|
| 12 |
from datetime import datetime
|
| 13 |
from io import StringIO
|
| 14 |
from multiprocessing import cpu_count
|
| 15 |
from pathlib import Path
|
| 16 |
+
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, cast
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
import numpy as np
|
| 19 |
import pandas as pd
|
| 20 |
+
from numpy import ndarray
|
| 21 |
+
from numpy.typing import NDArray
|
| 22 |
from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin
|
| 23 |
from sklearn.utils import check_array, check_consistent_length, check_random_state
|
| 24 |
+
from sklearn.utils.validation import _check_feature_names_in # type: ignore
|
| 25 |
+
from sklearn.utils.validation import check_is_fitted
|
| 26 |
|
| 27 |
from .denoising import denoise, multi_denoise
|
| 28 |
from .deprecated import DEPRECATED_KWARGS
|
| 29 |
from .export_jax import sympy2jax
|
| 30 |
+
from .export_latex import (
|
| 31 |
+
sympy2latex,
|
| 32 |
+
sympy2latextable,
|
| 33 |
+
sympy2multilatextable,
|
| 34 |
+
with_preamble,
|
| 35 |
+
)
|
| 36 |
from .export_numpy import sympy2numpy
|
| 37 |
from .export_sympy import assert_valid_sympy_symbol, create_sympy_symbols, pysr2sympy
|
| 38 |
from .export_torch import sympy2torch
|
|
|
|
| 44 |
_load_cluster_manager,
|
| 45 |
jl_array,
|
| 46 |
jl_deserialize,
|
| 47 |
+
jl_is_function,
|
| 48 |
jl_serialize,
|
| 49 |
)
|
| 50 |
from .julia_import import SymbolicRegression, jl
|
| 51 |
from .utils import (
|
| 52 |
+
ArrayLike,
|
| 53 |
+
PathLike,
|
| 54 |
_csv_filename_to_pkl_filename,
|
| 55 |
_preprocess_julia_floats,
|
| 56 |
_safe_check_feature_names_in,
|
| 57 |
_subscriptify,
|
| 58 |
+
_suggest_keywords,
|
| 59 |
)
|
| 60 |
|
| 61 |
+
ALREADY_RAN = False
|
| 62 |
|
| 63 |
|
| 64 |
def _process_constraints(binary_operators, unary_operators, constraints):
|
|
|
|
| 121 |
"and underscores are allowed."
|
| 122 |
)
|
| 123 |
if (extra_sympy_mappings is None) or (
|
| 124 |
+
function_name not in extra_sympy_mappings
|
| 125 |
):
|
| 126 |
raise ValueError(
|
| 127 |
f"Custom function {function_name} is not defined in `extra_sympy_mappings`. "
|
|
|
|
| 138 |
X,
|
| 139 |
use_custom_variable_names,
|
| 140 |
variable_names,
|
| 141 |
+
complexity_of_variables,
|
| 142 |
weights,
|
| 143 |
y,
|
| 144 |
X_units,
|
|
|
|
| 163 |
"and underscores are allowed."
|
| 164 |
)
|
| 165 |
assert_valid_sympy_symbol(var_name)
|
| 166 |
+
if (
|
| 167 |
+
isinstance(complexity_of_variables, list)
|
| 168 |
+
and len(complexity_of_variables) != X.shape[1]
|
| 169 |
+
):
|
| 170 |
+
raise ValueError(
|
| 171 |
+
"The number of elements in `complexity_of_variables` must equal the number of features in `X`."
|
| 172 |
+
)
|
| 173 |
if X_units is not None and len(X_units) != X.shape[1]:
|
| 174 |
raise ValueError(
|
| 175 |
"The number of units in `X_units` must equal the number of features in `X`."
|
|
|
|
| 194 |
VALID_OPTIMIZER_ALGORITHMS = ["BFGS", "NelderMead"]
|
| 195 |
|
| 196 |
|
| 197 |
+
@dataclass
|
| 198 |
+
class _DynamicallySetParams:
|
| 199 |
+
"""Defines some parameters that are set at runtime."""
|
| 200 |
+
|
| 201 |
+
binary_operators: List[str]
|
| 202 |
+
unary_operators: List[str]
|
| 203 |
+
maxdepth: int
|
| 204 |
+
constraints: Dict[str, str]
|
| 205 |
+
multithreading: bool
|
| 206 |
+
batch_size: int
|
| 207 |
+
update_verbosity: int
|
| 208 |
+
progress: bool
|
| 209 |
+
warmup_maxsize_by: float
|
| 210 |
+
|
| 211 |
+
|
| 212 |
class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
| 213 |
"""
|
| 214 |
High-performance symbolic regression algorithm.
|
|
|
|
| 340 |
`idx` argument to the function, which is `nothing`
|
| 341 |
for non-batched, and a 1D array of indices for batched.
|
| 342 |
Default is `None`.
|
| 343 |
+
complexity_of_operators : dict[str, Union[int, float]]
|
| 344 |
If you would like to use a complexity other than 1 for an
|
| 345 |
operator, specify the complexity here. For example,
|
| 346 |
`{"sin": 2, "+": 1}` would give a complexity of 2 for each use
|
|
|
|
| 349 |
numbers for a complexity, and the total complexity of a tree
|
| 350 |
will be rounded to the nearest integer after computing.
|
| 351 |
Default is `None`.
|
| 352 |
+
complexity_of_constants : int | float
|
| 353 |
Complexity of constants. Default is `1`.
|
| 354 |
+
complexity_of_variables : int | float
|
| 355 |
+
Global complexity of variables. To set different complexities for
|
| 356 |
+
different variables, pass a list of complexities to the `fit` method
|
| 357 |
+
with keyword `complexity_of_variables`. You cannot use both.
|
| 358 |
+
Default is `1`.
|
| 359 |
parsimony : float
|
| 360 |
Multiplicative factor for how much to punish complexity.
|
| 361 |
Default is `0.0032`.
|
| 362 |
dimensional_constraint_penalty : float
|
| 363 |
Additive penalty for if dimensional analysis of an expression fails.
|
| 364 |
By default, this is `1000.0`.
|
| 365 |
+
dimensionless_constants_only : bool
|
| 366 |
+
Whether to only search for dimensionless constants, if using units.
|
| 367 |
+
Default is `False`.
|
| 368 |
use_frequency : bool
|
| 369 |
Whether to measure the frequency of complexities, and use that
|
| 370 |
instead of parsimony to explore equation space. Will naturally
|
|
|
|
| 640 |
Units of each variable in the training dataset, `y`.
|
| 641 |
nout_ : int
|
| 642 |
Number of output dimensions.
|
| 643 |
+
selection_mask_ : ndarray of shape (`n_features_in_`,)
|
| 644 |
+
Mask of which features of `X` to use when `select_k_features` is set.
|
|
|
|
| 645 |
tempdir_ : Path
|
| 646 |
Path to the temporary equations directory.
|
| 647 |
+
equation_file_ : Union[str, Path]
|
| 648 |
Output equation file name produced by the julia backend.
|
| 649 |
julia_state_stream_ : ndarray
|
| 650 |
The serialized state for the julia SymbolicRegression.jl backend (after fitting),
|
| 651 |
stored as an array of uint8, produced by Julia's Serialization.serialize function.
|
|
|
|
|
|
|
| 652 |
julia_options_stream_ : ndarray
|
| 653 |
The serialized julia options, stored as an array of uint8,
|
|
|
|
|
|
|
| 654 |
equation_file_contents_ : list[pandas.DataFrame]
|
| 655 |
Contents of the equation file output by the Julia backend.
|
| 656 |
show_pickle_warnings_ : bool
|
|
|
|
| 697 |
```
|
| 698 |
"""
|
| 699 |
|
| 700 |
+
equations_: Union[pd.DataFrame, List[pd.DataFrame], None]
|
| 701 |
+
n_features_in_: int
|
| 702 |
+
feature_names_in_: ArrayLike[str]
|
| 703 |
+
display_feature_names_in_: ArrayLike[str]
|
| 704 |
+
complexity_of_variables_: Union[int, float, List[Union[int, float]], None]
|
| 705 |
+
X_units_: Union[ArrayLike[str], None]
|
| 706 |
+
y_units_: Union[str, ArrayLike[str], None]
|
| 707 |
+
nout_: int
|
| 708 |
+
selection_mask_: Union[NDArray[np.bool_], None]
|
| 709 |
+
tempdir_: Path
|
| 710 |
+
equation_file_: PathLike
|
| 711 |
+
julia_state_stream_: Union[NDArray[np.uint8], None]
|
| 712 |
+
julia_options_stream_: Union[NDArray[np.uint8], None]
|
| 713 |
+
equation_file_contents_: Union[List[pd.DataFrame], None]
|
| 714 |
+
show_pickle_warnings_: bool
|
| 715 |
+
|
| 716 |
def __init__(
|
| 717 |
self,
|
| 718 |
model_selection: Literal["best", "accuracy", "score"] = "best",
|
|
|
|
| 733 |
loss_function: Optional[str] = None,
|
| 734 |
complexity_of_operators: Optional[Dict[str, Union[int, float]]] = None,
|
| 735 |
complexity_of_constants: Union[int, float] = 1,
|
| 736 |
+
complexity_of_variables: Optional[Union[int, float]] = None,
|
| 737 |
parsimony: float = 0.0032,
|
| 738 |
dimensional_constraint_penalty: Optional[float] = None,
|
| 739 |
+
dimensionless_constants_only: bool = False,
|
| 740 |
use_frequency: bool = True,
|
| 741 |
use_frequency_in_tournament: bool = True,
|
| 742 |
adaptive_parsimony_scaling: float = 20.0,
|
|
|
|
| 832 |
self.complexity_of_variables = complexity_of_variables
|
| 833 |
self.parsimony = parsimony
|
| 834 |
self.dimensional_constraint_penalty = dimensional_constraint_penalty
|
| 835 |
+
self.dimensionless_constants_only = dimensionless_constants_only
|
| 836 |
self.use_frequency = use_frequency
|
| 837 |
self.use_frequency_in_tournament = use_frequency_in_tournament
|
| 838 |
self.adaptive_parsimony_scaling = adaptive_parsimony_scaling
|
|
|
|
| 913 |
updated_kwarg_name = DEPRECATED_KWARGS[k]
|
| 914 |
setattr(self, updated_kwarg_name, v)
|
| 915 |
warnings.warn(
|
| 916 |
+
f"`{k}` has been renamed to `{updated_kwarg_name}` in PySRRegressor. "
|
| 917 |
"Please use that instead.",
|
| 918 |
FutureWarning,
|
| 919 |
)
|
| 920 |
# Handle kwargs that have been moved to the fit method
|
| 921 |
elif k in ["weights", "variable_names", "Xresampled"]:
|
| 922 |
warnings.warn(
|
| 923 |
+
f"`{k}` is a data-dependent parameter and should be passed when fit is called. "
|
| 924 |
+
f"Ignoring parameter; please pass `{k}` during the call to fit instead.",
|
| 925 |
FutureWarning,
|
| 926 |
)
|
| 927 |
elif k == "julia_project":
|
|
|
|
| 938 |
FutureWarning,
|
| 939 |
)
|
| 940 |
else:
|
| 941 |
+
suggested_keywords = _suggest_keywords(PySRRegressor, k)
|
| 942 |
+
err_msg = (
|
| 943 |
+
f"`{k}` is not a valid keyword argument for PySRRegressor."
|
| 944 |
)
|
| 945 |
+
if len(suggested_keywords) > 0:
|
| 946 |
+
err_msg += f" Did you mean {', '.join(map(lambda s: f'`{s}`', suggested_keywords))}?"
|
| 947 |
+
raise TypeError(err_msg)
|
| 948 |
|
| 949 |
@classmethod
|
| 950 |
def from_file(
|
| 951 |
cls,
|
| 952 |
+
equation_file: PathLike,
|
| 953 |
*,
|
| 954 |
+
binary_operators: Optional[List[str]] = None,
|
| 955 |
+
unary_operators: Optional[List[str]] = None,
|
| 956 |
+
n_features_in: Optional[int] = None,
|
| 957 |
+
feature_names_in: Optional[ArrayLike[str]] = None,
|
| 958 |
+
selection_mask: Optional[NDArray[np.bool_]] = None,
|
| 959 |
+
nout: int = 1,
|
| 960 |
verbosity=1,
|
| 961 |
**pysr_kwargs,
|
| 962 |
):
|
|
|
|
| 965 |
|
| 966 |
Parameters
|
| 967 |
----------
|
| 968 |
+
equation_file : str or Path
|
| 969 |
Path to a pickle file containing a saved model, or a csv file
|
| 970 |
containing equations.
|
| 971 |
binary_operators : list[str]
|
|
|
|
| 980 |
feature_names_in : list[str]
|
| 981 |
Names of the features passed to the model.
|
| 982 |
Not needed if loading from a pickle file.
|
| 983 |
+
selection_mask : NDArray[np.bool_]
|
| 984 |
+
If using `select_k_features`, you must pass `model.selection_mask_` here.
|
| 985 |
Not needed if loading from a pickle file.
|
| 986 |
nout : int
|
| 987 |
Number of outputs of the model.
|
|
|
|
| 1037 |
|
| 1038 |
# TODO: copy .bkup file if exists.
|
| 1039 |
model = cls(
|
| 1040 |
+
equation_file=str(equation_file),
|
| 1041 |
binary_operators=binary_operators,
|
| 1042 |
unary_operators=unary_operators,
|
| 1043 |
**pysr_kwargs,
|
|
|
|
| 1057 |
model.display_feature_names_in_ = feature_names_in
|
| 1058 |
|
| 1059 |
if selection_mask is None:
|
| 1060 |
+
model.selection_mask_ = np.ones(n_features_in, dtype=np.bool_)
|
| 1061 |
else:
|
| 1062 |
model.selection_mask_ = selection_mask
|
| 1063 |
|
|
|
|
| 1084 |
all_equations = equations
|
| 1085 |
|
| 1086 |
for i, equations in enumerate(all_equations):
|
| 1087 |
+
selected = pd.Series([""] * len(equations), index=equations.index)
|
| 1088 |
chosen_row = idx_model_selection(equations, self.model_selection)
|
| 1089 |
selected[chosen_row] = ">>>>"
|
| 1090 |
repr_equations = pd.DataFrame(
|
|
|
|
| 1117 |
Handle pickle serialization for PySRRegressor.
|
| 1118 |
|
| 1119 |
The Scikit-learn standard requires estimators to be serializable via
|
| 1120 |
+
`pickle.dumps()`. However, some attributes do not support pickling
|
| 1121 |
+
and need to be hidden, such as the JAX and Torch representations.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1122 |
"""
|
| 1123 |
state = self.__dict__
|
| 1124 |
show_pickle_warning = not (
|
|
|
|
| 1184 |
|
| 1185 |
@property
|
| 1186 |
def julia_options_(self):
|
| 1187 |
+
"""The deserialized julia options."""
|
| 1188 |
return jl_deserialize(self.julia_options_stream_)
|
| 1189 |
|
| 1190 |
@property
|
| 1191 |
def julia_state_(self):
|
| 1192 |
+
"""The deserialized state."""
|
| 1193 |
return jl_deserialize(self.julia_state_stream_)
|
| 1194 |
|
| 1195 |
@property
|
|
|
|
| 1202 |
)
|
| 1203 |
return self.julia_state_
|
| 1204 |
|
| 1205 |
+
def get_best(self, index=None) -> Union[pd.Series, List[pd.Series]]:
|
| 1206 |
"""
|
| 1207 |
Get best equation using `model_selection`.
|
| 1208 |
|
|
|
|
| 1225 |
Raised when an invalid model selection strategy is provided.
|
| 1226 |
"""
|
| 1227 |
check_is_fitted(self, attributes=["equations_"])
|
|
|
|
|
|
|
| 1228 |
|
| 1229 |
if index is not None:
|
| 1230 |
if isinstance(self.equations_, list):
|
|
|
|
| 1232 |
index, list
|
| 1233 |
), "With multiple output features, index must be a list."
|
| 1234 |
return [eq.iloc[i] for eq, i in zip(self.equations_, index)]
|
| 1235 |
+
else:
|
| 1236 |
+
equations_ = cast(pd.DataFrame, self.equations_)
|
| 1237 |
+
return cast(pd.Series, equations_.iloc[index])
|
| 1238 |
|
| 1239 |
if isinstance(self.equations_, list):
|
| 1240 |
return [
|
| 1241 |
+
cast(pd.Series, eq.loc[idx_model_selection(eq, self.model_selection)])
|
| 1242 |
for eq in self.equations_
|
| 1243 |
]
|
| 1244 |
+
else:
|
| 1245 |
+
equations_ = cast(pd.DataFrame, self.equations_)
|
| 1246 |
+
return cast(
|
| 1247 |
+
pd.Series,
|
| 1248 |
+
equations_.loc[idx_model_selection(equations_, self.model_selection)],
|
| 1249 |
+
)
|
| 1250 |
|
| 1251 |
def _setup_equation_file(self):
|
| 1252 |
"""
|
|
|
|
| 1271 |
self.equation_file_ = self.equation_file
|
| 1272 |
self.equation_file_contents_ = None
|
| 1273 |
|
| 1274 |
+
def _validate_and_modify_params(self) -> _DynamicallySetParams:
|
| 1275 |
"""
|
| 1276 |
Ensure parameters passed at initialization are valid.
|
| 1277 |
|
|
|
|
| 1329 |
f"PySR currently only supports the following optimizer algorithms: {VALID_OPTIMIZER_ALGORITHMS}"
|
| 1330 |
)
|
| 1331 |
|
| 1332 |
+
param_container = _DynamicallySetParams(
|
| 1333 |
+
binary_operators=["+", "*", "-", "/"],
|
| 1334 |
+
unary_operators=[],
|
| 1335 |
+
maxdepth=self.maxsize,
|
| 1336 |
+
constraints={},
|
| 1337 |
+
multithreading=self.procs != 0 and self.cluster_manager is None,
|
| 1338 |
+
batch_size=1,
|
| 1339 |
+
update_verbosity=int(self.verbosity),
|
| 1340 |
+
progress=self.progress,
|
| 1341 |
+
warmup_maxsize_by=0.0,
|
| 1342 |
+
)
|
| 1343 |
+
|
| 1344 |
+
for param_name in map(lambda x: x.name, fields(_DynamicallySetParams)):
|
| 1345 |
+
user_param_value = getattr(self, param_name)
|
| 1346 |
+
if user_param_value is None:
|
| 1347 |
+
# Leave as the default in DynamicallySetParams
|
| 1348 |
+
...
|
|
|
|
| 1349 |
else:
|
| 1350 |
+
# If user has specified it, we will override the default.
|
| 1351 |
+
# However, there are some special cases to mutate it:
|
| 1352 |
+
new_param_value = _mutate_parameter(param_name, user_param_value)
|
| 1353 |
+
setattr(param_container, param_name, new_param_value)
|
| 1354 |
+
# TODO: This should just be part of the __init__ of _DynamicallySetParams
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1355 |
|
| 1356 |
assert (
|
| 1357 |
+
len(param_container.binary_operators) > 0
|
| 1358 |
+
or len(param_container.unary_operators) > 0
|
| 1359 |
+
), "At least one operator must be provided."
|
|
|
|
| 1360 |
|
| 1361 |
+
return param_container
|
| 1362 |
|
| 1363 |
def _validate_and_set_fit_params(
|
| 1364 |
+
self,
|
| 1365 |
+
X,
|
| 1366 |
+
y,
|
| 1367 |
+
Xresampled,
|
| 1368 |
+
weights,
|
| 1369 |
+
variable_names,
|
| 1370 |
+
complexity_of_variables,
|
| 1371 |
+
X_units,
|
| 1372 |
+
y_units,
|
| 1373 |
+
) -> Tuple[
|
| 1374 |
+
ndarray,
|
| 1375 |
+
ndarray,
|
| 1376 |
+
Optional[ndarray],
|
| 1377 |
+
Optional[ndarray],
|
| 1378 |
+
ArrayLike[str],
|
| 1379 |
+
Union[int, float, List[Union[int, float]]],
|
| 1380 |
+
Optional[ArrayLike[str]],
|
| 1381 |
+
Optional[Union[str, ArrayLike[str]]],
|
| 1382 |
+
]:
|
| 1383 |
"""
|
| 1384 |
Validate the parameters passed to the :term`fit` method.
|
| 1385 |
|
|
|
|
| 1399 |
Weight array of the same shape as `y`.
|
| 1400 |
Each element is how to weight the mean-square-error loss
|
| 1401 |
for that particular element of y.
|
| 1402 |
+
variable_names : ndarray of length n_features
|
| 1403 |
+
Names of each feature in the training dataset, `X`.
|
| 1404 |
+
complexity_of_variables : int | float | list[int | float]
|
| 1405 |
+
Complexity of each feature in the training dataset, `X`.
|
| 1406 |
X_units : list[str] of length n_features
|
| 1407 |
+
Units of each feature in the training dataset, `X`.
|
| 1408 |
y_units : str | list[str] of length n_out
|
| 1409 |
+
Units of each feature in the training dataset, `y`.
|
| 1410 |
|
| 1411 |
Returns
|
| 1412 |
-------
|
|
|
|
| 1450 |
"Please use valid names instead."
|
| 1451 |
)
|
| 1452 |
|
| 1453 |
+
if (
|
| 1454 |
+
complexity_of_variables is not None
|
| 1455 |
+
and self.complexity_of_variables is not None
|
| 1456 |
+
):
|
| 1457 |
+
raise ValueError(
|
| 1458 |
+
"You cannot set `complexity_of_variables` at both `fit` and `__init__`. "
|
| 1459 |
+
"Pass it at `__init__` to set it to global default, OR use `fit` to set it for "
|
| 1460 |
+
"each variable individually."
|
| 1461 |
+
)
|
| 1462 |
+
elif complexity_of_variables is not None:
|
| 1463 |
+
complexity_of_variables = complexity_of_variables
|
| 1464 |
+
elif self.complexity_of_variables is not None:
|
| 1465 |
+
complexity_of_variables = self.complexity_of_variables
|
| 1466 |
+
else:
|
| 1467 |
+
complexity_of_variables = 1
|
| 1468 |
+
|
| 1469 |
# Data validation and feature name fetching via sklearn
|
| 1470 |
# This method sets the n_features_in_ attribute
|
| 1471 |
if Xresampled is not None:
|
|
|
|
| 1473 |
if weights is not None:
|
| 1474 |
weights = check_array(weights, ensure_2d=False)
|
| 1475 |
check_consistent_length(weights, y)
|
| 1476 |
+
X, y = self._validate_data_X_y(X, y)
|
| 1477 |
self.feature_names_in_ = _safe_check_feature_names_in(
|
| 1478 |
self, variable_names, generate_names=False
|
| 1479 |
)
|
|
|
|
| 1483 |
self.display_feature_names_in_ = np.array(
|
| 1484 |
[f"x{_subscriptify(i)}" for i in range(X.shape[1])]
|
| 1485 |
)
|
| 1486 |
+
variable_names = self.feature_names_in_
|
| 1487 |
else:
|
| 1488 |
self.display_feature_names_in_ = self.feature_names_in_
|
| 1489 |
+
variable_names = self.feature_names_in_
|
|
|
|
| 1490 |
|
| 1491 |
# Handle multioutput data
|
| 1492 |
if len(y.shape) == 1 or (len(y.shape) == 2 and y.shape[1] == 1):
|
|
|
|
| 1496 |
else:
|
| 1497 |
raise NotImplementedError("y shape not supported!")
|
| 1498 |
|
| 1499 |
+
self.complexity_of_variables_ = copy.deepcopy(complexity_of_variables)
|
| 1500 |
self.X_units_ = copy.deepcopy(X_units)
|
| 1501 |
self.y_units_ = copy.deepcopy(y_units)
|
| 1502 |
|
| 1503 |
+
return (
|
| 1504 |
+
X,
|
| 1505 |
+
y,
|
| 1506 |
+
Xresampled,
|
| 1507 |
+
weights,
|
| 1508 |
+
variable_names,
|
| 1509 |
+
complexity_of_variables,
|
| 1510 |
+
X_units,
|
| 1511 |
+
y_units,
|
| 1512 |
+
)
|
| 1513 |
+
|
| 1514 |
+
def _validate_data_X_y(self, X, y) -> Tuple[ndarray, ndarray]:
|
| 1515 |
+
raw_out = self._validate_data(X=X, y=y, reset=True, multi_output=True) # type: ignore
|
| 1516 |
+
return cast(Tuple[ndarray, ndarray], raw_out)
|
| 1517 |
+
|
| 1518 |
+
def _validate_data_X(self, X) -> Tuple[ndarray]:
|
| 1519 |
+
raw_out = self._validate_data(X=X, reset=False) # type: ignore
|
| 1520 |
+
return cast(Tuple[ndarray], raw_out)
|
| 1521 |
|
| 1522 |
def _pre_transform_training_data(
|
| 1523 |
+
self,
|
| 1524 |
+
X: ndarray,
|
| 1525 |
+
y: ndarray,
|
| 1526 |
+
Xresampled: Union[ndarray, None],
|
| 1527 |
+
variable_names: ArrayLike[str],
|
| 1528 |
+
complexity_of_variables: Union[int, float, List[Union[int, float]]],
|
| 1529 |
+
X_units: Union[ArrayLike[str], None],
|
| 1530 |
+
y_units: Union[ArrayLike[str], str, None],
|
| 1531 |
+
random_state: np.random.RandomState,
|
| 1532 |
):
|
| 1533 |
"""
|
| 1534 |
Transform the training data before fitting the symbolic regressor.
|
|
|
|
| 1537 |
|
| 1538 |
Parameters
|
| 1539 |
----------
|
| 1540 |
+
X : ndarray
|
| 1541 |
Training data of shape (n_samples, n_features).
|
| 1542 |
+
y : ndarray
|
| 1543 |
Target values of shape (n_samples,) or (n_samples, n_targets).
|
| 1544 |
Will be cast to X's dtype if necessary.
|
| 1545 |
+
Xresampled : ndarray | None
|
| 1546 |
Resampled training data, of shape `(n_resampled, n_features)`,
|
| 1547 |
used for denoising.
|
| 1548 |
variable_names : list[str]
|
| 1549 |
Names of each variable in the training dataset, `X`.
|
| 1550 |
Of length `n_features`.
|
| 1551 |
+
complexity_of_variables : int | float | list[int | float]
|
| 1552 |
+
Complexity of each variable in the training dataset, `X`.
|
| 1553 |
X_units : list[str]
|
| 1554 |
Units of each variable in the training dataset, `X`.
|
| 1555 |
y_units : str | list[str]
|
|
|
|
| 1582 |
"""
|
| 1583 |
# Feature selection transformation
|
| 1584 |
if self.select_k_features:
|
| 1585 |
+
selection_mask = run_feature_selection(
|
| 1586 |
X, y, self.select_k_features, random_state=random_state
|
| 1587 |
)
|
| 1588 |
+
X = X[:, selection_mask]
|
| 1589 |
|
| 1590 |
if Xresampled is not None:
|
| 1591 |
+
Xresampled = Xresampled[:, selection_mask]
|
| 1592 |
|
| 1593 |
# Reduce variable_names to selection
|
| 1594 |
+
variable_names = cast(
|
| 1595 |
+
ArrayLike[str],
|
| 1596 |
+
[
|
| 1597 |
+
variable_names[i]
|
| 1598 |
+
for i in range(len(variable_names))
|
| 1599 |
+
if selection_mask[i]
|
| 1600 |
+
],
|
| 1601 |
+
)
|
| 1602 |
+
|
| 1603 |
+
if isinstance(complexity_of_variables, list):
|
| 1604 |
+
complexity_of_variables = [
|
| 1605 |
+
complexity_of_variables[i]
|
| 1606 |
+
for i in range(len(complexity_of_variables))
|
| 1607 |
+
if selection_mask[i]
|
| 1608 |
+
]
|
| 1609 |
+
self.complexity_of_variables_ = copy.deepcopy(complexity_of_variables)
|
| 1610 |
|
| 1611 |
if X_units is not None:
|
| 1612 |
+
X_units = cast(
|
| 1613 |
+
ArrayLike[str],
|
| 1614 |
+
[X_units[i] for i in range(len(X_units)) if selection_mask[i]],
|
| 1615 |
+
)
|
| 1616 |
self.X_units_ = copy.deepcopy(X_units)
|
| 1617 |
|
| 1618 |
# Re-perform data validation and feature name updating
|
| 1619 |
+
X, y = self._validate_data_X_y(X, y)
|
| 1620 |
# Update feature names with selected variable names
|
| 1621 |
+
self.selection_mask_ = selection_mask
|
| 1622 |
self.feature_names_in_ = _check_feature_names_in(self, variable_names)
|
| 1623 |
self.display_feature_names_in_ = self.feature_names_in_
|
| 1624 |
print(f"Using features {self.feature_names_in_}")
|
|
|
|
| 1632 |
else:
|
| 1633 |
X, y = denoise(X, y, Xresampled=Xresampled, random_state=random_state)
|
| 1634 |
|
| 1635 |
+
return X, y, variable_names, complexity_of_variables, X_units, y_units
|
| 1636 |
|
| 1637 |
+
def _run(
|
| 1638 |
+
self,
|
| 1639 |
+
X: ndarray,
|
| 1640 |
+
y: ndarray,
|
| 1641 |
+
runtime_params: _DynamicallySetParams,
|
| 1642 |
+
weights: Optional[ndarray],
|
| 1643 |
+
seed: int,
|
| 1644 |
+
):
|
| 1645 |
"""
|
| 1646 |
Run the symbolic regression fitting process on the julia backend.
|
| 1647 |
|
| 1648 |
Parameters
|
| 1649 |
----------
|
| 1650 |
+
X : ndarray
|
| 1651 |
Training data of shape `(n_samples, n_features)`.
|
| 1652 |
+
y : ndarray
|
| 1653 |
Target values of shape `(n_samples,)` or `(n_samples, n_targets)`.
|
| 1654 |
Will be cast to `X`'s dtype if necessary.
|
| 1655 |
+
runtime_params : DynamicallySetParams
|
| 1656 |
+
Dynamically set versions of some parameters passed in __init__.
|
| 1657 |
+
weights : ndarray | None
|
| 1658 |
Weight array of the same shape as `y`.
|
| 1659 |
Each element is how to weight the mean-square-error loss
|
| 1660 |
for that particular element of y.
|
|
|
|
| 1673 |
"""
|
| 1674 |
# Need to be global as we don't want to recreate/reinstate julia for
|
| 1675 |
# every new instance of PySRRegressor
|
| 1676 |
+
global ALREADY_RAN
|
| 1677 |
|
| 1678 |
# These are the parameters which may be modified from the ones
|
| 1679 |
# specified in init, so we define them here locally:
|
| 1680 |
+
binary_operators = runtime_params.binary_operators
|
| 1681 |
+
unary_operators = runtime_params.unary_operators
|
| 1682 |
+
maxdepth = runtime_params.maxdepth
|
| 1683 |
+
constraints = runtime_params.constraints
|
| 1684 |
+
multithreading = runtime_params.multithreading
|
| 1685 |
+
batch_size = runtime_params.batch_size
|
| 1686 |
+
update_verbosity = runtime_params.update_verbosity
|
| 1687 |
+
progress = runtime_params.progress
|
| 1688 |
+
warmup_maxsize_by = runtime_params.warmup_maxsize_by
|
| 1689 |
+
|
| 1690 |
nested_constraints = self.nested_constraints
|
| 1691 |
complexity_of_operators = self.complexity_of_operators
|
| 1692 |
+
complexity_of_variables = self.complexity_of_variables_
|
| 1693 |
cluster_manager = self.cluster_manager
|
|
|
|
|
|
|
|
|
|
| 1694 |
|
| 1695 |
# Start julia backend processes
|
| 1696 |
+
if not ALREADY_RAN and update_verbosity != 0:
|
| 1697 |
print("Compiling Julia backend...")
|
| 1698 |
|
| 1699 |
if cluster_manager is not None:
|
|
|
|
| 1732 |
complexity_of_operators_str += f"({k}) => {v}, "
|
| 1733 |
complexity_of_operators_str += ")"
|
| 1734 |
complexity_of_operators = jl.seval(complexity_of_operators_str)
|
| 1735 |
+
# TODO: Refactor this into helper function
|
| 1736 |
+
|
| 1737 |
+
if isinstance(complexity_of_variables, list):
|
| 1738 |
+
complexity_of_variables = jl_array(complexity_of_variables)
|
| 1739 |
|
| 1740 |
custom_loss = jl.seval(
|
| 1741 |
str(self.elementwise_loss)
|
|
|
|
| 1772 |
optimize=self.weight_optimize,
|
| 1773 |
)
|
| 1774 |
|
| 1775 |
+
jl_binary_operators: List[Any] = []
|
| 1776 |
+
jl_unary_operators: List[Any] = []
|
| 1777 |
+
for input_list, output_list, name in [
|
| 1778 |
+
(binary_operators, jl_binary_operators, "binary"),
|
| 1779 |
+
(unary_operators, jl_unary_operators, "unary"),
|
| 1780 |
+
]:
|
| 1781 |
+
for op in input_list:
|
| 1782 |
+
jl_op = jl.seval(op)
|
| 1783 |
+
if not jl_is_function(jl_op):
|
| 1784 |
+
raise ValueError(
|
| 1785 |
+
f"When building `{name}_operators`, `'{op}'` did not return a Julia function"
|
| 1786 |
+
)
|
| 1787 |
+
output_list.append(jl_op)
|
| 1788 |
+
|
| 1789 |
# Call to Julia backend.
|
| 1790 |
# See https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/OptionsStruct.jl
|
| 1791 |
options = SymbolicRegression.Options(
|
| 1792 |
+
binary_operators=jl_array(jl_binary_operators, dtype=jl.Function),
|
| 1793 |
+
unary_operators=jl_array(jl_unary_operators, dtype=jl.Function),
|
| 1794 |
bin_constraints=jl_array(bin_constraints),
|
| 1795 |
una_constraints=jl_array(una_constraints),
|
| 1796 |
complexity_of_operators=complexity_of_operators,
|
| 1797 |
complexity_of_constants=self.complexity_of_constants,
|
| 1798 |
+
complexity_of_variables=complexity_of_variables,
|
| 1799 |
nested_constraints=nested_constraints,
|
| 1800 |
elementwise_loss=custom_loss,
|
| 1801 |
loss_function=custom_full_objective,
|
|
|
|
| 1810 |
# These have the same name:
|
| 1811 |
parsimony=self.parsimony,
|
| 1812 |
dimensional_constraint_penalty=self.dimensional_constraint_penalty,
|
| 1813 |
+
dimensionless_constants_only=self.dimensionless_constants_only,
|
| 1814 |
alpha=self.alpha,
|
| 1815 |
maxdepth=maxdepth,
|
| 1816 |
fast_cycle=self.fast_cycle,
|
|
|
|
| 1822 |
fraction_replaced_hof=self.fraction_replaced_hof,
|
| 1823 |
should_simplify=self.should_simplify,
|
| 1824 |
should_optimize_constants=self.should_optimize_constants,
|
| 1825 |
+
warmup_maxsize_by=warmup_maxsize_by,
|
|
|
|
|
|
|
| 1826 |
use_frequency=self.use_frequency,
|
| 1827 |
use_frequency_in_tournament=self.use_frequency_in_tournament,
|
| 1828 |
adaptive_parsimony_scaling=self.adaptive_parsimony_scaling,
|
|
|
|
| 1929 |
if self.delete_tempfiles:
|
| 1930 |
shutil.rmtree(self.tempdir_)
|
| 1931 |
|
| 1932 |
+
ALREADY_RAN = True
|
| 1933 |
|
| 1934 |
return self
|
| 1935 |
|
|
|
|
| 1939 |
y,
|
| 1940 |
Xresampled=None,
|
| 1941 |
weights=None,
|
| 1942 |
+
variable_names: Optional[ArrayLike[str]] = None,
|
| 1943 |
+
complexity_of_variables: Optional[
|
| 1944 |
+
Union[int, float, List[Union[int, float]]]
|
| 1945 |
+
] = None,
|
| 1946 |
+
X_units: Optional[ArrayLike[str]] = None,
|
| 1947 |
+
y_units: Optional[Union[str, ArrayLike[str]]] = None,
|
| 1948 |
) -> "PySRRegressor":
|
| 1949 |
"""
|
| 1950 |
Search for equations to fit the dataset and store them in `self.equations_`.
|
|
|
|
| 2003 |
self.selection_mask_ = None
|
| 2004 |
self.julia_state_stream_ = None
|
| 2005 |
self.julia_options_stream_ = None
|
| 2006 |
+
self.complexity_of_variables_ = None
|
| 2007 |
self.X_units_ = None
|
| 2008 |
self.y_units_ = None
|
| 2009 |
|
|
|
|
|
|
|
|
|
|
| 2010 |
self._setup_equation_file()
|
| 2011 |
|
| 2012 |
+
runtime_params = self._validate_and_modify_params()
|
| 2013 |
|
| 2014 |
(
|
| 2015 |
X,
|
|
|
|
| 2017 |
Xresampled,
|
| 2018 |
weights,
|
| 2019 |
variable_names,
|
| 2020 |
+
complexity_of_variables,
|
| 2021 |
X_units,
|
| 2022 |
y_units,
|
| 2023 |
) = self._validate_and_set_fit_params(
|
| 2024 |
+
X,
|
| 2025 |
+
y,
|
| 2026 |
+
Xresampled,
|
| 2027 |
+
weights,
|
| 2028 |
+
variable_names,
|
| 2029 |
+
complexity_of_variables,
|
| 2030 |
+
X_units,
|
| 2031 |
+
y_units,
|
| 2032 |
)
|
| 2033 |
|
| 2034 |
if X.shape[0] > 10000 and not self.batching:
|
|
|
|
| 2042 |
"More datapoints will lower the search speed."
|
| 2043 |
)
|
| 2044 |
|
| 2045 |
+
random_state = check_random_state(self.random_state) # For np random
|
| 2046 |
+
seed = cast(int, random_state.randint(0, 2**31 - 1)) # For julia random
|
| 2047 |
+
|
| 2048 |
# Pre transformations (feature selection and denoising)
|
| 2049 |
+
X, y, variable_names, complexity_of_variables, X_units, y_units = (
|
| 2050 |
+
self._pre_transform_training_data(
|
| 2051 |
+
X,
|
| 2052 |
+
y,
|
| 2053 |
+
Xresampled,
|
| 2054 |
+
variable_names,
|
| 2055 |
+
complexity_of_variables,
|
| 2056 |
+
X_units,
|
| 2057 |
+
y_units,
|
| 2058 |
+
random_state,
|
| 2059 |
+
)
|
| 2060 |
)
|
| 2061 |
|
| 2062 |
# Warn about large feature counts (still warn if feature count is large
|
|
|
|
| 2066 |
"Note: you are running with 10 features or more. "
|
| 2067 |
"Genetic algorithms like used in PySR scale poorly with large numbers of features. "
|
| 2068 |
"You should run PySR for more `niterations` to ensure it can find "
|
| 2069 |
+
"the correct variables, and consider using a larger `maxsize`."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2070 |
)
|
| 2071 |
|
| 2072 |
# Assertion checks
|
|
|
|
| 2077 |
X,
|
| 2078 |
use_custom_variable_names,
|
| 2079 |
variable_names,
|
| 2080 |
+
complexity_of_variables,
|
| 2081 |
weights,
|
| 2082 |
y,
|
| 2083 |
X_units,
|
|
|
|
| 2090 |
self._checkpoint()
|
| 2091 |
|
| 2092 |
# Perform the search:
|
| 2093 |
+
self._run(X, y, runtime_params, weights=weights, seed=seed)
|
| 2094 |
|
| 2095 |
# Then, after fit, we save again, so the pickle file contains
|
| 2096 |
# the equations:
|
|
|
|
| 2099 |
|
| 2100 |
return self
|
| 2101 |
|
| 2102 |
+
def refresh(self, checkpoint_file: Optional[PathLike] = None) -> None:
|
| 2103 |
"""
|
| 2104 |
Update self.equations_ with any new options passed.
|
| 2105 |
|
|
|
|
| 2108 |
|
| 2109 |
Parameters
|
| 2110 |
----------
|
| 2111 |
+
checkpoint_file : str or Path
|
| 2112 |
Path to checkpoint hall of fame file to be loaded.
|
| 2113 |
The default will use the set `equation_file_`.
|
| 2114 |
"""
|
| 2115 |
+
if checkpoint_file is not None:
|
| 2116 |
self.equation_file_ = checkpoint_file
|
| 2117 |
self.equation_file_contents_ = None
|
| 2118 |
check_is_fitted(self, attributes=["equation_file_"])
|
|
|
|
| 2164 |
if self.selection_mask_ is not None:
|
| 2165 |
# RangeIndex enforces column order allowing columns to
|
| 2166 |
# be correctly filtered with self.selection_mask_
|
| 2167 |
+
X = X[X.columns[self.selection_mask_]]
|
| 2168 |
X.columns = self.feature_names_in_
|
| 2169 |
# Without feature information, CallableEquation/lambda_format equations
|
| 2170 |
# require that the column order of X matches that of the X used during
|
|
|
|
| 2174 |
# reordered/reindexed to match those of the transformed (denoised and
|
| 2175 |
# feature selected) X in fit.
|
| 2176 |
X = X.reindex(columns=self.feature_names_in_)
|
| 2177 |
+
X = self._validate_data_X(X)
|
| 2178 |
|
| 2179 |
try:
|
| 2180 |
+
if isinstance(best_equation, list):
|
| 2181 |
+
assert self.nout_ > 1
|
| 2182 |
return np.stack(
|
| 2183 |
[eq["lambda_format"](X) for eq in best_equation], axis=1
|
| 2184 |
)
|
| 2185 |
+
else:
|
| 2186 |
+
return best_equation["lambda_format"](X)
|
| 2187 |
except Exception as error:
|
| 2188 |
raise ValueError(
|
| 2189 |
"Failed to evaluate the expression. "
|
|
|
|
| 2213 |
"""
|
| 2214 |
self.refresh()
|
| 2215 |
best_equation = self.get_best(index=index)
|
| 2216 |
+
if isinstance(best_equation, list):
|
| 2217 |
+
assert self.nout_ > 1
|
| 2218 |
return [eq["sympy_format"] for eq in best_equation]
|
| 2219 |
+
else:
|
| 2220 |
+
return best_equation["sympy_format"]
|
| 2221 |
|
| 2222 |
def latex(self, index=None, precision=3):
|
| 2223 |
"""
|
|
|
|
| 2277 |
self.set_params(output_jax_format=True)
|
| 2278 |
self.refresh()
|
| 2279 |
best_equation = self.get_best(index=index)
|
| 2280 |
+
if isinstance(best_equation, list):
|
| 2281 |
+
assert self.nout_ > 1
|
| 2282 |
return [eq["jax_format"] for eq in best_equation]
|
| 2283 |
+
else:
|
| 2284 |
+
return best_equation["jax_format"]
|
| 2285 |
|
| 2286 |
def pytorch(self, index=None):
|
| 2287 |
"""
|
|
|
|
| 2309 |
self.set_params(output_torch_format=True)
|
| 2310 |
self.refresh()
|
| 2311 |
best_equation = self.get_best(index=index)
|
| 2312 |
+
if isinstance(best_equation, list):
|
| 2313 |
return [eq["torch_format"] for eq in best_equation]
|
| 2314 |
+
else:
|
| 2315 |
+
return best_equation["torch_format"]
|
| 2316 |
|
| 2317 |
def _read_equation_file(self):
|
| 2318 |
"""Read the hall of fame file created by `SymbolicRegression.jl`."""
|
|
|
|
| 2411 |
lastComplexity = 0
|
| 2412 |
sympy_format = []
|
| 2413 |
lambda_format = []
|
| 2414 |
+
jax_format = []
|
| 2415 |
+
torch_format = []
|
|
|
|
|
|
|
| 2416 |
|
| 2417 |
for _, eqn_row in output.iterrows():
|
| 2418 |
eqn = pysr2sympy(
|
|
|
|
| 2524 |
"""
|
| 2525 |
self.refresh()
|
| 2526 |
|
| 2527 |
+
if isinstance(self.equations_, list):
|
| 2528 |
if indices is not None:
|
| 2529 |
assert isinstance(indices, list)
|
| 2530 |
assert isinstance(indices[0], list)
|
|
|
|
| 2533 |
table_string = sympy2multilatextable(
|
| 2534 |
self.equations_, indices=indices, precision=precision, columns=columns
|
| 2535 |
)
|
| 2536 |
+
elif isinstance(self.equations_, pd.DataFrame):
|
| 2537 |
if indices is not None:
|
| 2538 |
assert isinstance(indices, list)
|
| 2539 |
assert isinstance(indices[0], int)
|
|
|
|
| 2541 |
table_string = sympy2latextable(
|
| 2542 |
self.equations_, indices=indices, precision=precision, columns=columns
|
| 2543 |
)
|
| 2544 |
+
else:
|
| 2545 |
+
raise ValueError(
|
| 2546 |
+
"Invalid type for equations_ to pass to `latex_table`. "
|
| 2547 |
+
"Expected a DataFrame or a list of DataFrames."
|
| 2548 |
+
)
|
| 2549 |
|
| 2550 |
+
return with_preamble(table_string)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2551 |
|
| 2552 |
|
| 2553 |
def idx_model_selection(equations: pd.DataFrame, model_selection: str):
|
|
|
|
| 2565 |
f"{model_selection} is not a valid model selection strategy."
|
| 2566 |
)
|
| 2567 |
return chosen_idx
|
| 2568 |
+
|
| 2569 |
+
|
| 2570 |
+
def _mutate_parameter(param_name: str, param_value):
|
| 2571 |
+
if param_name in ["binary_operators", "unary_operators"] and isinstance(
|
| 2572 |
+
param_value, str
|
| 2573 |
+
):
|
| 2574 |
+
return [param_value]
|
| 2575 |
+
|
| 2576 |
+
if param_name == "batch_size" and param_value < 1:
|
| 2577 |
+
warnings.warn(
|
| 2578 |
+
"Given `batch_size` must be greater than or equal to one. "
|
| 2579 |
+
"`batch_size` has been increased to equal one."
|
| 2580 |
+
)
|
| 2581 |
+
return 1
|
| 2582 |
+
|
| 2583 |
+
if (
|
| 2584 |
+
param_name == "progress"
|
| 2585 |
+
and param_value == True
|
| 2586 |
+
and "buffer" not in sys.stdout.__dir__()
|
| 2587 |
+
):
|
| 2588 |
+
warnings.warn(
|
| 2589 |
+
"Note: it looks like you are running in Jupyter. "
|
| 2590 |
+
"The progress bar will be turned off."
|
| 2591 |
+
)
|
| 2592 |
+
return False
|
| 2593 |
+
|
| 2594 |
+
return param_value
|
pysr/test/__main__.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
"""CLI for running PySR's test suite."""
|
|
|
|
| 2 |
import argparse
|
| 3 |
|
| 4 |
from . import *
|
|
|
|
| 1 |
"""CLI for running PySR's test suite."""
|
| 2 |
+
|
| 3 |
import argparse
|
| 4 |
|
| 5 |
from . import *
|
pysr/test/params.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import inspect
|
| 2 |
|
| 3 |
-
from
|
| 4 |
|
| 5 |
DEFAULT_PARAMS = inspect.signature(PySRRegressor.__init__).parameters
|
| 6 |
DEFAULT_NITERATIONS = DEFAULT_PARAMS["niterations"].default
|
|
|
|
| 1 |
import inspect
|
| 2 |
|
| 3 |
+
from pysr import PySRRegressor
|
| 4 |
|
| 5 |
DEFAULT_PARAMS = inspect.signature(PySRRegressor.__init__).parameters
|
| 6 |
DEFAULT_NITERATIONS = DEFAULT_PARAMS["niterations"].default
|
pysr/test/test.py
CHANGED
|
@@ -11,12 +11,18 @@ import pandas as pd
|
|
| 11 |
import sympy
|
| 12 |
from sklearn.utils.estimator_checks import check_estimator
|
| 13 |
|
| 14 |
-
from
|
| 15 |
-
from
|
| 16 |
-
from
|
| 17 |
-
from
|
| 18 |
-
from
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
from .params import (
|
| 21 |
DEFAULT_NCYCLES,
|
| 22 |
DEFAULT_NITERATIONS,
|
|
@@ -24,6 +30,11 @@ from .params import (
|
|
| 24 |
DEFAULT_POPULATIONS,
|
| 25 |
)
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
class TestPipeline(unittest.TestCase):
|
| 29 |
def setUp(self):
|
|
@@ -171,6 +182,63 @@ class TestPipeline(unittest.TestCase):
|
|
| 171 |
self.assertLessEqual(mse1, 1e-4)
|
| 172 |
self.assertLessEqual(mse2, 1e-4)
|
| 173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
def test_multioutput_weighted_with_callable_temp_equation(self):
|
| 175 |
X = self.X.copy()
|
| 176 |
y = X[:, [0, 1]] ** 2
|
|
@@ -308,7 +376,10 @@ class TestPipeline(unittest.TestCase):
|
|
| 308 |
"unused_feature": self.rstate.randn(500),
|
| 309 |
}
|
| 310 |
)
|
| 311 |
-
|
|
|
|
|
|
|
|
|
|
| 312 |
y = true_fn(X)
|
| 313 |
noise = self.rstate.randn(500) * 0.01
|
| 314 |
y = y + noise
|
|
@@ -367,13 +438,12 @@ class TestPipeline(unittest.TestCase):
|
|
| 367 |
|
| 368 |
def test_load_model(self):
|
| 369 |
"""See if we can load a ran model from the equation file."""
|
| 370 |
-
csv_file_data = """
|
| 371 |
-
Complexity,Loss,Equation
|
| 372 |
1,0.19951081,"1.9762075"
|
| 373 |
3,0.12717344,"(f0 + 1.4724599)"
|
| 374 |
4,0.104823045,"pow_abs(2.2683423, cos(f3))\""""
|
| 375 |
# Strip the indents:
|
| 376 |
-
csv_file_data = "\n".join([
|
| 377 |
|
| 378 |
for from_backup in [False, True]:
|
| 379 |
rand_dir = Path(tempfile.mkdtemp())
|
|
@@ -425,12 +495,22 @@ class TestPipeline(unittest.TestCase):
|
|
| 425 |
if os.path.exists(file_to_delete):
|
| 426 |
os.remove(file_to_delete)
|
| 427 |
|
| 428 |
-
pickle_file = rand_dir / "equations.pkl"
|
| 429 |
model3 = PySRRegressor.from_file(
|
| 430 |
model.equation_file_, extra_sympy_mappings={"sq": lambda x: x**2}
|
| 431 |
)
|
| 432 |
np.testing.assert_allclose(model.predict(self.X), model3.predict(self.X))
|
| 433 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 434 |
|
| 435 |
def manually_create_model(equations, feature_names=None):
|
| 436 |
if feature_names is None:
|
|
@@ -526,7 +606,7 @@ class TestFeatureSelection(unittest.TestCase):
|
|
| 526 |
X = self.rstate.randn(20000, 5)
|
| 527 |
y = X[:, 2] ** 2 + X[:, 3] ** 2
|
| 528 |
selected = run_feature_selection(X, y, select_k_features=2)
|
| 529 |
-
|
| 530 |
|
| 531 |
def test_feature_selection_handler(self):
|
| 532 |
X = self.rstate.randn(20000, 5)
|
|
@@ -538,8 +618,8 @@ class TestFeatureSelection(unittest.TestCase):
|
|
| 538 |
variable_names=var_names,
|
| 539 |
y=y,
|
| 540 |
)
|
| 541 |
-
|
| 542 |
-
selected_var_names = [var_names[i] for i in selection]
|
| 543 |
self.assertEqual(set(selected_var_names), set("x2 x3".split(" ")))
|
| 544 |
np.testing.assert_array_equal(
|
| 545 |
np.sort(selected_X, axis=1), np.sort(X[:, [2, 3]], axis=1)
|
|
@@ -563,6 +643,105 @@ class TestMiscellaneous(unittest.TestCase):
|
|
| 563 |
test_pkl_file = _csv_filename_to_pkl_filename(str(equation_file))
|
| 564 |
self.assertEqual(test_pkl_file, str(expected_pkl_file))
|
| 565 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 566 |
def test_deprecation(self):
|
| 567 |
"""Ensure that deprecation works as expected.
|
| 568 |
|
|
@@ -705,100 +884,28 @@ class TestMiscellaneous(unittest.TestCase):
|
|
| 705 |
model.get_best()
|
| 706 |
print("Failed", opt["kwargs"])
|
| 707 |
|
| 708 |
-
def
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
temp_equation_file=True,
|
| 713 |
-
procs=0,
|
| 714 |
-
multithreading=False,
|
| 715 |
)
|
| 716 |
-
nout = 3
|
| 717 |
-
X = np.random.randn(100, 2)
|
| 718 |
-
y = np.random.randn(100, nout)
|
| 719 |
-
model.fit(X, y)
|
| 720 |
-
contents = model.equation_file_contents_.copy()
|
| 721 |
|
| 722 |
-
|
| 723 |
-
|
| 724 |
-
|
| 725 |
-
for i in range(1, nout + 1):
|
| 726 |
-
assert not os.path.exists(str(equation_file_base) + f".out{i}.bkup")
|
| 727 |
-
|
| 728 |
-
with tempfile.NamedTemporaryFile() as pickle_file:
|
| 729 |
-
pkl.dump(model, pickle_file)
|
| 730 |
-
pickle_file.seek(0)
|
| 731 |
-
model2 = pkl.load(pickle_file)
|
| 732 |
-
|
| 733 |
-
contents2 = model2.equation_file_contents_
|
| 734 |
-
cols_to_check = ["equation", "loss", "complexity"]
|
| 735 |
-
for frame1, frame2 in zip(contents, contents2):
|
| 736 |
-
pd.testing.assert_frame_equal(frame1[cols_to_check], frame2[cols_to_check])
|
| 737 |
-
|
| 738 |
-
y_predictions2 = model2.predict(X)
|
| 739 |
-
np.testing.assert_array_equal(y_predictions, y_predictions2)
|
| 740 |
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
|
| 744 |
-
|
| 745 |
-
|
| 746 |
-
|
| 747 |
-
verbosity=0,
|
| 748 |
-
progress=False,
|
| 749 |
-
random_state=0,
|
| 750 |
-
deterministic=True, # Deterministic as tests require this.
|
| 751 |
-
procs=0,
|
| 752 |
-
multithreading=False,
|
| 753 |
-
warm_start=False,
|
| 754 |
-
temp_equation_file=True,
|
| 755 |
-
) # Return early.
|
| 756 |
-
|
| 757 |
-
check_generator = check_estimator(model, generate_only=True)
|
| 758 |
-
exception_messages = []
|
| 759 |
-
for _, check in check_generator:
|
| 760 |
-
if check.func.__name__ == "check_complex_data":
|
| 761 |
-
# We can use complex data, so avoid this check.
|
| 762 |
-
continue
|
| 763 |
-
try:
|
| 764 |
-
with warnings.catch_warnings():
|
| 765 |
-
warnings.simplefilter("ignore")
|
| 766 |
-
check(model)
|
| 767 |
-
print("Passed", check.func.__name__)
|
| 768 |
-
except Exception:
|
| 769 |
-
error_message = str(traceback.format_exc())
|
| 770 |
-
exception_messages.append(
|
| 771 |
-
f"{check.func.__name__}:\n" + error_message + "\n"
|
| 772 |
-
)
|
| 773 |
-
print("Failed", check.func.__name__, "with:")
|
| 774 |
-
# Add a leading tab to error message, which
|
| 775 |
-
# might be multi-line:
|
| 776 |
-
print("\n".join([(" " * 4) + row for row in error_message.split("\n")]))
|
| 777 |
-
# If any checks failed don't let the test pass.
|
| 778 |
-
self.assertEqual(len(exception_messages), 0)
|
| 779 |
-
|
| 780 |
-
def test_param_groupings(self):
|
| 781 |
-
"""Test that param_groupings are complete"""
|
| 782 |
-
param_groupings_file = Path(__file__).parent.parent / "param_groupings.yml"
|
| 783 |
-
if not param_groupings_file.exists():
|
| 784 |
-
return
|
| 785 |
-
|
| 786 |
-
# Read the file, discarding lines ending in ":",
|
| 787 |
-
# and removing leading "\s*-\s*":
|
| 788 |
-
params = []
|
| 789 |
-
with open(param_groupings_file, "r") as f:
|
| 790 |
-
for line in f.readlines():
|
| 791 |
-
if line.strip().endswith(":"):
|
| 792 |
-
continue
|
| 793 |
-
if line.strip().startswith("-"):
|
| 794 |
-
params.append(line.strip()[1:].strip())
|
| 795 |
|
| 796 |
-
|
| 797 |
-
|
| 798 |
-
|
| 799 |
|
| 800 |
-
|
| 801 |
-
self.assertSetEqual(set(params), set(regressor_params))
|
| 802 |
|
| 803 |
|
| 804 |
TRUE_PREAMBLE = "\n".join(
|
|
@@ -932,7 +1039,7 @@ class TestLaTeXTable(unittest.TestCase):
|
|
| 932 |
middle_part_2 = r"""
|
| 933 |
$y_{1} = x_{1}$ & $1$ & $1.32$ & $0.0$ \\
|
| 934 |
$y_{1} = \cos{\left(x_{1} \right)}$ & $2$ & $0.0520$ & $3.23$ \\
|
| 935 |
-
$y_{1} = x_{0}
|
| 936 |
"""
|
| 937 |
true_latex_table_str = "\n\n".join(
|
| 938 |
self.create_true_latex(part, include_score=True)
|
|
@@ -985,7 +1092,7 @@ class TestLaTeXTable(unittest.TestCase):
|
|
| 985 |
middle_part = r"""
|
| 986 |
$y = x_{0}$ & $1$ & $1.05$ & $0.0$ \\
|
| 987 |
$y = \cos{\left(x_{0} \right)}$ & $2$ & $0.0232$ & $3.82$ \\
|
| 988 |
-
\begin{minipage}{0.8\linewidth} \vspace{-1em} \begin{dmath*} y = x_{0}
|
| 989 |
"""
|
| 990 |
true_latex_table_str = (
|
| 991 |
TRUE_PREAMBLE
|
|
@@ -1039,8 +1146,14 @@ class TestDimensionalConstraints(unittest.TestCase):
|
|
| 1039 |
"""This just checks the number of units passed"""
|
| 1040 |
use_custom_variable_names = False
|
| 1041 |
variable_names = None
|
|
|
|
| 1042 |
weights = None
|
| 1043 |
-
args = (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1044 |
valid_units = [
|
| 1045 |
(np.ones((10, 2)), np.ones(10), ["m/s", "s"], "m"),
|
| 1046 |
(np.ones((10, 1)), np.ones(10), ["m/s"], None),
|
|
@@ -1148,6 +1261,7 @@ def runtests(just_tests=False):
|
|
| 1148 |
TestBest,
|
| 1149 |
TestFeatureSelection,
|
| 1150 |
TestMiscellaneous,
|
|
|
|
| 1151 |
TestLaTeXTable,
|
| 1152 |
TestDimensionalConstraints,
|
| 1153 |
]
|
|
|
|
| 11 |
import sympy
|
| 12 |
from sklearn.utils.estimator_checks import check_estimator
|
| 13 |
|
| 14 |
+
from pysr import PySRRegressor, install, jl
|
| 15 |
+
from pysr.export_latex import sympy2latex
|
| 16 |
+
from pysr.feature_selection import _handle_feature_selection, run_feature_selection
|
| 17 |
+
from pysr.julia_helpers import init_julia
|
| 18 |
+
from pysr.sr import (
|
| 19 |
+
_check_assertions,
|
| 20 |
+
_process_constraints,
|
| 21 |
+
_suggest_keywords,
|
| 22 |
+
idx_model_selection,
|
| 23 |
+
)
|
| 24 |
+
from pysr.utils import _csv_filename_to_pkl_filename
|
| 25 |
+
|
| 26 |
from .params import (
|
| 27 |
DEFAULT_NCYCLES,
|
| 28 |
DEFAULT_NITERATIONS,
|
|
|
|
| 30 |
DEFAULT_POPULATIONS,
|
| 31 |
)
|
| 32 |
|
| 33 |
+
# Disables local saving:
|
| 34 |
+
os.environ["SYMBOLIC_REGRESSION_IS_TESTING"] = os.environ.get(
|
| 35 |
+
"SYMBOLIC_REGRESSION_IS_TESTING", "true"
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
|
| 39 |
class TestPipeline(unittest.TestCase):
|
| 40 |
def setUp(self):
|
|
|
|
| 182 |
self.assertLessEqual(mse1, 1e-4)
|
| 183 |
self.assertLessEqual(mse2, 1e-4)
|
| 184 |
|
| 185 |
+
def test_custom_variable_complexity(self):
|
| 186 |
+
for outer in (True, False):
|
| 187 |
+
for case in (1, 2):
|
| 188 |
+
y = self.X[:, [0, 1]]
|
| 189 |
+
if case == 1:
|
| 190 |
+
kwargs = dict(complexity_of_variables=[2, 3])
|
| 191 |
+
elif case == 2:
|
| 192 |
+
kwargs = dict(complexity_of_variables=2)
|
| 193 |
+
|
| 194 |
+
if outer:
|
| 195 |
+
outer_kwargs = kwargs
|
| 196 |
+
inner_kwargs = dict()
|
| 197 |
+
else:
|
| 198 |
+
outer_kwargs = dict()
|
| 199 |
+
inner_kwargs = kwargs
|
| 200 |
+
|
| 201 |
+
model = PySRRegressor(
|
| 202 |
+
binary_operators=["+"],
|
| 203 |
+
verbosity=0,
|
| 204 |
+
**self.default_test_kwargs,
|
| 205 |
+
early_stop_condition=(
|
| 206 |
+
f"stop_if_{case}(l, c) = l < 1e-8 && c <= {3 if case == 1 else 2}"
|
| 207 |
+
),
|
| 208 |
+
**outer_kwargs,
|
| 209 |
+
)
|
| 210 |
+
model.fit(self.X[:, [0, 1]], y, **inner_kwargs)
|
| 211 |
+
self.assertLessEqual(model.get_best()[0]["loss"], 1e-8)
|
| 212 |
+
self.assertLessEqual(model.get_best()[1]["loss"], 1e-8)
|
| 213 |
+
|
| 214 |
+
self.assertEqual(model.get_best()[0]["complexity"], 2)
|
| 215 |
+
self.assertEqual(
|
| 216 |
+
model.get_best()[1]["complexity"], 3 if case == 1 else 2
|
| 217 |
+
)
|
| 218 |
+
|
| 219 |
+
def test_error_message_custom_variable_complexity(self):
|
| 220 |
+
X = np.ones((10, 2))
|
| 221 |
+
y = np.ones((10,))
|
| 222 |
+
model = PySRRegressor()
|
| 223 |
+
with self.assertRaises(ValueError) as cm:
|
| 224 |
+
model.fit(X, y, complexity_of_variables=[1, 2, 3])
|
| 225 |
+
|
| 226 |
+
self.assertIn(
|
| 227 |
+
"number of elements in `complexity_of_variables`", str(cm.exception)
|
| 228 |
+
)
|
| 229 |
+
|
| 230 |
+
def test_error_message_both_variable_complexity(self):
|
| 231 |
+
X = np.ones((10, 2))
|
| 232 |
+
y = np.ones((10,))
|
| 233 |
+
model = PySRRegressor(complexity_of_variables=[1, 2])
|
| 234 |
+
with self.assertRaises(ValueError) as cm:
|
| 235 |
+
model.fit(X, y, complexity_of_variables=[1, 2, 3])
|
| 236 |
+
|
| 237 |
+
self.assertIn(
|
| 238 |
+
"You cannot set `complexity_of_variables` at both `fit` and `__init__`.",
|
| 239 |
+
str(cm.exception),
|
| 240 |
+
)
|
| 241 |
+
|
| 242 |
def test_multioutput_weighted_with_callable_temp_equation(self):
|
| 243 |
X = self.X.copy()
|
| 244 |
y = X[:, [0, 1]] ** 2
|
|
|
|
| 376 |
"unused_feature": self.rstate.randn(500),
|
| 377 |
}
|
| 378 |
)
|
| 379 |
+
|
| 380 |
+
def true_fn(x):
|
| 381 |
+
return np.array(x["T"] + x["x"] ** 2 + 1.323837)
|
| 382 |
+
|
| 383 |
y = true_fn(X)
|
| 384 |
noise = self.rstate.randn(500) * 0.01
|
| 385 |
y = y + noise
|
|
|
|
| 438 |
|
| 439 |
def test_load_model(self):
|
| 440 |
"""See if we can load a ran model from the equation file."""
|
| 441 |
+
csv_file_data = """Complexity,Loss,Equation
|
|
|
|
| 442 |
1,0.19951081,"1.9762075"
|
| 443 |
3,0.12717344,"(f0 + 1.4724599)"
|
| 444 |
4,0.104823045,"pow_abs(2.2683423, cos(f3))\""""
|
| 445 |
# Strip the indents:
|
| 446 |
+
csv_file_data = "\n".join([line.strip() for line in csv_file_data.split("\n")])
|
| 447 |
|
| 448 |
for from_backup in [False, True]:
|
| 449 |
rand_dir = Path(tempfile.mkdtemp())
|
|
|
|
| 495 |
if os.path.exists(file_to_delete):
|
| 496 |
os.remove(file_to_delete)
|
| 497 |
|
| 498 |
+
# pickle_file = rand_dir / "equations.pkl"
|
| 499 |
model3 = PySRRegressor.from_file(
|
| 500 |
model.equation_file_, extra_sympy_mappings={"sq": lambda x: x**2}
|
| 501 |
)
|
| 502 |
np.testing.assert_allclose(model.predict(self.X), model3.predict(self.X))
|
| 503 |
|
| 504 |
+
def test_jl_function_error(self):
|
| 505 |
+
# TODO: Move this to better class
|
| 506 |
+
with self.assertRaises(ValueError) as cm:
|
| 507 |
+
PySRRegressor(unary_operators=["1"]).fit([[1]], [1])
|
| 508 |
+
|
| 509 |
+
self.assertIn(
|
| 510 |
+
"When building `unary_operators`, `'1'` did not return a Julia function",
|
| 511 |
+
str(cm.exception),
|
| 512 |
+
)
|
| 513 |
+
|
| 514 |
|
| 515 |
def manually_create_model(equations, feature_names=None):
|
| 516 |
if feature_names is None:
|
|
|
|
| 606 |
X = self.rstate.randn(20000, 5)
|
| 607 |
y = X[:, 2] ** 2 + X[:, 3] ** 2
|
| 608 |
selected = run_feature_selection(X, y, select_k_features=2)
|
| 609 |
+
np.testing.assert_array_equal(selected, [False, False, True, True, False])
|
| 610 |
|
| 611 |
def test_feature_selection_handler(self):
|
| 612 |
X = self.rstate.randn(20000, 5)
|
|
|
|
| 618 |
variable_names=var_names,
|
| 619 |
y=y,
|
| 620 |
)
|
| 621 |
+
np.testing.assert_array_equal(selection, [False, False, True, True, False])
|
| 622 |
+
selected_var_names = [var_names[i] for i in range(5) if selection[i]]
|
| 623 |
self.assertEqual(set(selected_var_names), set("x2 x3".split(" ")))
|
| 624 |
np.testing.assert_array_equal(
|
| 625 |
np.sort(selected_X, axis=1), np.sort(X[:, [2, 3]], axis=1)
|
|
|
|
| 643 |
test_pkl_file = _csv_filename_to_pkl_filename(str(equation_file))
|
| 644 |
self.assertEqual(test_pkl_file, str(expected_pkl_file))
|
| 645 |
|
| 646 |
+
def test_pickle_with_temp_equation_file(self):
|
| 647 |
+
"""If we have a temporary equation file, unpickle the estimator."""
|
| 648 |
+
model = PySRRegressor(
|
| 649 |
+
populations=int(1 + DEFAULT_POPULATIONS / 5),
|
| 650 |
+
temp_equation_file=True,
|
| 651 |
+
procs=0,
|
| 652 |
+
multithreading=False,
|
| 653 |
+
)
|
| 654 |
+
nout = 3
|
| 655 |
+
X = np.random.randn(100, 2)
|
| 656 |
+
y = np.random.randn(100, nout)
|
| 657 |
+
model.fit(X, y)
|
| 658 |
+
contents = model.equation_file_contents_.copy()
|
| 659 |
+
|
| 660 |
+
y_predictions = model.predict(X)
|
| 661 |
+
|
| 662 |
+
equation_file_base = model.equation_file_
|
| 663 |
+
for i in range(1, nout + 1):
|
| 664 |
+
assert not os.path.exists(str(equation_file_base) + f".out{i}.bkup")
|
| 665 |
+
|
| 666 |
+
with tempfile.NamedTemporaryFile() as pickle_file:
|
| 667 |
+
pkl.dump(model, pickle_file)
|
| 668 |
+
pickle_file.seek(0)
|
| 669 |
+
model2 = pkl.load(pickle_file)
|
| 670 |
+
|
| 671 |
+
contents2 = model2.equation_file_contents_
|
| 672 |
+
cols_to_check = ["equation", "loss", "complexity"]
|
| 673 |
+
for frame1, frame2 in zip(contents, contents2):
|
| 674 |
+
pd.testing.assert_frame_equal(frame1[cols_to_check], frame2[cols_to_check])
|
| 675 |
+
|
| 676 |
+
y_predictions2 = model2.predict(X)
|
| 677 |
+
np.testing.assert_array_almost_equal(y_predictions, y_predictions2)
|
| 678 |
+
|
| 679 |
+
def test_scikit_learn_compatibility(self):
|
| 680 |
+
"""Test PySRRegressor compatibility with scikit-learn."""
|
| 681 |
+
model = PySRRegressor(
|
| 682 |
+
niterations=int(1 + DEFAULT_NITERATIONS / 10),
|
| 683 |
+
populations=int(1 + DEFAULT_POPULATIONS / 3),
|
| 684 |
+
ncycles_per_iteration=int(2 + DEFAULT_NCYCLES / 10),
|
| 685 |
+
verbosity=0,
|
| 686 |
+
progress=False,
|
| 687 |
+
random_state=0,
|
| 688 |
+
deterministic=True, # Deterministic as tests require this.
|
| 689 |
+
procs=0,
|
| 690 |
+
multithreading=False,
|
| 691 |
+
warm_start=False,
|
| 692 |
+
temp_equation_file=True,
|
| 693 |
+
) # Return early.
|
| 694 |
+
|
| 695 |
+
check_generator = check_estimator(model, generate_only=True)
|
| 696 |
+
exception_messages = []
|
| 697 |
+
for _, check in check_generator:
|
| 698 |
+
if check.func.__name__ == "check_complex_data":
|
| 699 |
+
# We can use complex data, so avoid this check.
|
| 700 |
+
continue
|
| 701 |
+
try:
|
| 702 |
+
with warnings.catch_warnings():
|
| 703 |
+
warnings.simplefilter("ignore")
|
| 704 |
+
check(model)
|
| 705 |
+
print("Passed", check.func.__name__)
|
| 706 |
+
except Exception:
|
| 707 |
+
error_message = str(traceback.format_exc())
|
| 708 |
+
exception_messages.append(
|
| 709 |
+
f"{check.func.__name__}:\n" + error_message + "\n"
|
| 710 |
+
)
|
| 711 |
+
print("Failed", check.func.__name__, "with:")
|
| 712 |
+
# Add a leading tab to error message, which
|
| 713 |
+
# might be multi-line:
|
| 714 |
+
print("\n".join([(" " * 4) + row for row in error_message.split("\n")]))
|
| 715 |
+
# If any checks failed don't let the test pass.
|
| 716 |
+
self.assertEqual(len(exception_messages), 0)
|
| 717 |
+
|
| 718 |
+
def test_param_groupings(self):
|
| 719 |
+
"""Test that param_groupings are complete"""
|
| 720 |
+
param_groupings_file = Path(__file__).parent.parent / "param_groupings.yml"
|
| 721 |
+
if not param_groupings_file.exists():
|
| 722 |
+
return
|
| 723 |
+
|
| 724 |
+
# Read the file, discarding lines ending in ":",
|
| 725 |
+
# and removing leading "\s*-\s*":
|
| 726 |
+
params = []
|
| 727 |
+
with open(param_groupings_file, "r") as f:
|
| 728 |
+
for line in f.readlines():
|
| 729 |
+
if line.strip().endswith(":"):
|
| 730 |
+
continue
|
| 731 |
+
if line.strip().startswith("-"):
|
| 732 |
+
params.append(line.strip()[1:].strip())
|
| 733 |
+
|
| 734 |
+
regressor_params = [
|
| 735 |
+
p for p in DEFAULT_PARAMS.keys() if p not in ["self", "kwargs"]
|
| 736 |
+
]
|
| 737 |
+
|
| 738 |
+
# Check the sets are equal:
|
| 739 |
+
self.assertSetEqual(set(params), set(regressor_params))
|
| 740 |
+
|
| 741 |
+
|
| 742 |
+
class TestHelpMessages(unittest.TestCase):
|
| 743 |
+
"""Test user help messages."""
|
| 744 |
+
|
| 745 |
def test_deprecation(self):
|
| 746 |
"""Ensure that deprecation works as expected.
|
| 747 |
|
|
|
|
| 884 |
model.get_best()
|
| 885 |
print("Failed", opt["kwargs"])
|
| 886 |
|
| 887 |
+
def test_suggest_keywords(self):
|
| 888 |
+
# Easy
|
| 889 |
+
self.assertEqual(
|
| 890 |
+
_suggest_keywords(PySRRegressor, "loss_function"), ["loss_function"]
|
|
|
|
|
|
|
|
|
|
| 891 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 892 |
|
| 893 |
+
# More complex, and with error
|
| 894 |
+
with self.assertRaises(TypeError) as cm:
|
| 895 |
+
model = PySRRegressor(ncyclesperiterationn=5)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 896 |
|
| 897 |
+
self.assertIn(
|
| 898 |
+
"`ncyclesperiterationn` is not a valid keyword", str(cm.exception)
|
| 899 |
+
)
|
| 900 |
+
self.assertIn("Did you mean", str(cm.exception))
|
| 901 |
+
self.assertIn("`ncycles_per_iteration`, ", str(cm.exception))
|
| 902 |
+
self.assertIn("`niterations`", str(cm.exception))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 903 |
|
| 904 |
+
# Farther matches (this might need to be changed)
|
| 905 |
+
with self.assertRaises(TypeError) as cm:
|
| 906 |
+
model = PySRRegressor(operators=["+", "-"])
|
| 907 |
|
| 908 |
+
self.assertIn("`unary_operators`, `binary_operators`", str(cm.exception))
|
|
|
|
| 909 |
|
| 910 |
|
| 911 |
TRUE_PREAMBLE = "\n".join(
|
|
|
|
| 1039 |
middle_part_2 = r"""
|
| 1040 |
$y_{1} = x_{1}$ & $1$ & $1.32$ & $0.0$ \\
|
| 1041 |
$y_{1} = \cos{\left(x_{1} \right)}$ & $2$ & $0.0520$ & $3.23$ \\
|
| 1042 |
+
$y_{1} = x_{0} x_{0} x_{1}$ & $5$ & $2.00 \cdot 10^{-15}$ & $10.3$ \\
|
| 1043 |
"""
|
| 1044 |
true_latex_table_str = "\n\n".join(
|
| 1045 |
self.create_true_latex(part, include_score=True)
|
|
|
|
| 1092 |
middle_part = r"""
|
| 1093 |
$y = x_{0}$ & $1$ & $1.05$ & $0.0$ \\
|
| 1094 |
$y = \cos{\left(x_{0} \right)}$ & $2$ & $0.0232$ & $3.82$ \\
|
| 1095 |
+
\begin{minipage}{0.8\linewidth} \vspace{-1em} \begin{dmath*} y = x_{0} x_{0} x_{0} + x_{0} x_{0} x_{0} x_{0} x_{0} + 3.20 x_{0} - 1.20 x_{1} + x_{1} x_{1} x_{1} + 5.20 \sin{\left(- 2.60 x_{0} + 0.326 \sin{\left(x_{2} \right)} \right)} - \cos{\left(x_{0} x_{1} \right)} + \cos{\left(x_{0} x_{0} x_{0} + 3.20 x_{0} - 1.20 x_{1} + x_{1} x_{1} x_{1} + \cos{\left(x_{0} x_{1} \right)} \right)} \end{dmath*} \end{minipage} & $30$ & $1.12 \cdot 10^{-15}$ & $1.09$ \\
|
| 1096 |
"""
|
| 1097 |
true_latex_table_str = (
|
| 1098 |
TRUE_PREAMBLE
|
|
|
|
| 1146 |
"""This just checks the number of units passed"""
|
| 1147 |
use_custom_variable_names = False
|
| 1148 |
variable_names = None
|
| 1149 |
+
complexity_of_variables = 1
|
| 1150 |
weights = None
|
| 1151 |
+
args = (
|
| 1152 |
+
use_custom_variable_names,
|
| 1153 |
+
variable_names,
|
| 1154 |
+
complexity_of_variables,
|
| 1155 |
+
weights,
|
| 1156 |
+
)
|
| 1157 |
valid_units = [
|
| 1158 |
(np.ones((10, 2)), np.ones(10), ["m/s", "s"], "m"),
|
| 1159 |
(np.ones((10, 1)), np.ones(10), ["m/s"], None),
|
|
|
|
| 1261 |
TestBest,
|
| 1262 |
TestFeatureSelection,
|
| 1263 |
TestMiscellaneous,
|
| 1264 |
+
TestHelpMessages,
|
| 1265 |
TestLaTeXTable,
|
| 1266 |
TestDimensionalConstraints,
|
| 1267 |
]
|
pysr/test/test_jax.py
CHANGED
|
@@ -5,27 +5,29 @@ import numpy as np
|
|
| 5 |
import pandas as pd
|
| 6 |
import sympy
|
| 7 |
|
| 8 |
-
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
class TestJAX(unittest.TestCase):
|
| 12 |
def setUp(self):
|
| 13 |
np.random.seed(0)
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
def test_sympy2jax(self):
|
| 16 |
-
from jax import numpy as jnp
|
| 17 |
from jax import random
|
| 18 |
|
| 19 |
x, y, z = sympy.symbols("x y z")
|
| 20 |
cosx = 1.0 * sympy.cos(x) + y
|
| 21 |
key = random.PRNGKey(0)
|
| 22 |
X = random.normal(key, (1000, 2))
|
| 23 |
-
true = 1.0 * jnp.cos(X[:, 0]) + X[:, 1]
|
| 24 |
f, params = sympy2jax(cosx, [x, y, z])
|
| 25 |
-
self.assertTrue(jnp.all(jnp.isclose(f(X, params), true)).item())
|
| 26 |
|
| 27 |
def test_pipeline_pandas(self):
|
| 28 |
-
from jax import numpy as jnp
|
| 29 |
|
| 30 |
X = pd.DataFrame(np.random.randn(100, 10))
|
| 31 |
y = np.ones(X.shape[0])
|
|
@@ -52,14 +54,12 @@ class TestJAX(unittest.TestCase):
|
|
| 52 |
jformat = model.jax()
|
| 53 |
|
| 54 |
np.testing.assert_almost_equal(
|
| 55 |
-
np.array(jformat["callable"](jnp.array(X), jformat["parameters"])),
|
| 56 |
np.square(np.cos(X.values[:, 1])), # Select feature 1
|
| 57 |
decimal=3,
|
| 58 |
)
|
| 59 |
|
| 60 |
def test_pipeline(self):
|
| 61 |
-
from jax import numpy as jnp
|
| 62 |
-
|
| 63 |
X = np.random.randn(100, 10)
|
| 64 |
y = np.ones(X.shape[0])
|
| 65 |
model = PySRRegressor(progress=False, max_evals=10000, output_jax_format=True)
|
|
@@ -81,15 +81,46 @@ class TestJAX(unittest.TestCase):
|
|
| 81 |
jformat = model.jax()
|
| 82 |
|
| 83 |
np.testing.assert_almost_equal(
|
| 84 |
-
np.array(jformat["callable"](jnp.array(X), jformat["parameters"])),
|
| 85 |
np.square(np.cos(X[:, 1])), # Select feature 1
|
| 86 |
decimal=3,
|
| 87 |
)
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
def test_feature_selection_custom_operators(self):
|
| 90 |
rstate = np.random.RandomState(0)
|
| 91 |
X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
| 93 |
y = X["k15"] ** 2 + 2 * cos_approx(X["k20"])
|
| 94 |
|
| 95 |
model = PySRRegressor(
|
|
|
|
| 5 |
import pandas as pd
|
| 6 |
import sympy
|
| 7 |
|
| 8 |
+
import pysr
|
| 9 |
+
from pysr import PySRRegressor, sympy2jax
|
| 10 |
|
| 11 |
|
| 12 |
class TestJAX(unittest.TestCase):
|
| 13 |
def setUp(self):
|
| 14 |
np.random.seed(0)
|
| 15 |
+
from jax import numpy as jnp
|
| 16 |
+
|
| 17 |
+
self.jnp = jnp
|
| 18 |
|
| 19 |
def test_sympy2jax(self):
|
|
|
|
| 20 |
from jax import random
|
| 21 |
|
| 22 |
x, y, z = sympy.symbols("x y z")
|
| 23 |
cosx = 1.0 * sympy.cos(x) + y
|
| 24 |
key = random.PRNGKey(0)
|
| 25 |
X = random.normal(key, (1000, 2))
|
| 26 |
+
true = 1.0 * self.jnp.cos(X[:, 0]) + X[:, 1]
|
| 27 |
f, params = sympy2jax(cosx, [x, y, z])
|
| 28 |
+
self.assertTrue(self.jnp.all(self.jnp.isclose(f(X, params), true)).item())
|
| 29 |
|
| 30 |
def test_pipeline_pandas(self):
|
|
|
|
| 31 |
|
| 32 |
X = pd.DataFrame(np.random.randn(100, 10))
|
| 33 |
y = np.ones(X.shape[0])
|
|
|
|
| 54 |
jformat = model.jax()
|
| 55 |
|
| 56 |
np.testing.assert_almost_equal(
|
| 57 |
+
np.array(jformat["callable"](self.jnp.array(X), jformat["parameters"])),
|
| 58 |
np.square(np.cos(X.values[:, 1])), # Select feature 1
|
| 59 |
decimal=3,
|
| 60 |
)
|
| 61 |
|
| 62 |
def test_pipeline(self):
|
|
|
|
|
|
|
| 63 |
X = np.random.randn(100, 10)
|
| 64 |
y = np.ones(X.shape[0])
|
| 65 |
model = PySRRegressor(progress=False, max_evals=10000, output_jax_format=True)
|
|
|
|
| 81 |
jformat = model.jax()
|
| 82 |
|
| 83 |
np.testing.assert_almost_equal(
|
| 84 |
+
np.array(jformat["callable"](self.jnp.array(X), jformat["parameters"])),
|
| 85 |
np.square(np.cos(X[:, 1])), # Select feature 1
|
| 86 |
decimal=3,
|
| 87 |
)
|
| 88 |
|
| 89 |
+
def test_avoid_simplification(self):
|
| 90 |
+
ex = pysr.export_sympy.pysr2sympy(
|
| 91 |
+
"square(exp(sign(0.44796443))) + 1.5 * x1",
|
| 92 |
+
feature_names_in=["x1"],
|
| 93 |
+
extra_sympy_mappings={"square": lambda x: x**2},
|
| 94 |
+
)
|
| 95 |
+
f, params = pysr.export_jax.sympy2jax(ex, [sympy.symbols("x1")])
|
| 96 |
+
key = np.random.RandomState(0)
|
| 97 |
+
X = key.randn(10, 1)
|
| 98 |
+
np.testing.assert_almost_equal(
|
| 99 |
+
np.array(f(self.jnp.array(X), params)),
|
| 100 |
+
np.square(np.exp(np.sign(0.44796443))) + 1.5 * X[:, 0],
|
| 101 |
+
decimal=3,
|
| 102 |
+
)
|
| 103 |
+
|
| 104 |
+
def test_issue_656(self):
|
| 105 |
+
import sympy
|
| 106 |
+
|
| 107 |
+
E_plus_x1 = sympy.exp(1) + sympy.symbols("x1")
|
| 108 |
+
f, params = pysr.export_jax.sympy2jax(E_plus_x1, [sympy.symbols("x1")])
|
| 109 |
+
key = np.random.RandomState(0)
|
| 110 |
+
X = key.randn(10, 1)
|
| 111 |
+
np.testing.assert_almost_equal(
|
| 112 |
+
np.array(f(self.jnp.array(X), params)),
|
| 113 |
+
np.exp(1) + X[:, 0],
|
| 114 |
+
decimal=3,
|
| 115 |
+
)
|
| 116 |
+
|
| 117 |
def test_feature_selection_custom_operators(self):
|
| 118 |
rstate = np.random.RandomState(0)
|
| 119 |
X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
|
| 120 |
+
|
| 121 |
+
def cos_approx(x):
|
| 122 |
+
return 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
|
| 123 |
+
|
| 124 |
y = X["k15"] ** 2 + 2 * cos_approx(X["k20"])
|
| 125 |
|
| 126 |
model = PySRRegressor(
|
pysr/test/test_startup.py
CHANGED
|
@@ -9,8 +9,9 @@ from pathlib import Path
|
|
| 9 |
|
| 10 |
import numpy as np
|
| 11 |
|
| 12 |
-
from
|
| 13 |
-
from
|
|
|
|
| 14 |
from .params import DEFAULT_NITERATIONS, DEFAULT_POPULATIONS
|
| 15 |
|
| 16 |
|
|
@@ -118,10 +119,6 @@ class TestStartup(unittest.TestCase):
|
|
| 118 |
code="import juliacall; import pysr",
|
| 119 |
msg="juliacall module already imported.",
|
| 120 |
),
|
| 121 |
-
dict(
|
| 122 |
-
code='import os; os.environ["PYSR_AUTOLOAD_EXTENSIONS"] = "foo"; import pysr',
|
| 123 |
-
msg="PYSR_AUTOLOAD_EXTENSIONS environment variable is set",
|
| 124 |
-
),
|
| 125 |
]
|
| 126 |
for warning_test in warning_tests:
|
| 127 |
result = subprocess.run(
|
|
|
|
| 9 |
|
| 10 |
import numpy as np
|
| 11 |
|
| 12 |
+
from pysr import PySRRegressor
|
| 13 |
+
from pysr.julia_import import jl_version
|
| 14 |
+
|
| 15 |
from .params import DEFAULT_NITERATIONS, DEFAULT_POPULATIONS
|
| 16 |
|
| 17 |
|
|
|
|
| 119 |
code="import juliacall; import pysr",
|
| 120 |
msg="juliacall module already imported.",
|
| 121 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
]
|
| 123 |
for warning_test in warning_tests:
|
| 124 |
result = subprocess.run(
|
pysr/test/test_torch.py
CHANGED
|
@@ -4,7 +4,8 @@ import numpy as np
|
|
| 4 |
import pandas as pd
|
| 5 |
import sympy
|
| 6 |
|
| 7 |
-
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
class TestTorch(unittest.TestCase):
|
|
@@ -153,10 +154,43 @@ class TestTorch(unittest.TestCase):
|
|
| 153 |
decimal=3,
|
| 154 |
)
|
| 155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
def test_feature_selection_custom_operators(self):
|
| 157 |
rstate = np.random.RandomState(0)
|
| 158 |
X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
|
| 159 |
-
|
|
|
|
|
|
|
|
|
|
| 160 |
y = X["k15"] ** 2 + 2 * cos_approx(X["k20"])
|
| 161 |
|
| 162 |
model = PySRRegressor(
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
import sympy
|
| 6 |
|
| 7 |
+
import pysr
|
| 8 |
+
from pysr import PySRRegressor, sympy2torch
|
| 9 |
|
| 10 |
|
| 11 |
class TestTorch(unittest.TestCase):
|
|
|
|
| 154 |
decimal=3,
|
| 155 |
)
|
| 156 |
|
| 157 |
+
def test_avoid_simplification(self):
|
| 158 |
+
# SymPy should not simplify without permission
|
| 159 |
+
torch = self.torch
|
| 160 |
+
ex = pysr.export_sympy.pysr2sympy(
|
| 161 |
+
"square(exp(sign(0.44796443))) + 1.5 * x1",
|
| 162 |
+
# ^ Normally this would become exp1 and require
|
| 163 |
+
# its own mapping
|
| 164 |
+
feature_names_in=["x1"],
|
| 165 |
+
extra_sympy_mappings={"square": lambda x: x**2},
|
| 166 |
+
)
|
| 167 |
+
m = pysr.export_torch.sympy2torch(ex, ["x1"])
|
| 168 |
+
rng = np.random.RandomState(0)
|
| 169 |
+
X = rng.randn(10, 1)
|
| 170 |
+
np.testing.assert_almost_equal(
|
| 171 |
+
m(torch.tensor(X)).detach().numpy(),
|
| 172 |
+
np.square(np.exp(np.sign(0.44796443))) + 1.5 * X[:, 0],
|
| 173 |
+
decimal=3,
|
| 174 |
+
)
|
| 175 |
+
|
| 176 |
+
def test_issue_656(self):
|
| 177 |
+
# Should correctly map numeric symbols to floats
|
| 178 |
+
E_plus_x1 = sympy.exp(1) + sympy.symbols("x1")
|
| 179 |
+
m = pysr.export_torch.sympy2torch(E_plus_x1, ["x1"])
|
| 180 |
+
X = np.random.randn(10, 1)
|
| 181 |
+
np.testing.assert_almost_equal(
|
| 182 |
+
m(self.torch.tensor(X)).detach().numpy(),
|
| 183 |
+
np.exp(1) + X[:, 0],
|
| 184 |
+
decimal=3,
|
| 185 |
+
)
|
| 186 |
+
|
| 187 |
def test_feature_selection_custom_operators(self):
|
| 188 |
rstate = np.random.RandomState(0)
|
| 189 |
X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
|
| 190 |
+
|
| 191 |
+
def cos_approx(x):
|
| 192 |
+
return 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
|
| 193 |
+
|
| 194 |
y = X["k15"] ** 2 + 2 * cos_approx(X["k20"])
|
| 195 |
|
| 196 |
model = PySRRegressor(
|
pysr/utils.py
CHANGED
|
@@ -1,10 +1,20 @@
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import re
|
|
|
|
|
|
|
| 3 |
|
| 4 |
-
from
|
|
|
|
| 5 |
|
|
|
|
| 6 |
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
if os.path.splitext(csv_filename)[1] == ".pkl":
|
| 9 |
return csv_filename
|
| 10 |
|
|
@@ -53,3 +63,13 @@ def _subscriptify(i: int) -> str:
|
|
| 53 |
For example, 123 -> "βββ".
|
| 54 |
"""
|
| 55 |
return "".join([chr(0x2080 + int(c)) for c in str(i)])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import difflib
|
| 2 |
+
import inspect
|
| 3 |
import os
|
| 4 |
import re
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Any, List, TypeVar, Union
|
| 7 |
|
| 8 |
+
from numpy import ndarray
|
| 9 |
+
from sklearn.utils.validation import _check_feature_names_in # type: ignore
|
| 10 |
|
| 11 |
+
T = TypeVar("T", bound=Any)
|
| 12 |
|
| 13 |
+
ArrayLike = Union[ndarray, List[T]]
|
| 14 |
+
PathLike = Union[str, Path]
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _csv_filename_to_pkl_filename(csv_filename: PathLike) -> PathLike:
|
| 18 |
if os.path.splitext(csv_filename)[1] == ".pkl":
|
| 19 |
return csv_filename
|
| 20 |
|
|
|
|
| 63 |
For example, 123 -> "βββ".
|
| 64 |
"""
|
| 65 |
return "".join([chr(0x2080 + int(c)) for c in str(i)])
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def _suggest_keywords(cls, k: str) -> List[str]:
|
| 69 |
+
valid_keywords = [
|
| 70 |
+
param
|
| 71 |
+
for param in inspect.signature(cls.__init__).parameters
|
| 72 |
+
if param not in ["self", "kwargs"]
|
| 73 |
+
]
|
| 74 |
+
suggestions = difflib.get_close_matches(k, valid_keywords, n=3)
|
| 75 |
+
return suggestions
|
requirements.txt
CHANGED
|
@@ -1,8 +1,7 @@
|
|
| 1 |
sympy>=1.0.0,<2.0.0
|
| 2 |
pandas>=0.21.0,<3.0.0
|
| 3 |
-
numpy>=1.13.0,<
|
| 4 |
scikit_learn>=1.0.0,<2.0.0
|
| 5 |
-
juliacall==0.9.
|
| 6 |
click>=7.0.0,<9.0.0
|
| 7 |
setuptools>=50.0.0
|
| 8 |
-
typing_extensions>=4.0.0,<5.0.0; python_version < "3.8"
|
|
|
|
| 1 |
sympy>=1.0.0,<2.0.0
|
| 2 |
pandas>=0.21.0,<3.0.0
|
| 3 |
+
numpy>=1.13.0,<3.0.0
|
| 4 |
scikit_learn>=1.0.0,<2.0.0
|
| 5 |
+
juliacall==0.9.20
|
| 6 |
click>=7.0.0,<9.0.0
|
| 7 |
setuptools>=50.0.0
|
|
|