Commit 55b1295
1 parent: c2b20b6
Full update of benchmark script

benchmarks/hyperparamopt.py (+136 -88), CHANGED
Old version (context and removed lines):

@@ -2,28 +2,27 @@
 import sys
 import numpy as np
 import pickle as pkl
 import hyperopt
 from hyperopt import hp, fmin, tpe, Trials
-import pysr
-import time
-
-import contextlib
-
-
-@contextlib.contextmanager
-def temp_seed(seed):
-    state = np.random.get_state()
-    np.random.seed(seed)
-    try:
-        yield
-    finally:
-        np.random.set_state(state)
-

 # Change the following code to your file
 ################################################################################
 TRIALS_FOLDER = "trials"
 NUMBER_TRIALS_PER_RUN = 1


 def run_trial(args):
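The removed temp_seed helper was a context manager for drawing reproducible data without permanently reseeding NumPy's global RNG. A minimal usage sketch (the dataset shape here is illustrative, not taken from the old script):

```python
import contextlib
import numpy as np


@contextlib.contextmanager
def temp_seed(seed):
    # Save, reseed, then restore the global RNG state.
    state = np.random.get_state()
    np.random.seed(seed)
    try:
        yield
    finally:
        np.random.set_state(state)


# Draw a fixed dataset; unrelated np.random calls afterwards are unaffected.
with temp_seed(0):
    X = np.random.randn(100, 5)
```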
@@ -33,81 +32,136 @@ def run_trial(args):
     :returns: Dict with status and loss from cross-validation

     """

-    print("Running on", args)
-    args["niterations"] = 100
-    args["npop"] = 100
-    args["ncyclesperiteration"] = 1000
-    args["topn"] = 10
-    args["parsimony"] = 0.0
-    args["useFrequency"] = True
-    args["annealing"] = True
-
-    if args["npop"] < 20 or args["ncyclesperiteration"] < 3:
-        print("Bad parameters")
-        return {"status": "ok", "loss": np.inf}
-
-    args["weightDoNothing"] = 1.0
     ntrials = 3

-    ...  # removed dataset-creation lines not captured in this view
-    eval_str = [
-        "np.sign(X[:, 2])*np.abs(X[:, 2])**2.5 + 5*np.cos(X[:, 3]) - 5",
-        "np.exp(X[:, 0]/2) + 12.0 + np.log(np.abs(X[:, 0])*10 + 1)",
-        "(np.exp(X[:, 3]) + 3)/(np.abs(X[:, 1]) + np.cos(X[:, 0]) + 1.1)",
-        "X[:, 0] * np.sin(2*np.pi * (X[:, 1] * X[:, 2] - X[:, 3] / X[:, 4])) + 3.0",
-    ]

-
-    try:
-        local_trials = []
-        for i in range(len(eval_str)):
-            print(f"Starting test {i}")
-            for j in range(ntrials):
-                print(f"Starting trial {j}")
-                y = eval(eval_str[i])
-                trial = pysr.pysr(
-                    X,
-                    y,
-                    procs=4,
-                    populations=20,
-                    binary_operators=["plus", "mult", "pow", "div"],
-                    unary_operators=["cos", "exp", "sin", "logm", "abs"],
-                    maxsize=25,
-                    constraints={"pow": (-1, 1)},
-                    **args,
-                )
-                if len(trial) == 0:
-                    raise ValueError
-                local_trials.append(
-                    np.min(trial["MSE"]) ** 0.5 / np.std(eval(eval_str[i - 1]))
-                )
-                print(f"Test {i} trial {j} with", str(args), f"got {local_trials[-1]}")
-
-    except ValueError:
-        print("Broken", str(args))
-        return {"status": "ok", "loss": np.inf}  # or 'fail' if nan loss
-    loss = np.average(local_trials)
     print(f"Finished with {loss}", str(args))

     return {"status": "ok", "loss": loss}  # or 'fail' if nan loss


-space = ...  # the old search-space definition was not captured in this view
-
 ################################################################################
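For reference, the removed loss computation normalized the RMSE of the best discovered equation by the standard deviation of the target, so that scores were comparable across the different test expressions. A standalone sketch of that metric (the function name is ours):

```python
import numpy as np


def normalized_rmse(best_mse: float, y: np.ndarray) -> float:
    # RMSE of the best-found equation, scaled by the target's spread.
    return best_mse ** 0.5 / np.std(y)


# Example: MSE 0.25 on a target with standard deviation 2.0 gives 0.25.
print(normalized_rmse(0.25, np.array([-2.0, 2.0])))
```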
@@ -178,7 +232,7 @@ while True:
             max_evals=n + len(trials.trials),
             trials=trials,
             verbose=1,
-            rstate=np.random.
         )
     except hyperopt.exceptions.AllTrialsFailed:
         continue
@@ -188,12 +242,6 @@ while True:

     # Merge with empty trials dataset:
     save_trials = merge_trials(hyperopt_trial, trials.trials[-n:])
-    new_fname = (
-        TRIALS_FOLDER
-        + "/"
-        + str(np.random.randint(0, sys.maxsize))
-        + str(time.time())
-        + ".pkl"
-    )
     pkl.dump({"trials": save_trials, "n": n}, open(new_fname, "wb"))
     loaded_fnames.append(new_fname)
New version (context and added lines):

@@ -2,28 +2,27 @@
 import sys
 import numpy as np
 import pickle as pkl
+from pysr import PySRRegressor
 import hyperopt
 from hyperopt import hp, fmin, tpe, Trials

 # Change the following code to your file
 ################################################################################
 TRIALS_FOLDER = "trials"
 NUMBER_TRIALS_PER_RUN = 1
+timeout_in_seconds = 5 * 60
+
+# Test run to compile everything:
+binary_operators = ["*", "/", "+", "-"]
+unary_operators = ["sin", "cos", "exp", "log"]
+julia_project = None
+model = PySRRegressor(
+    binary_operators=binary_operators,
+    unary_operators=unary_operators,
+    timeout_in_seconds=30,
+    julia_project=julia_project,
+)
+model.fit(np.random.randn(100, 3), np.random.randn(100))


 def run_trial(args):
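The short fit on random data above acts as a warm-up: per the "Test run to compile everything" comment, it pays the one-time Julia compilation cost before any timed trial starts, so the first hyperopt trial is not penalized inside its timeout. A rough timing sketch, assuming PySR is installed (the shapes and niterations=1 are our choices):

```python
import time

import numpy as np
from pysr import PySRRegressor

X_warm = np.random.randn(100, 3)
y_warm = np.random.randn(100)

t0 = time.time()
PySRRegressor(niterations=1).fit(X_warm, y_warm)  # first fit: includes compilation
t1 = time.time()
PySRRegressor(niterations=1).fit(X_warm, y_warm)  # later fits: mostly search time
t2 = time.time()
print(f"first fit {t1 - t0:.1f}s, second fit {t2 - t1:.1f}s")
```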
@@ -33,81 +32,136 @@ def run_trial(args):
     :returns: Dict with status and loss from cross-validation

     """
+    # The arguments which are integers:
+    integer_args = [
+        "populations",
+        "niterations",
+        "ncyclesperiteration",
+        "npop",
+        "topn",
+        "maxsize",
+        "optimizer_nrestarts",
+        "optimizer_iterations",
+    ]
+    # Set these to int types:
+    for k, v in args.items():
+        if k in integer_args:
+            args[k] = int(v)
+
+    # Duplicate this argument:
+    args["tournament_selection_n"] = args["topn"]
+
+    # Invalid hyperparams:
+    invalid = args["npop"] < args["topn"]
+    if invalid:
+        return dict(status="fail", loss=float("inf"))
+
+    args["timeout_in_seconds"] = timeout_in_seconds
+    args["julia_project"] = julia_project
+    args["procs"] = 4
+
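The int() cast near the top of run_trial is needed because hyperopt's quantized distributions (hp.quniform, hp.qloguniform) sample floats such as 100.0, while settings like populations or topn are integer-valued. A small sketch, drawing one value outside of fmin:

```python
import numpy as np
from hyperopt import hp
from hyperopt.pyll.stochastic import sample

# Quantized log-uniform draws come back as floats (e.g. 87.0), not ints.
draw = sample(hp.qloguniform("populations", np.log(10), np.log(1000), 1))
print(draw)       # e.g. 87.0
print(int(draw))  # the cast applied in run_trial above
```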
+    # Create the dataset:
+    rstate = np.random.RandomState(0)
+    X = 3 * rstate.randn(200, 5)
+    y = np.cos(2.3 * X[:, 0]) * np.sin(2.3 * X[:, 0] * X[:, 1] * X[:, 2])
+
+    # Old datasets:
+    # eval_str = [
+    #     "np.sign(X[:, 2])*np.abs(X[:, 2])**2.5 + 5*np.cos(X[:, 3]) - 5",
+    #     "np.exp(X[:, 0]/2) + 12.0 + np.log(np.abs(X[:, 0])*10 + 1)",
+    #     "(np.exp(X[:, 3]) + 3)/(np.abs(X[:, 1]) + np.cos(X[:, 0]) + 1.1)",
+    #     "X[:, 0] * np.sin(2*np.pi * (X[:, 1] * X[:, 2] - X[:, 3] / X[:, 4])) + 3.0",
+    # ]

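Building the data from a locally constructed np.random.RandomState(0) pins the dataset for every trial without touching the global RNG, which is what the removed temp_seed context manager used to do. A quick check that repeated calls reproduce identical data (the wrapper function is ours):

```python
import numpy as np


def make_dataset():
    # Local RandomState: reproducible without reseeding the global RNG.
    rstate = np.random.RandomState(0)
    X = 3 * rstate.randn(200, 5)
    y = np.cos(2.3 * X[:, 0]) * np.sin(2.3 * X[:, 0] * X[:, 1] * X[:, 2])
    return X, y


X1, y1 = make_dataset()
X2, y2 = make_dataset()
assert np.array_equal(X1, X2) and np.array_equal(y1, y2)
```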
     ntrials = 3
+    losses = []
+    for i in range(ntrials):
+        # Create the model:
+        model = PySRRegressor(**args)

+        # Run the model:
+        model.fit(X, y)

+        # Compute loss:
+        cur_loss = float(model.get_best()["loss"])
+        losses.append(cur_loss)

+    loss = np.median(losses)
     print(f"Finished with {loss}", str(args))

     return {"status": "ok", "loss": loss}  # or 'fail' if nan loss

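Reporting the median over the repeated fits, rather than the mean, keeps one stuck run from dominating the score of an otherwise good hyperparameter setting. A toy illustration:

```python
import numpy as np

losses = [0.02, 0.03, 5.0]  # one of three trials got stuck
print(np.mean(losses))       # ~1.68, dominated by the outlier
print(np.median(losses))     # 0.03, the value reported as the trial's loss
```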
+space = dict(
+    # model_selection="best",
+    model_selection=hp.choice("model_selection", ["accuracy"]),
+    # binary_operators=None,
+    binary_operators=hp.choice("binary_operators", [binary_operators]),
+    # unary_operators=None,
+    unary_operators=hp.choice("unary_operators", [unary_operators]),
+    # populations=100,
+    populations=hp.qloguniform("populations", np.log(10), np.log(1000), 1),
+    # niterations=4,
+    niterations=hp.choice(
+        "niterations", [10000]
+    ),  # We will quit automatically based on a clock.
+    # ncyclesperiteration=100,
+    ncyclesperiteration=hp.qloguniform(
+        "ncyclesperiteration", np.log(10), np.log(5000), 1
+    ),
+    # alpha=0.1,
+    alpha=hp.loguniform("alpha", np.log(0.0001), np.log(1000)),
+    # annealing=False,
+    annealing=hp.choice("annealing", [False, True]),
+    # fractionReplaced=0.01,
+    fractionReplaced=hp.loguniform("fractionReplaced", np.log(0.0001), np.log(0.5)),
+    # fractionReplacedHof=0.005,
+    fractionReplacedHof=hp.loguniform(
+        "fractionReplacedHof", np.log(0.0001), np.log(0.5)
+    ),
+    # npop=100,
+    npop=hp.qloguniform("npop", np.log(20), np.log(1000), 1),
+    # parsimony=1e-4,
+    parsimony=hp.loguniform("parsimony", np.log(0.0001), np.log(0.5)),
+    # topn=10,
+    topn=hp.qloguniform("topn", np.log(2), np.log(50), 1),
+    # weightAddNode=1,
+    weightAddNode=hp.loguniform("weightAddNode", np.log(0.0001), np.log(100)),
+    # weightInsertNode=3,
+    weightInsertNode=hp.loguniform("weightInsertNode", np.log(0.0001), np.log(100)),
+    # weightDeleteNode=3,
+    weightDeleteNode=hp.loguniform("weightDeleteNode", np.log(0.0001), np.log(100)),
+    # weightDoNothing=1,
+    weightDoNothing=hp.loguniform("weightDoNothing", np.log(0.0001), np.log(100)),
+    # weightMutateConstant=10,
+    weightMutateConstant=hp.loguniform(
+        "weightMutateConstant", np.log(0.0001), np.log(100)
+    ),
+    # weightMutateOperator=1,
+    weightMutateOperator=hp.loguniform(
+        "weightMutateOperator", np.log(0.0001), np.log(100)
+    ),
+    # weightRandomize=1,
+    weightRandomize=hp.loguniform("weightRandomize", np.log(0.0001), np.log(100)),
+    # weightSimplify=0.002,
+    weightSimplify=hp.choice("weightSimplify", [0.002]),  # One of these is fixed.
+    # perturbationFactor=1.0,
+    perturbationFactor=hp.loguniform(
+        "perturbationFactor", np.log(0.0001), np.log(100)
+    ),
+    # maxsize=20,
+    maxsize=hp.choice("maxsize", [20]),
+    # warmupMaxsizeBy=0.0,
+    warmupMaxsizeBy=hp.uniform("warmupMaxsizeBy", 0.0, 0.5),
+    # useFrequency=True,
+    useFrequency=hp.choice("useFrequency", [True, False]),
+    # optimizer_nrestarts=3,
+    optimizer_nrestarts=hp.quniform("optimizer_nrestarts", 1, 10, 1),
+    # optimize_probability=1.0,
+    optimize_probability=hp.uniform("optimize_probability", 0.0, 1.0),
+    # optimizer_iterations=10,
+    optimizer_iterations=hp.quniform("optimizer_iterations", 1, 10, 1),
+    # tournament_selection_p=1.0,
+    tournament_selection_p=hp.uniform("tournament_selection_p", 0.0, 1.0),
+)

 ################################################################################

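Each entry in space pairs the commented-out PySR default with the hyperopt distribution that replaces it. To sanity-check the ranges before launching workers, one can draw a single configuration from the space defined above (illustrative only):

```python
from hyperopt.pyll.stochastic import sample

# Draw one random configuration from the search space.
example_config = sample(space)
print({k: example_config[k] for k in ("populations", "npop", "topn", "alpha")})
```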
@@ -178,7 +232,7 @@ while True:
             max_evals=n + len(trials.trials),
             trials=trials,
             verbose=1,
+            rstate=np.random.default_rng(np.random.randint(1, 10 ** 6)),
         )
     except hyperopt.exceptions.AllTrialsFailed:
         continue
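The new rstate argument passes a NumPy Generator, which recent hyperopt releases expect in place of the legacy RandomState that the removed (truncated) line appears to have used. A minimal, self-contained sketch of the same call pattern (the toy objective is ours):

```python
import numpy as np
from hyperopt import Trials, fmin, hp, tpe

best = fmin(
    fn=lambda params: params["x"] ** 2,       # toy objective
    space={"x": hp.uniform("x", -1.0, 1.0)},
    algo=tpe.suggest,
    max_evals=10,
    trials=Trials(),
    rstate=np.random.default_rng(0),          # Generator, not RandomState
)
print(best)
```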
@@ -188,12 +242,6 @@ while True:

     # Merge with empty trials dataset:
     save_trials = merge_trials(hyperopt_trial, trials.trials[-n:])
+    new_fname = TRIALS_FOLDER + "/" + str(np.random.randint(0, sys.maxsize)) + ".pkl"
     pkl.dump({"trials": save_trials, "n": n}, open(new_fname, "wb"))
     loaded_fnames.append(new_fname)
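Each worker writes its merged trials to a uniquely named pickle inside TRIALS_FOLDER, and other workers can later load those files and fold them into their own Trials object via the script's merge_trials helper (defined elsewhere in the file, not shown in this diff). A hedged sketch of that loading side:

```python
import glob
import pickle as pkl

TRIALS_FOLDER = "trials"

# Collect every saved run from the shared trials folder.
all_trials = []
for fname in glob.glob(TRIALS_FOLDER + "/*.pkl"):
    with open(fname, "rb") as f:
        data = pkl.load(f)           # {"trials": ..., "n": ...} as written above
    all_trials.append(data["trials"])
# merge_trials (from this script) would then fold these into one Trials object.
```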