Spaces:
Running
Running
Merge pull request #2 from DhananjayAshok/refactoring
Browse files
Refactoring of sr.py and integration with Miles's changes last night
- pysr/sr.py +252 -182
pysr/sr.py
CHANGED
|
@@ -191,15 +191,9 @@ def pysr(X=None, y=None, weights=None,
|
|
| 191 |
(as strings).
|
| 192 |
|
| 193 |
"""
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
raise ValueError("The limitPowComplexity kwarg is deprecated. Use constraints.")
|
| 198 |
-
if maxdepth is None:
|
| 199 |
-
maxdepth = maxsize
|
| 200 |
-
if equation_file is None:
|
| 201 |
-
date_time = datetime.now().strftime("%Y-%m-%d_%H%M%S.%f")[:-3]
|
| 202 |
-
equation_file = 'hall_of_fame_' + date_time + '.csv'
|
| 203 |
|
| 204 |
if isinstance(X, pd.DataFrame):
|
| 205 |
variable_names = list(X.columns)
|
|
@@ -210,128 +204,144 @@ def pysr(X=None, y=None, weights=None,
|
|
| 210 |
if len(X.shape) == 1:
|
| 211 |
X = X[:, None]
|
| 212 |
|
| 213 |
-
|
| 214 |
-
assert len(unary_operators) + len(binary_operators) > 0
|
| 215 |
-
assert len(X.shape) == 2
|
| 216 |
-
assert len(y.shape) == 1
|
| 217 |
-
assert X.shape[0] == y.shape[0]
|
| 218 |
-
if weights is not None:
|
| 219 |
-
assert len(weights.shape) == 1
|
| 220 |
-
assert X.shape[0] == weights.shape[0]
|
| 221 |
-
if use_custom_variable_names:
|
| 222 |
-
assert len(variable_names) == X.shape[1]
|
| 223 |
|
| 224 |
-
|
| 225 |
-
selection = run_feature_selection(X, y, select_k_features)
|
| 226 |
-
print(f"Using features {selection}")
|
| 227 |
-
X = X[:, selection]
|
| 228 |
-
|
| 229 |
-
if use_custom_variable_names:
|
| 230 |
-
variable_names = [variable_names[selection[i]] for i in range(len(selection))]
|
| 231 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
if populations is None:
|
| 233 |
populations = procs
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
if isinstance(unary_operators, str):
|
| 237 |
-
|
| 238 |
if X is None:
|
| 239 |
-
|
| 240 |
-
eval_str = "np.sign(X[:, 2])*np.abs(X[:, 2])**2.5 + 5*np.cos(X[:, 3]) - 5"
|
| 241 |
-
elif test == 'simple2':
|
| 242 |
-
eval_str = "np.sign(X[:, 2])*np.abs(X[:, 2])**3.5 + 1/(np.abs(X[:, 0])+1)"
|
| 243 |
-
elif test == 'simple3':
|
| 244 |
-
eval_str = "np.exp(X[:, 0]/2) + 12.0 + np.log(np.abs(X[:, 0])*10 + 1)"
|
| 245 |
-
elif test == 'simple4':
|
| 246 |
-
eval_str = "1.0 + 3*X[:, 0]**2 - 0.5*X[:, 0]**3 + 0.1*X[:, 0]**4"
|
| 247 |
-
elif test == 'simple5':
|
| 248 |
-
eval_str = "(np.exp(X[:, 3]) + 3)/(np.abs(X[:, 1]) + np.cos(X[:, 0]) + 1.1)"
|
| 249 |
-
|
| 250 |
-
X = np.random.randn(100, 5)*3
|
| 251 |
-
y = eval(eval_str)
|
| 252 |
-
print("Running on", eval_str)
|
| 253 |
-
|
| 254 |
-
# System-independent paths
|
| 255 |
-
pkg_directory = Path(__file__).parents[1] / 'julia'
|
| 256 |
-
pkg_filename = pkg_directory / "sr.jl"
|
| 257 |
-
operator_filename = pkg_directory / "Operators.jl"
|
| 258 |
-
julia_auxiliaries = [
|
| 259 |
-
"Equation.jl", "ProgramConstants.jl",
|
| 260 |
-
"LossFunctions.jl", "Utils.jl", "EvaluateEquation.jl",
|
| 261 |
-
"MutationFunctions.jl", "SimplifyEquation.jl", "PopMember.jl",
|
| 262 |
-
"HallOfFame.jl", "CheckConstraints.jl", "Mutate.jl",
|
| 263 |
-
"Population.jl", "RegularizedEvolution.jl", "SingleIteration.jl",
|
| 264 |
-
"ConstantOptimization.jl"
|
| 265 |
-
]
|
| 266 |
-
julia_auxiliary_filenames = [
|
| 267 |
-
pkg_directory / fname
|
| 268 |
-
for fname in julia_auxiliaries
|
| 269 |
-
]
|
| 270 |
-
|
| 271 |
-
tmpdir = Path(tempfile.mkdtemp(dir=tempdir))
|
| 272 |
-
hyperparam_filename = tmpdir / f'hyperparams.jl'
|
| 273 |
-
dataset_filename = tmpdir / f'dataset.jl'
|
| 274 |
-
auxiliary_filename = tmpdir / f'auxiliary.jl'
|
| 275 |
-
runfile_filename = tmpdir / f'runfile.jl'
|
| 276 |
-
X_filename = tmpdir / "X.csv"
|
| 277 |
-
y_filename = tmpdir / "y.csv"
|
| 278 |
-
weights_filename = tmpdir / "weights.csv"
|
| 279 |
|
| 280 |
def_hyperparams = ""
|
| 281 |
|
| 282 |
# Add pre-defined functions to Julia
|
| 283 |
-
|
| 284 |
-
for i in range(len(op_list)):
|
| 285 |
-
op = op_list[i]
|
| 286 |
-
is_user_defined_operator = '(' in op
|
| 287 |
|
| 288 |
-
if is_user_defined_operator:
|
| 289 |
-
def_hyperparams += op + "\n"
|
| 290 |
-
# Cut off from the first non-alphanumeric char:
|
| 291 |
-
first_non_char = [
|
| 292 |
-
j for j in range(len(op))
|
| 293 |
-
if not (op[j].isalpha() or op[j].isdigit())][0]
|
| 294 |
-
function_name = op[:first_non_char]
|
| 295 |
-
op_list[i] = function_name
|
| 296 |
|
| 297 |
#arbitrary complexity by default
|
| 298 |
-
|
| 299 |
-
if op not in constraints:
|
| 300 |
-
constraints[op] = -1
|
| 301 |
-
for op in binary_operators:
|
| 302 |
-
if op not in constraints:
|
| 303 |
-
constraints[op] = (-1, -1)
|
| 304 |
-
if op in ['plus', 'sub']:
|
| 305 |
-
if constraints[op][0] != constraints[op][1]:
|
| 306 |
-
raise NotImplementedError("You need equal constraints on both sides for - and *, due to simplification strategies.")
|
| 307 |
-
elif op == 'mult':
|
| 308 |
-
# Make sure the complex expression is in the left side.
|
| 309 |
-
if constraints[op][0] == -1:
|
| 310 |
-
continue
|
| 311 |
-
elif constraints[op][1] == -1 or constraints[op][0] < constraints[op][1]:
|
| 312 |
-
constraints[op][0], constraints[op][1] = constraints[op][1], constraints[op][0]
|
| 313 |
|
| 314 |
-
constraints_str =
|
| 315 |
-
first = True
|
| 316 |
-
for op in unary_operators:
|
| 317 |
-
val = constraints[op]
|
| 318 |
-
if not first:
|
| 319 |
-
constraints_str += ", "
|
| 320 |
-
constraints_str += f"{val:d}"
|
| 321 |
-
first = False
|
| 322 |
|
| 323 |
-
constraints_str += """]
|
| 324 |
-
const bin_constraints = ["""
|
| 325 |
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
def_hyperparams += f"""include("{_escape_filename(operator_filename)}")
|
| 336 |
{constraints_str}
|
| 337 |
const binops = {'[' + ', '.join(binary_operators) + ']'}
|
|
@@ -370,7 +380,6 @@ const warmupMaxsize = {warmupMaxsize:d}
|
|
| 370 |
const limitPowComplexity = {"true" if limitPowComplexity else "false"}
|
| 371 |
const useFrequency = {"true" if useFrequency else "false"}
|
| 372 |
"""
|
| 373 |
-
|
| 374 |
op_runner = ""
|
| 375 |
if len(binary_operators) > 0:
|
| 376 |
op_runner += """
|
|
@@ -381,14 +390,13 @@ const useFrequency = {"true" if useFrequency else "false"}
|
|
| 381 |
end"""
|
| 382 |
for i in range(1, len(binary_operators)):
|
| 383 |
op_runner += f"""
|
| 384 |
-
elseif i === {i+1}
|
| 385 |
@inbounds @simd for j=1:clen
|
| 386 |
x[j] = {binary_operators[i]}(x[j], y[j])
|
| 387 |
end"""
|
| 388 |
op_runner += """
|
| 389 |
end
|
| 390 |
end"""
|
| 391 |
-
|
| 392 |
if len(unary_operators) > 0:
|
| 393 |
op_runner += """
|
| 394 |
@inline function UNAOP!(x::Array{Float32, 1}, i::Int, clen::Int)
|
|
@@ -398,93 +406,155 @@ end"""
|
|
| 398 |
end"""
|
| 399 |
for i in range(1, len(unary_operators)):
|
| 400 |
op_runner += f"""
|
| 401 |
-
elseif i === {i+1}
|
| 402 |
@inbounds @simd for j=1:clen
|
| 403 |
x[j] = {unary_operators[i]}(x[j])
|
| 404 |
end"""
|
| 405 |
op_runner += """
|
| 406 |
end
|
| 407 |
end"""
|
| 408 |
-
|
| 409 |
def_hyperparams += op_runner
|
|
|
|
|
|
|
|
|
|
|
|
|
| 410 |
|
| 411 |
-
def_auxiliary = '\n'.join([
|
| 412 |
-
f"""include("{_escape_filename(aux_fname)}")""" for aux_fname in julia_auxiliary_filenames
|
| 413 |
-
])
|
| 414 |
-
|
| 415 |
-
def_datasets = """using DelimitedFiles"""
|
| 416 |
-
|
| 417 |
-
np.savetxt(X_filename, X, delimiter=',')
|
| 418 |
-
np.savetxt(y_filename, y, delimiter=',')
|
| 419 |
-
if weights is not None:
|
| 420 |
-
np.savetxt(weights_filename, weights, delimiter=',')
|
| 421 |
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
|
| 426 |
-
if weights is not None:
|
| 427 |
-
def_datasets += f"""
|
| 428 |
-
const weights = readdlm("{_escape_filename(weights_filename)}", ',', Float32, '\\n')"""
|
| 429 |
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 433 |
|
| 434 |
-
with open(hyperparam_filename, 'w') as f:
|
| 435 |
-
print(def_hyperparams, file=f)
|
| 436 |
|
| 437 |
-
|
| 438 |
-
|
|
|
|
|
|
|
|
|
|
| 439 |
|
| 440 |
-
|
| 441 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 442 |
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
print(f'@everywhere include("{_escape_filename(auxiliary_filename)}")', file=f)
|
| 447 |
-
print(f'@everywhere include("{_escape_filename(pkg_filename)}")', file=f)
|
| 448 |
-
print(f'fullRun({niterations:d}, npop={npop:d}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, {verbosity:f}), topn={topn:d})', file=f)
|
| 449 |
-
print(f'rmprocs(nprocs)', file=f)
|
| 450 |
|
| 451 |
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 459 |
|
| 460 |
-
global global_n_features
|
| 461 |
-
global global_equation_file
|
| 462 |
-
global global_variable_names
|
| 463 |
-
global global_extra_sympy_mappings
|
| 464 |
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 469 |
|
| 470 |
-
print("Running on", ' '.join(command))
|
| 471 |
-
process = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=1, shell=True)
|
| 472 |
-
try:
|
| 473 |
-
while True:
|
| 474 |
-
line = process.stdout.readline()
|
| 475 |
-
if not line: break
|
| 476 |
-
print(line.decode('utf-8').replace('\n', ''))
|
| 477 |
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 483 |
|
| 484 |
-
if delete_tempfiles:
|
| 485 |
-
shutil.rmtree(tmpdir)
|
| 486 |
|
| 487 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
|
| 489 |
|
| 490 |
def run_feature_selection(X, y, select_k_features):
|
|
|
|
| 191 |
(as strings).
|
| 192 |
|
| 193 |
"""
|
| 194 |
+
raise_depreciation_errors(limitPowComplexity, threads)
|
| 195 |
+
auxiliary_filename, X_filename, dataset_filename, hyperparam_filename, julia_auxiliary_filenames, operator_filename \
|
| 196 |
+
,pkg_filename, runfile_filename, tmpdir, weights_filename, y_filename = set_paths(tempdir)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
if isinstance(X, pd.DataFrame):
|
| 199 |
variable_names = list(X.columns)
|
|
|
|
| 204 |
if len(X.shape) == 1:
|
| 205 |
X = X[:, None]
|
| 206 |
|
| 207 |
+
check_assertions(X, binary_operators, unary_operators, use_custom_variable_names, variable_names, weights, y)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
|
| 209 |
+
X, variable_names = handle_feature_selection(X, select_k_features, use_custom_variable_names, variable_names, y)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
+
if maxdepth is None:
|
| 212 |
+
maxdepth = maxsize
|
| 213 |
+
if equation_file is None:
|
| 214 |
+
date_time = datetime.now().strftime("%Y-%m-%d_%H%M%S.%f")[:-3]
|
| 215 |
+
equation_file = 'hall_of_fame_' + date_time + '.csv'
|
| 216 |
if populations is None:
|
| 217 |
populations = procs
|
| 218 |
+
if isinstance(binary_operators, str):
|
| 219 |
+
binary_operators = [binary_operators]
|
| 220 |
+
if isinstance(unary_operators, str):
|
| 221 |
+
unary_operators = [unary_operators]
|
| 222 |
if X is None:
|
| 223 |
+
X, y = using_test_input(X, test, y)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
|
| 225 |
def_hyperparams = ""
|
| 226 |
|
| 227 |
# Add pre-defined functions to Julia
|
| 228 |
+
def_hyperparams = predefined_function_addition(binary_operators, def_hyperparams, unary_operators)
|
|
|
|
|
|
|
|
|
|
| 229 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
#arbitrary complexity by default
|
| 232 |
+
handle_constraints(binary_operators, constraints, unary_operators)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
|
| 234 |
+
constraints_str = make_constraints_str(binary_operators, constraints, unary_operators)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
|
|
|
|
|
|
|
| 236 |
|
| 237 |
+
def_hyperparams = make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary_operators,
|
| 238 |
+
constraints_str, def_hyperparams, equation_file, fast_cycle,
|
| 239 |
+
fractionReplacedHof, hofMigration, limitPowComplexity, maxdepth,
|
| 240 |
+
maxsize, migration, nrestarts, operator_filename, parsimony,
|
| 241 |
+
perturbationFactor, populations, procs, shouldOptimizeConstants,
|
| 242 |
+
unary_operators, useFrequency, use_custom_variable_names, variable_names,
|
| 243 |
+
warmupMaxsize, weightAddNode, weightDeleteNode, weightDoNothing,
|
| 244 |
+
weightInsertNode, weightMutateConstant, weightMutateOperator,
|
| 245 |
+
weightRandomize, weightSimplify, weights)
|
| 246 |
+
def_auxiliary = make_auxiliary_julia_str(julia_auxiliary_filenames)
|
| 247 |
+
|
| 248 |
+
def_datasets = make_datasets_julia_str(X, X_filename, weights, weights_filename, y, y_filename)
|
| 249 |
|
| 250 |
+
create_julia_files(auxiliary_filename, dataset_filename, def_auxiliary, def_datasets, def_hyperparams, fractionReplaced, hyperparam_filename,
|
| 251 |
+
ncyclesperiteration, niterations, npop, pkg_filename, runfile_filename, topn, verbosity)
|
| 252 |
+
|
| 253 |
+
final_pysr_process(julia_optimization, procs, runfile_filename, timeout)
|
| 254 |
+
|
| 255 |
+
set_globals(X, equation_file, extra_sympy_mappings, variable_names)
|
| 256 |
+
|
| 257 |
+
if delete_tempfiles:
|
| 258 |
+
shutil.rmtree(tmpdir)
|
| 259 |
+
|
| 260 |
+
return get_hof()
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
def make_auxiliary_julia_str(julia_auxiliary_filenames):
    """Build the Julia source that pulls in every auxiliary file.

    One ``include("...")`` statement is generated per entry of
    ``julia_auxiliary_filenames``; each path is passed through
    ``_escape_filename`` first. The statements are joined with newlines,
    so an empty input yields an empty string.

    :param julia_auxiliary_filenames: iterable of paths to include.
    :returns: the generated Julia code as a single string.
    """
    include_statements = []
    for path in julia_auxiliary_filenames:
        include_statements.append(f'include("{_escape_filename(path)}")')
    return '\n'.join(include_statements)
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
def set_globals(X, equation_file, extra_sympy_mappings, variable_names):
    """Record the settings of the current run in module-level globals.

    :param X: feature matrix; only ``X.shape[1]`` (the number of columns)
        is stored.
    :param equation_file: stored as ``global_equation_file``.
    :param extra_sympy_mappings: stored as ``global_extra_sympy_mappings``.
    :param variable_names: stored as ``global_variable_names``.
    """
    global global_n_features, global_equation_file, \
        global_variable_names, global_extra_sympy_mappings

    # Read the column count first so nothing is overwritten if X is malformed.
    n_columns = X.shape[1]

    global_n_features = n_columns
    global_equation_file = equation_file
    global_variable_names = variable_names
    global_extra_sympy_mappings = extra_sympy_mappings
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
def final_pysr_process(julia_optimization, procs, runfile_filename, timeout):
    """Run the generated Julia script and echo its output line by line.

    The command is ``julia -O<julia_optimization> -p <procs> <runfile>``.
    When ``timeout`` is not ``None`` the command is prefixed with
    ``timeout <timeout>``. Each line the child writes to stdout is decoded
    as UTF-8 and printed. A ``KeyboardInterrupt`` kills the child instead of
    propagating.

    :param julia_optimization: integer optimisation level for ``-O``.
    :param procs: value passed to Julia's ``-p`` option.
    :param runfile_filename: path of the Julia script to execute.
    :param timeout: value handed to the ``timeout`` executable, or ``None``.
    """
    command = [
        'julia', f'-O{julia_optimization:d}',
        '-p', f'{procs}',
        str(runfile_filename),
    ]
    if timeout is not None:
        command = ['timeout', f'{timeout}'] + command
    print("Running on", ' '.join(command))

    # The command is an argument list, so it must be run without a shell:
    # on POSIX, shell=True with a list executes only the first item and
    # passes the remaining items to the shell itself, not to julia.
    # bufsize=1 (line buffering) is dropped because it only applies to
    # text-mode pipes; this pipe is read as bytes.
    process = subprocess.Popen(command, stdout=subprocess.PIPE)
    try:
        for raw_line in iter(process.stdout.readline, b''):
            print(raw_line.decode('utf-8').replace('\n', ''))
        process.wait()
    except KeyboardInterrupt:
        print("Killing process... will return when done.")
        process.kill()
        # Reap the child so it does not linger as a zombie.
        process.wait()
    finally:
        process.stdout.close()
|
| 302 |
+
|
| 303 |
+
|
| 304 |
+
def create_julia_files(auxiliary_filename, dataset_filename, def_auxiliary, def_datasets, def_hyperparams, fractionReplaced, hyperparam_filename,
                       ncyclesperiteration, niterations, npop, pkg_filename, runfile_filename, topn, verbosity):
    """Write the generated Julia sources and the run script to disk.

    Three files receive pre-built source text (hyperparameters, datasets,
    auxiliary includes). The run file then includes those three files plus
    ``pkg_filename`` on every worker, calls ``fullRun`` with the search
    settings, and finally calls ``rmprocs(nprocs)``.

    :param def_hyperparams: text written to ``hyperparam_filename``.
    :param def_datasets: text written to ``dataset_filename``.
    :param def_auxiliary: text written to ``auxiliary_filename``.
    :param niterations, npop, ncyclesperiteration, topn: formatted as
        integers into the ``fullRun`` call.
    :param fractionReplaced, verbosity: formatted as floats into the
        ``fullRun`` call.
    """
    generated_sources = (
        (hyperparam_filename, def_hyperparams),
        (dataset_filename, def_datasets),
        (auxiliary_filename, def_auxiliary),
    )
    for target, source_text in generated_sources:
        with open(target, 'w') as out:
            print(source_text, file=out)

    included_files = (hyperparam_filename, dataset_filename,
                      auxiliary_filename, pkg_filename)
    with open(runfile_filename, 'w') as out:
        for included in included_files:
            print(f'@everywhere include("{_escape_filename(included)}")', file=out)
        run_call = (
            f'fullRun({niterations:d}, npop={npop:d}, '
            f'ncyclesperiteration={ncyclesperiteration:d}, '
            f'fractionReplaced={fractionReplaced:f}f0, '
            f'verbosity=round(Int32, {verbosity:f}), topn={topn:d})'
        )
        print(run_call, file=out)
        print('rmprocs(nprocs)', file=out)
|
| 321 |
+
|
| 322 |
+
|
| 323 |
+
def make_datasets_julia_str(X, X_filename, weights, weights_filename, y, y_filename):
    """Dump the data to CSV files and return Julia code that reads them back.

    ``X`` and ``y`` are always written with ``np.savetxt`` (comma
    delimited); ``weights`` is written only when it is not ``None``. The
    returned Julia source loads ``DelimitedFiles`` and defines the constants
    ``X`` and ``y`` (plus ``weights`` when given) via ``readdlm`` as
    ``Float32``.

    :returns: the generated Julia code as a single string.
    """
    has_weights = weights is not None

    np.savetxt(X_filename, X, delimiter=',')
    np.savetxt(y_filename, y, delimiter=',')
    if has_weights:
        np.savetxt(weights_filename, weights, delimiter=',')

    statements = [
        "using DelimitedFiles",
        f"""const X = readdlm("{_escape_filename(X_filename)}", ',', Float32, '\\n')""",
        f"""const y = readdlm("{_escape_filename(y_filename)}", ',', Float32, '\\n')""",
    ]
    if has_weights:
        statements.append(
            f"""const weights = readdlm("{_escape_filename(weights_filename)}", ',', Float32, '\\n')"""
        )
    return '\n'.join(statements)
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
def make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary_operators, constraints_str,
|
| 339 |
+
def_hyperparams, equation_file, fast_cycle, fractionReplacedHof, hofMigration,
|
| 340 |
+
limitPowComplexity, maxdepth, maxsize, migration, nrestarts, operator_filename,
|
| 341 |
+
parsimony, perturbationFactor, populations, procs, shouldOptimizeConstants,
|
| 342 |
+
unary_operators, useFrequency, use_custom_variable_names, variable_names, warmupMaxsize, weightAddNode,
|
| 343 |
+
weightDeleteNode, weightDoNothing, weightInsertNode, weightMutateConstant,
|
| 344 |
+
weightMutateOperator, weightRandomize, weightSimplify, weights):
|
| 345 |
def_hyperparams += f"""include("{_escape_filename(operator_filename)}")
|
| 346 |
{constraints_str}
|
| 347 |
const binops = {'[' + ', '.join(binary_operators) + ']'}
|
|
|
|
| 380 |
const limitPowComplexity = {"true" if limitPowComplexity else "false"}
|
| 381 |
const useFrequency = {"true" if useFrequency else "false"}
|
| 382 |
"""
|
|
|
|
| 383 |
op_runner = ""
|
| 384 |
if len(binary_operators) > 0:
|
| 385 |
op_runner += """
|
|
|
|
| 390 |
end"""
|
| 391 |
for i in range(1, len(binary_operators)):
|
| 392 |
op_runner += f"""
|
| 393 |
+
elseif i === {i + 1}
|
| 394 |
@inbounds @simd for j=1:clen
|
| 395 |
x[j] = {binary_operators[i]}(x[j], y[j])
|
| 396 |
end"""
|
| 397 |
op_runner += """
|
| 398 |
end
|
| 399 |
end"""
|
|
|
|
| 400 |
if len(unary_operators) > 0:
|
| 401 |
op_runner += """
|
| 402 |
@inline function UNAOP!(x::Array{Float32, 1}, i::Int, clen::Int)
|
|
|
|
| 406 |
end"""
|
| 407 |
for i in range(1, len(unary_operators)):
|
| 408 |
op_runner += f"""
|
| 409 |
+
elseif i === {i + 1}
|
| 410 |
@inbounds @simd for j=1:clen
|
| 411 |
x[j] = {unary_operators[i]}(x[j])
|
| 412 |
end"""
|
| 413 |
op_runner += """
|
| 414 |
end
|
| 415 |
end"""
|
|
|
|
| 416 |
def_hyperparams += op_runner
|
| 417 |
+
if use_custom_variable_names:
|
| 418 |
+
def_hyperparams += f"""
|
| 419 |
+
const varMap = {'["' + '", "'.join(variable_names) + '"]'}"""
|
| 420 |
+
return def_hyperparams
|
| 421 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
|
| 423 |
+
def make_constraints_str(binary_operators, constraints, unary_operators):
    """Render the operator constraints as two Julia constant definitions.

    The result has two lines: ``const una_constraints = [...]`` holding one
    integer per unary operator, and ``const bin_constraints = [...]``
    holding one ``(left, right)`` integer pair per binary operator. Entries
    follow the order of the operator lists.

    :param binary_operators: binary operator names, each a key of
        ``constraints`` mapping to an indexable pair of integers.
    :param constraints: mapping from operator name to its constraint.
    :param unary_operators: unary operator names, each a key of
        ``constraints`` mapping to an integer.
    :returns: the generated Julia code as a single string.
    """
    unary_entries = ", ".join(
        f"{constraints[name]:d}" for name in unary_operators
    )
    binary_entries = ", ".join(
        f"({constraints[name][0]:d}, {constraints[name][1]:d})"
        for name in binary_operators
    )
    return (
        "const una_constraints = [" + unary_entries + "]\n"
        + "const bin_constraints = [" + binary_entries + "]"
    )
|
| 443 |
|
|
|
|
|
|
|
|
|
|
| 444 |
|
| 445 |
+
def handle_constraints(binary_operators, constraints, unary_operators):
    """Fill in default constraints and normalise them, in place.

    Operators missing from ``constraints`` get the unconstrained default:
    ``-1`` for unary operators and ``(-1, -1)`` for binary ones. For
    ``'plus'`` and ``'sub'`` both sides must carry the same constraint. For
    ``'mult'`` the pair is reordered so the left side is the less
    restrictive one (``-1``, or otherwise the larger value).

    :param binary_operators: names of the binary operators in use.
    :param constraints: mapping from operator name to constraint; mutated.
    :param unary_operators: names of the unary operators in use.
    :raises NotImplementedError: if ``'plus'`` or ``'sub'`` has different
        constraints on its two sides.
    """
    for op in unary_operators:
        if op not in constraints:
            constraints[op] = -1

    for op in binary_operators:
        if op not in constraints:
            constraints[op] = (-1, -1)

        if op in ('plus', 'sub'):
            if constraints[op][0] != constraints[op][1]:
                raise NotImplementedError(
                    "You need equal constraints on both sides for + and -, due to simplification strategies.")
        elif op == 'mult':
            # Make sure the complex expression is in the left side.
            left, right = constraints[op][0], constraints[op][1]
            if left == -1:
                continue
            if right == -1 or left < right:
                # Store a new pair instead of assigning to the items:
                # constraints are normally tuples, which are immutable.
                constraints[op] = (right, left)
|
| 462 |
|
|
|
|
|
|
|
| 463 |
|
| 464 |
+
def predefined_function_addition(binary_operators, def_hyperparams, unary_operators):
    """Register user-defined operators and reduce them to their names.

    An operator string containing ``'('`` is treated as an inline Julia
    definition such as ``"my_op(x) = x^2"``. Its full text is appended to
    ``def_hyperparams`` (followed by a newline), and the entry in the
    operator list is replaced, in place, by the bare function name.
    Operators without ``'('`` are left untouched.

    :param binary_operators: list of binary operators; mutated in place.
    :param def_hyperparams: Julia source accumulated so far.
    :param unary_operators: list of unary operators; mutated in place.
    :returns: ``def_hyperparams`` extended with the user definitions.
    """
    for op_list in (binary_operators, unary_operators):
        for index, op in enumerate(op_list):
            if '(' not in op:
                continue

            def_hyperparams += op + "\n"
            # The name ends at the first character that cannot belong to
            # an identifier. Underscores count as identifier characters so
            # that "my_op(x) = ..." yields "my_op" rather than "my". The
            # '(' guarantees that such a character exists.
            name_end = next(
                pos for pos, char in enumerate(op)
                if not (char.isalpha() or char.isdigit() or char == '_')
            )
            op_list[index] = op[:name_end]
    return def_hyperparams
|
| 479 |
+
|
| 480 |
+
|
| 481 |
+
def using_test_input(X, test, y):
    """Generate a synthetic dataset for one of the built-in test problems.

    The incoming ``X`` and ``y`` are not read. ``X`` is replaced by a
    fresh ``(100, 5)`` array drawn from ``np.random.randn`` and scaled by
    3, and ``y`` by the selected expression evaluated on that array. The
    expression is printed before returning.

    :param X: ignored; kept so the signature stays unchanged.
    :param test: name of the test problem, ``'simple1'`` to ``'simple5'``.
    :param y: ignored; kept so the signature stays unchanged.
    :returns: the tuple ``(X, y)``.
    :raises ValueError: if ``test`` is not a known test name.
    """
    test_expressions = {
        'simple1': "np.sign(X[:, 2])*np.abs(X[:, 2])**2.5 + 5*np.cos(X[:, 3]) - 5",
        'simple2': "np.sign(X[:, 2])*np.abs(X[:, 2])**3.5 + 1/(np.abs(X[:, 0])+1)",
        'simple3': "np.exp(X[:, 0]/2) + 12.0 + np.log(np.abs(X[:, 0])*10 + 1)",
        'simple4': "1.0 + 3*X[:, 0]**2 - 0.5*X[:, 0]**3 + 0.1*X[:, 0]**4",
        'simple5': "(np.exp(X[:, 3]) + 3)/(np.abs(X[:, 1]) + np.cos(X[:, 0]) + 1.1)",
    }
    eval_str = test_expressions.get(test) if isinstance(test, str) else None
    if eval_str is None:
        # Previously an unknown name surfaced as an UnboundLocalError.
        raise ValueError(
            f"Unknown test {test!r}; expected one of {sorted(test_expressions)}.")

    X = np.random.randn(100, 5) * 3
    # eval only ever sees the fixed expressions above, never caller-supplied
    # text, and is given exactly the two names those expressions use.
    y = eval(eval_str, {"np": np}, {"X": X})
    print("Running on", eval_str)
    return X, y
|
| 496 |
+
|
| 497 |
+
|
| 498 |
+
def handle_feature_selection(X, select_k_features, use_custom_variable_names, variable_names, y):
    """Optionally restrict ``X`` to a selected subset of its columns.

    When ``select_k_features`` is ``None`` the inputs are returned
    unchanged. Otherwise ``run_feature_selection`` picks the column indices,
    the choice is printed, and ``X`` is reduced to those columns. If custom
    variable names are in use they are reduced to the same columns.

    :returns: the tuple ``(X, variable_names)``.
    """
    if select_k_features is None:
        return X, variable_names

    chosen_columns = run_feature_selection(X, y, select_k_features)
    print(f"Using features {chosen_columns}")
    X = X[:, chosen_columns]

    if use_custom_variable_names:
        variable_names = [variable_names[column] for column in chosen_columns]
    return X, variable_names
|
|
|
|
|
|
|
|
|
|
|
|
|
| 507 |
|
| 508 |
|
| 509 |
+
def set_paths(tempdir):
    """Compute every file path used by a run and create its scratch directory.

    The Julia package files are located in the ``julia`` directory two
    levels above this file. A new temporary directory is created inside
    ``tempdir`` with ``tempfile.mkdtemp``; the generated Julia sources and
    CSV data files are given paths inside it (the files themselves are not
    created here).

    :param tempdir: parent directory handed to ``tempfile.mkdtemp``.
    :returns: the 11-tuple ``(auxiliary_filename, X_filename,
        dataset_filename, hyperparam_filename, julia_auxiliary_filenames,
        operator_filename, pkg_filename, runfile_filename, tmpdir,
        weights_filename, y_filename)``.
    """
    # System-independent paths
    julia_dir = Path(__file__).parents[1] / 'julia'
    auxiliary_names = (
        "Equation.jl", "ProgramConstants.jl",
        "LossFunctions.jl", "Utils.jl", "EvaluateEquation.jl",
        "MutationFunctions.jl", "SimplifyEquation.jl", "PopMember.jl",
        "HallOfFame.jl", "CheckConstraints.jl", "Mutate.jl",
        "Population.jl", "RegularizedEvolution.jl", "SingleIteration.jl",
        "ConstantOptimization.jl",
    )
    julia_auxiliary_filenames = []
    for name in auxiliary_names:
        julia_auxiliary_filenames.append(julia_dir / name)

    tmpdir = Path(tempfile.mkdtemp(dir=tempdir))
    return (
        tmpdir / 'auxiliary.jl',
        tmpdir / "X.csv",
        tmpdir / 'dataset.jl',
        tmpdir / 'hyperparams.jl',
        julia_auxiliary_filenames,
        julia_dir / "Operators.jl",
        julia_dir / "sr.jl",
        tmpdir / 'runfile.jl',
        tmpdir,
        tmpdir / "weights.csv",
        tmpdir / "y.csv",
    )
|
| 538 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 539 |
|
| 540 |
+
def check_assertions(X, binary_operators, unary_operators, use_custom_variable_names, variable_names, weights, y):
    """Check for potential errors in the inputs before they happen.

    Verifies that at least one operator was given, that ``X`` is
    2-dimensional and ``y`` 1-dimensional with matching first dimensions,
    that ``weights`` (when not ``None``) is 1-dimensional with one entry per
    row of ``X``, and that custom variable names (when enabled) match the
    number of columns of ``X``.

    :raises AssertionError: if any of the checks fails.
    """
    def require(condition, message):
        # Raised explicitly instead of via `assert`, so the checks are
        # still performed when Python runs with -O, and carry a message.
        if not condition:
            raise AssertionError(message)

    require(len(unary_operators) + len(binary_operators) > 0,
            "At least one unary or binary operator is required.")
    require(len(X.shape) == 2, "X must be 2-dimensional.")
    require(len(y.shape) == 1, "y must be 1-dimensional.")
    require(X.shape[0] == y.shape[0],
            "X and y must have the same number of rows.")
    if weights is not None:
        require(len(weights.shape) == 1, "weights must be 1-dimensional.")
        require(X.shape[0] == weights.shape[0],
                "weights must have one entry per row of X.")
    if use_custom_variable_names:
        require(len(variable_names) == X.shape[1],
                "variable_names must have one entry per column of X.")
|
| 551 |
|
|
|
|
|
|
|
| 552 |
|
| 553 |
+
def raise_depreciation_errors(limitPowComplexity, threads):
    """Reject keyword arguments that are no longer supported.

    :param limitPowComplexity: deprecated flag; any truthy value is an error.
    :param threads: deprecated option; any value other than ``None`` is an
        error.
    :raises ValueError: naming the deprecated kwarg and its replacement.
        ``threads`` is checked first.
    """
    deprecated_usages = (
        (threads is not None,
         "The threads kwarg is deprecated. Use procs."),
        (limitPowComplexity,
         "The limitPowComplexity kwarg is deprecated. Use constraints."),
    )
    for was_used, message in deprecated_usages:
        if was_used:
            raise ValueError(message)
|
| 558 |
|
| 559 |
|
| 560 |
def run_feature_selection(X, y, select_k_features):
|