Spaces:
Running
Running
Commit
·
0aafc34
1
Parent(s):
181a454
Refactored till file creation
Browse files- pysr/sr.py +114 -90
pysr/sr.py
CHANGED
|
@@ -195,7 +195,6 @@ def pysr(X=None, y=None, weights=None,
|
|
| 195 |
X_filename, dataset_filename, hyperparam_filename, operator_filename, pkg_filename, runfile_filename, tmpdir, \
|
| 196 |
weights_filename, y_filename = set_paths(tempdir)
|
| 197 |
|
| 198 |
-
|
| 199 |
if isinstance(X, pd.DataFrame):
|
| 200 |
variable_names = list(X.columns)
|
| 201 |
X = np.array(X)
|
|
@@ -231,27 +230,99 @@ def pysr(X=None, y=None, weights=None,
|
|
| 231 |
#arbitrary complexity by default
|
| 232 |
handle_constraints(binary_operators, constraints, unary_operators)
|
| 233 |
|
| 234 |
-
constraints_str =
|
| 235 |
-
first = True
|
| 236 |
-
for op in unary_operators:
|
| 237 |
-
val = constraints[op]
|
| 238 |
-
if not first:
|
| 239 |
-
constraints_str += ", "
|
| 240 |
-
constraints_str += f"{val:d}"
|
| 241 |
-
first = False
|
| 242 |
|
| 243 |
-
|
| 244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
|
| 246 |
-
first = True
|
| 247 |
-
for op in binary_operators:
|
| 248 |
-
tup = constraints[op]
|
| 249 |
-
if not first:
|
| 250 |
-
constraints_str += ", "
|
| 251 |
-
constraints_str += f"({tup[0]:d}, {tup[1]:d})"
|
| 252 |
-
first = False
|
| 253 |
-
constraints_str += "]"
|
| 254 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
def_hyperparams += f"""include("{_escape_filename(operator_filename)}")
|
| 256 |
{constraints_str}
|
| 257 |
const binops = {'[' + ', '.join(binary_operators) + ']'}
|
|
@@ -290,7 +361,6 @@ const warmupMaxsize = {warmupMaxsize:d}
|
|
| 290 |
const limitPowComplexity = {"true" if limitPowComplexity else "false"}
|
| 291 |
const useFrequency = {"true" if useFrequency else "false"}
|
| 292 |
"""
|
| 293 |
-
|
| 294 |
op_runner = ""
|
| 295 |
if len(binary_operators) > 0:
|
| 296 |
op_runner += """
|
|
@@ -301,14 +371,13 @@ const useFrequency = {"true" if useFrequency else "false"}
|
|
| 301 |
end"""
|
| 302 |
for i in range(1, len(binary_operators)):
|
| 303 |
op_runner += f"""
|
| 304 |
-
elseif i === {i+1}
|
| 305 |
@inbounds @simd for j=1:clen
|
| 306 |
x[j] = {binary_operators[i]}(x[j], y[j])
|
| 307 |
end"""
|
| 308 |
op_runner += """
|
| 309 |
end
|
| 310 |
end"""
|
| 311 |
-
|
| 312 |
if len(unary_operators) > 0:
|
| 313 |
op_runner += """
|
| 314 |
@inline function UNAOP!(x::Array{Float32, 1}, i::Int, clen::Int)
|
|
@@ -318,85 +387,40 @@ end"""
|
|
| 318 |
end"""
|
| 319 |
for i in range(1, len(unary_operators)):
|
| 320 |
op_runner += f"""
|
| 321 |
-
elseif i === {i+1}
|
| 322 |
@inbounds @simd for j=1:clen
|
| 323 |
x[j] = {unary_operators[i]}(x[j])
|
| 324 |
end"""
|
| 325 |
op_runner += """
|
| 326 |
end
|
| 327 |
end"""
|
| 328 |
-
|
| 329 |
def_hyperparams += op_runner
|
| 330 |
-
|
| 331 |
-
def_datasets = """using DelimitedFiles"""
|
| 332 |
-
|
| 333 |
-
np.savetxt(X_filename, X, delimiter=',')
|
| 334 |
-
np.savetxt(y_filename, y, delimiter=',')
|
| 335 |
-
if weights is not None:
|
| 336 |
-
np.savetxt(weights_filename, weights, delimiter=',')
|
| 337 |
-
|
| 338 |
-
def_datasets += f"""
|
| 339 |
-
const X = readdlm("{_escape_filename(X_filename)}", ',', Float32, '\\n')
|
| 340 |
-
const y = readdlm("{_escape_filename(y_filename)}", ',', Float32, '\\n')"""
|
| 341 |
-
|
| 342 |
-
if weights is not None:
|
| 343 |
-
def_datasets += f"""
|
| 344 |
-
const weights = readdlm("{_escape_filename(weights_filename)}", ',', Float32, '\\n')"""
|
| 345 |
-
|
| 346 |
if use_custom_variable_names:
|
| 347 |
def_hyperparams += f"""
|
| 348 |
-
const varMap = {'["' + '", "'.join(variable_names) + '"]'}"""
|
| 349 |
-
|
| 350 |
-
with open(hyperparam_filename, 'w') as f:
|
| 351 |
-
print(def_hyperparams, file=f)
|
| 352 |
-
|
| 353 |
-
with open(dataset_filename, 'w') as f:
|
| 354 |
-
print(def_datasets, file=f)
|
| 355 |
-
|
| 356 |
-
with open(runfile_filename, 'w') as f:
|
| 357 |
-
print(f'@everywhere include("{_escape_filename(hyperparam_filename)}")', file=f)
|
| 358 |
-
print(f'@everywhere include("{_escape_filename(dataset_filename)}")', file=f)
|
| 359 |
-
print(f'@everywhere include("{_escape_filename(pkg_filename)}")', file=f)
|
| 360 |
-
print(f'fullRun({niterations:d}, npop={npop:d}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, {verbosity:f}), topn={topn:d})', file=f)
|
| 361 |
-
print(f'rmprocs(nprocs)', file=f)
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
command = [
|
| 365 |
-
f'julia', f'-O{julia_optimization:d}',
|
| 366 |
-
f'-p', f'{procs}',
|
| 367 |
-
str(runfile_filename),
|
| 368 |
-
]
|
| 369 |
-
if timeout is not None:
|
| 370 |
-
command = [f'timeout', f'{timeout}'] + command
|
| 371 |
-
|
| 372 |
-
global global_n_features
|
| 373 |
-
global global_equation_file
|
| 374 |
-
global global_variable_names
|
| 375 |
-
global global_extra_sympy_mappings
|
| 376 |
-
|
| 377 |
-
global_n_features = X.shape[1]
|
| 378 |
-
global_equation_file = equation_file
|
| 379 |
-
global_variable_names = variable_names
|
| 380 |
-
global_extra_sympy_mappings = extra_sympy_mappings
|
| 381 |
-
|
| 382 |
-
print("Running on", ' '.join(command))
|
| 383 |
-
process = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=1, shell=True)
|
| 384 |
-
try:
|
| 385 |
-
while True:
|
| 386 |
-
line = process.stdout.readline()
|
| 387 |
-
if not line: break
|
| 388 |
-
print(line.decode('utf-8').replace('\n', ''))
|
| 389 |
-
|
| 390 |
-
process.stdout.close()
|
| 391 |
-
process.wait()
|
| 392 |
-
except KeyboardInterrupt:
|
| 393 |
-
print("Killing process... will return when done.")
|
| 394 |
-
process.kill()
|
| 395 |
|
| 396 |
-
if delete_tempfiles:
|
| 397 |
-
shutil.rmtree(tmpdir)
|
| 398 |
|
| 399 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 400 |
|
| 401 |
|
| 402 |
def handle_constraints(binary_operators, constraints, unary_operators):
|
|
|
|
| 195 |
X_filename, dataset_filename, hyperparam_filename, operator_filename, pkg_filename, runfile_filename, tmpdir, \
|
| 196 |
weights_filename, y_filename = set_paths(tempdir)
|
| 197 |
|
|
|
|
| 198 |
if isinstance(X, pd.DataFrame):
|
| 199 |
variable_names = list(X.columns)
|
| 200 |
X = np.array(X)
|
|
|
|
| 230 |
#arbitrary complexity by default
|
| 231 |
handle_constraints(binary_operators, constraints, unary_operators)
|
| 232 |
|
| 233 |
+
constraints_str = make_constraints_str(binary_operators, constraints, unary_operators)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
+
def_hyperparams = make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary_operators,
|
| 236 |
+
constraints_str, def_hyperparams, equation_file, fast_cycle,
|
| 237 |
+
fractionReplacedHof, hofMigration, limitPowComplexity, maxdepth,
|
| 238 |
+
maxsize, migration, nrestarts, operator_filename, parsimony,
|
| 239 |
+
perturbationFactor, populations, procs, shouldOptimizeConstants,
|
| 240 |
+
unary_operators, useFrequency, use_custom_variable_names, variable_names,
|
| 241 |
+
warmupMaxsize, weightAddNode, weightDeleteNode, weightDoNothing,
|
| 242 |
+
weightInsertNode, weightMutateConstant, weightMutateOperator,
|
| 243 |
+
weightRandomize, weightSimplify, weights)
|
| 244 |
+
|
| 245 |
+
def_datasets = make_datasets_julia_str(X, X_filename, weights, weights_filename, y, y_filename)
|
| 246 |
+
|
| 247 |
+
create_julia_files(dataset_filename, def_datasets, def_hyperparams, fractionReplaced, hyperparam_filename,
|
| 248 |
+
ncyclesperiteration, niterations, npop, pkg_filename, runfile_filename, topn, verbosity)
|
| 249 |
+
|
| 250 |
+
command = [
|
| 251 |
+
f'julia', f'-O{julia_optimization:d}',
|
| 252 |
+
f'-p', f'{procs}',
|
| 253 |
+
str(runfile_filename),
|
| 254 |
+
]
|
| 255 |
+
if timeout is not None:
|
| 256 |
+
command = [f'timeout', f'{timeout}'] + command
|
| 257 |
+
|
| 258 |
+
global global_n_features
|
| 259 |
+
global global_equation_file
|
| 260 |
+
global global_variable_names
|
| 261 |
+
global global_extra_sympy_mappings
|
| 262 |
+
|
| 263 |
+
global_n_features = X.shape[1]
|
| 264 |
+
global_equation_file = equation_file
|
| 265 |
+
global_variable_names = variable_names
|
| 266 |
+
global_extra_sympy_mappings = extra_sympy_mappings
|
| 267 |
+
|
| 268 |
+
print("Running on", ' '.join(command))
|
| 269 |
+
process = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=1, shell=True)
|
| 270 |
+
try:
|
| 271 |
+
while True:
|
| 272 |
+
line = process.stdout.readline()
|
| 273 |
+
if not line: break
|
| 274 |
+
print(line.decode('utf-8').replace('\n', ''))
|
| 275 |
+
|
| 276 |
+
process.stdout.close()
|
| 277 |
+
process.wait()
|
| 278 |
+
except KeyboardInterrupt:
|
| 279 |
+
print("Killing process... will return when done.")
|
| 280 |
+
process.kill()
|
| 281 |
+
|
| 282 |
+
if delete_tempfiles:
|
| 283 |
+
shutil.rmtree(tmpdir)
|
| 284 |
+
|
| 285 |
+
return get_hof()
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
def create_julia_files(dataset_filename, def_datasets, def_hyperparams, fractionReplaced, hyperparam_filename,
                       ncyclesperiteration, niterations, npop, pkg_filename, runfile_filename, topn, verbosity):
    """Write the hyperparameter, dataset and run files consumed by Julia.

    ``def_hyperparams`` is written to ``hyperparam_filename`` and
    ``def_datasets`` to ``dataset_filename``. The run file at
    ``runfile_filename`` then gets, in order: one ``@everywhere include(...)``
    line for each of the hyperparameter, dataset and package files, the
    ``fullRun(...)`` call built from the numeric arguments, and a final
    ``rmprocs(nprocs)`` line.

    Returns:
        None. The function only writes files.
    """
    # The two definition files are each a single printed string.
    definition_files = (
        (hyperparam_filename, def_hyperparams),
        (dataset_filename, def_datasets),
    )
    for path, contents in definition_files:
        with open(path, 'w') as out:
            print(contents, file=out)

    with open(runfile_filename, 'w') as out:
        # Include order matters to the generated script: hyperparameters,
        # then datasets, then the package file.
        for included in (hyperparam_filename, dataset_filename, pkg_filename):
            print(f'@everywhere include("{_escape_filename(included)}")', file=out)

        full_run_call = (
            f'fullRun({niterations:d}, npop={npop:d}, '
            f'ncyclesperiteration={ncyclesperiteration:d}, '
            f'fractionReplaced={fractionReplaced:f}f0, '
            f'verbosity=round(Int32, {verbosity:f}), topn={topn:d})'
        )
        print(full_run_call, file=out)
        print('rmprocs(nprocs)', file=out)
|
| 302 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
|
| 304 |
+
def make_datasets_julia_str(X, X_filename, weights, weights_filename, y, y_filename):
    """Save the datasets to disk and return the Julia code that loads them.

    ``X`` and ``y`` are always written as comma-delimited text with
    ``np.savetxt``; ``weights`` is written only when it is not ``None``.

    Returns:
        A Julia snippet that starts with ``using DelimitedFiles`` and declares
        one ``const`` per saved array, each read back with ``readdlm`` as
        ``Float32``.
    """
    # (Julia constant name, destination path, values) for every array in use.
    tables = [("X", X_filename, X), ("y", y_filename, y)]
    if weights is not None:
        tables.append(("weights", weights_filename, weights))

    # Write every file first, then build the declarations.
    for _, path, values in tables:
        np.savetxt(path, values, delimiter=',')

    julia_lines = ["using DelimitedFiles"]
    for julia_name, path, _ in tables:
        # '\\n' is emitted as the two characters \n so Julia sees a newline
        # character literal.
        julia_lines.append(
            f"const {julia_name} = readdlm(\"{_escape_filename(path)}\", ',', Float32, '\\n')"
        )
    return "\n".join(julia_lines)
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
def make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary_operators, constraints_str,
|
| 320 |
+
def_hyperparams, equation_file, fast_cycle, fractionReplacedHof, hofMigration,
|
| 321 |
+
limitPowComplexity, maxdepth, maxsize, migration, nrestarts, operator_filename,
|
| 322 |
+
parsimony, perturbationFactor, populations, procs, shouldOptimizeConstants,
|
| 323 |
+
unary_operators, useFrequency, use_custom_variable_names, variable_names, warmupMaxsize, weightAddNode,
|
| 324 |
+
weightDeleteNode, weightDoNothing, weightInsertNode, weightMutateConstant,
|
| 325 |
+
weightMutateOperator, weightRandomize, weightSimplify, weights):
|
| 326 |
def_hyperparams += f"""include("{_escape_filename(operator_filename)}")
|
| 327 |
{constraints_str}
|
| 328 |
const binops = {'[' + ', '.join(binary_operators) + ']'}
|
|
|
|
| 361 |
const limitPowComplexity = {"true" if limitPowComplexity else "false"}
|
| 362 |
const useFrequency = {"true" if useFrequency else "false"}
|
| 363 |
"""
|
|
|
|
| 364 |
op_runner = ""
|
| 365 |
if len(binary_operators) > 0:
|
| 366 |
op_runner += """
|
|
|
|
| 371 |
end"""
|
| 372 |
for i in range(1, len(binary_operators)):
|
| 373 |
op_runner += f"""
|
| 374 |
+
elseif i === {i + 1}
|
| 375 |
@inbounds @simd for j=1:clen
|
| 376 |
x[j] = {binary_operators[i]}(x[j], y[j])
|
| 377 |
end"""
|
| 378 |
op_runner += """
|
| 379 |
end
|
| 380 |
end"""
|
|
|
|
| 381 |
if len(unary_operators) > 0:
|
| 382 |
op_runner += """
|
| 383 |
@inline function UNAOP!(x::Array{Float32, 1}, i::Int, clen::Int)
|
|
|
|
| 387 |
end"""
|
| 388 |
for i in range(1, len(unary_operators)):
|
| 389 |
op_runner += f"""
|
| 390 |
+
elseif i === {i + 1}
|
| 391 |
@inbounds @simd for j=1:clen
|
| 392 |
x[j] = {unary_operators[i]}(x[j])
|
| 393 |
end"""
|
| 394 |
op_runner += """
|
| 395 |
end
|
| 396 |
end"""
|
|
|
|
| 397 |
def_hyperparams += op_runner
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 398 |
if use_custom_variable_names:
|
| 399 |
def_hyperparams += f"""
|
| 400 |
+
const varMap = {'["' + '", "'.join(variable_names) + '"]'}"""
|
| 401 |
+
return def_hyperparams
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
|
|
|
|
|
|
|
| 403 |
|
| 404 |
+
def make_constraints_str(binary_operators, constraints, unary_operators):
    """Build the Julia source declaring the operator constraint arrays.

    Args:
        binary_operators: Binary operator names. Each must be a key of
            ``constraints`` whose value is indexable at ``[0]`` and ``[1]``
            with integers.
        constraints: Mapping from operator name to its constraint value.
        unary_operators: Unary operator names. Each must be a key of
            ``constraints`` whose value is an integer.

    Returns:
        Two lines of Julia code, ``const una_constraints = [...]`` followed by
        ``const bin_constraints = [...]``, with entries in the same order as
        the operator lists. Empty operator lists produce empty arrays.

    Raises:
        KeyError: If an operator has no entry in ``constraints``.
        ValueError: If a constraint value does not accept the ``d`` format
            code (for example a float).
    """
    # str.join replaces the hand-rolled "first element" flag and repeated
    # string concatenation; the generated text is unchanged.
    una_entries = ", ".join(f"{constraints[op]:d}" for op in unary_operators)
    bin_entries = ", ".join(
        f"({constraints[op][0]:d}, {constraints[op][1]:d})"
        for op in binary_operators
    )
    return (
        f"const una_constraints = [{una_entries}]\n"
        f"const bin_constraints = [{bin_entries}]"
    )
|
| 424 |
|
| 425 |
|
| 426 |
def handle_constraints(binary_operators, constraints, unary_operators):
|