Spaces:
Sleeping
Sleeping
Commit
·
5db0d89
1
Parent(s):
8b49600
Overhaul docstrings
Browse files- pysr/sr.py +106 -119
pysr/sr.py
CHANGED
|
@@ -130,125 +130,112 @@ def pysr(X, y, weights=None,
|
|
| 130 |
equations, but you should adjust `niterations`,
|
| 131 |
`binary_operators`, `unary_operators` to your requirements.
|
| 132 |
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
maxsize
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
variable_names
|
| 206 |
-
|
| 207 |
-
batching
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
batchSize
|
| 211 |
-
select_k_features
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
the temp directory. Deletion is then controlled with the \
|
| 240 |
-
delete_tempfiles argument.
|
| 241 |
-
output_jax_format (bool): Whether to create a 'jax_format' column in the output, \
|
| 242 |
-
containing jax-callable functions and the default parameters in a jax array.
|
| 243 |
-
output_torch_format (bool): Whether to create a 'torch_format' column in the output, \
|
| 244 |
-
containing a torch module with trainable parameters.
|
| 245 |
-
|
| 246 |
-
# Returns
|
| 247 |
-
|
| 248 |
-
equations (pd.DataFrame/list): Results dataframe, \
|
| 249 |
-
giving complexity, MSE, and equations (as strings), as well as functional \
|
| 250 |
-
forms. If list, each element corresponds to a dataframe of equations \
|
| 251 |
-
for each output.
|
| 252 |
"""
|
| 253 |
if binary_operators is None:
|
| 254 |
binary_operators = '+ * - /'.split(' ')
|
|
|
|
| 130 |
equations, but you should adjust `niterations`,
|
| 131 |
`binary_operators`, `unary_operators` to your requirements.
|
| 132 |
|
| 133 |
+
:param X: 2D array. Rows are examples, columns are features. If pandas DataFrame, the columns are used for variable names (so make sure they don't contain spaces).
|
| 134 |
+
:type X: np.ndarray/pandas.DataFrame
|
| 135 |
+
:param y: 1D array (rows are examples) or 2D array (rows are examples, columns are outputs). Putting in a 2D array will trigger a search for equations for each feature of y.
|
| 136 |
+
:type y: np.ndarray
|
| 137 |
+
:param weights: same shape as y. Each element is how to weight the mean-square-error loss for that particular element of y.
|
| 138 |
+
:type weights: np.ndarray
|
| 139 |
+
:param binary_operators: List of strings giving the binary operators in Julia's Base. Default is ["+", "-", "*", "/",].
|
| 140 |
+
:type binary_operators: list
|
| 141 |
+
:param unary_operators: Same but for operators taking a single scalar. Default is [].
|
| 142 |
+
:type unary_operators: list
|
| 143 |
+
:param procs: Number of processes (=number of populations running).
|
| 144 |
+
:type procs: int
|
| 145 |
+
:param loss: String of Julia code specifying the loss function. Can either be a loss from LossFunctions.jl, or your own loss written as a function. Examples of custom written losses include: `myloss(x, y) = abs(x-y)` for non-weighted, or `myloss(x, y, w) = w*abs(x-y)` for weighted. Among the included losses, these are as follows. Regression: `LPDistLoss{P}()`, `L1DistLoss()`, `L2DistLoss()` (mean square), `LogitDistLoss()`, `HuberLoss(d)`, `L1EpsilonInsLoss(ε)`, `L2EpsilonInsLoss(ε)`, `PeriodicLoss(c)`, `QuantileLoss(τ)`. Classification: `ZeroOneLoss()`, `PerceptronLoss()`, `L1HingeLoss()`, `SmoothedL1HingeLoss(γ)`, `ModifiedHuberLoss()`, `L2MarginLoss()`, `ExpLoss()`, `SigmoidLoss()`, `DWDMarginLoss(q)`.
|
| 146 |
+
:type loss: str
|
| 147 |
+
:param populations: Number of populations running.
|
| 148 |
+
:type populations: int
|
| 149 |
+
:param niterations: Number of iterations of the algorithm to run. The best equations are printed, and migrate between populations, at the end of each.
|
| 150 |
+
:type niterations: int
|
| 151 |
+
:param ncyclesperiteration: Number of total mutations to run, per 10 samples of the population, per iteration.
|
| 152 |
+
:type ncyclesperiteration: int
|
| 153 |
+
:param alpha: Initial temperature.
|
| 154 |
+
:type alpha: float
|
| 155 |
+
:param annealing: Whether to use annealing. You should (and it is default).
|
| 156 |
+
:type annealing: bool
|
| 157 |
+
:param fractionReplaced: How much of population to replace with migrating equations from other populations.
|
| 158 |
+
:type fractionReplaced: float
|
| 159 |
+
:param fractionReplacedHof: How much of population to replace with migrating equations from hall of fame.
|
| 160 |
+
:type fractionReplacedHof: float
|
| 161 |
+
:param npop: Number of individuals in each population
|
| 162 |
+
:type npop: int
|
| 163 |
+
:param parsimony: Multiplicative factor for how much to punish complexity.
|
| 164 |
+
:type parsimony: float
|
| 165 |
+
:param migration: Whether to migrate.
|
| 166 |
+
:type migration: bool
|
| 167 |
+
:param hofMigration: Whether to have the hall of fame migrate.
|
| 168 |
+
:type hofMigration: bool
|
| 169 |
+
:param shouldOptimizeConstants: Whether to numerically optimize constants (Nelder-Mead/Newton) at the end of each iteration.
|
| 170 |
+
:type shouldOptimizeConstants: bool
|
| 171 |
+
:param topn: How many top individuals migrate from each population.
|
| 172 |
+
:type topn: int
|
| 173 |
+
:param perturbationFactor: Constants are perturbed by a max factor of (perturbationFactor*T + 1). Either multiplied by this or divided by this.
|
| 174 |
+
:type perturbationFactor: float
|
| 175 |
+
:param weightAddNode: Relative likelihood for mutation to add a node
|
| 176 |
+
:type weightAddNode: float
|
| 177 |
+
:param weightInsertNode: Relative likelihood for mutation to insert a node
|
| 178 |
+
:type weightInsertNode: float
|
| 179 |
+
:param weightDeleteNode: Relative likelihood for mutation to delete a node
|
| 180 |
+
:type weightDeleteNode: float
|
| 181 |
+
:param weightDoNothing: Relative likelihood for mutation to leave the individual
|
| 182 |
+
:type weightDoNothing: float
|
| 183 |
+
:param weightMutateConstant: Relative likelihood for mutation to change the constant slightly in a random direction.
|
| 184 |
+
:type weightMutateConstant: float
|
| 185 |
+
:param weightMutateOperator: Relative likelihood for mutation to swap an operator.
|
| 186 |
+
:type weightMutateOperator: float
|
| 187 |
+
:param weightRandomize: Relative likelihood for mutation to completely delete and then randomly generate the equation
|
| 188 |
+
:type weightRandomize: float
|
| 189 |
+
:param weightSimplify: Relative likelihood for mutation to simplify constant parts by evaluation
|
| 190 |
+
:type weightSimplify: float
|
| 191 |
+
:param timeout: Time in seconds to timeout search
|
| 192 |
+
:type timeout: float
|
| 193 |
+
:param equation_file: Where to save the files (.csv separated by |)
|
| 194 |
+
:type equation_file: str
|
| 195 |
+
:param verbosity: What verbosity level to use. 0 means minimal print statements.
|
| 196 |
+
:type verbosity: int
|
| 197 |
+
:param progress: Whether to use a progress bar instead of printing to stdout.
|
| 198 |
+
:type progress: bool
|
| 199 |
+
:param maxsize: Max size of an equation.
|
| 200 |
+
:type maxsize: int
|
| 201 |
+
:param maxdepth: Max depth of an equation. You can use both maxsize and maxdepth. maxdepth is by default set to = maxsize, which means that it is redundant.
|
| 202 |
+
:type maxdepth: int
|
| 203 |
+
:param fast_cycle: (experimental) - batch over population subsamples. This is a slightly different algorithm than regularized evolution, but does cycles 15% faster. May be algorithmically less efficient.
|
| 204 |
+
:type fast_cycle: bool
|
| 205 |
+
:param variable_names: a list of names for the variables, other than "x0", "x1", etc.
|
| 206 |
+
:type variable_names: list
|
| 207 |
+
:param batching: whether to compare population members on small batches during evolution. Still uses full dataset for comparing against hall of fame.
|
| 208 |
+
:type batching: bool
|
| 209 |
+
:param batchSize: the amount of data to use if doing batching.
|
| 210 |
+
:type batchSize: int
|
| 211 |
+
:param select_k_features: whether to run feature selection in Python using random forests, before passing to the symbolic regression code. None means no feature selection; an int means select that many features.
|
| 212 |
+
:type select_k_features: None/int
|
| 213 |
+
:param warmupMaxsizeBy: whether to slowly increase max size from a small number up to the maxsize (if greater than 0). If greater than 0, says the fraction of training time at which the current maxsize will reach the user-passed maxsize.
|
| 214 |
+
:type warmupMaxsizeBy: float
|
| 215 |
+
:param constraints: dictionary of int (unary) or 2-tuples (binary), this enforces maxsize constraints on the individual arguments of operators. E.g., `'pow': (-1, 1)` says that power laws can have any complexity left argument, but only 1 complexity exponent. Use this to force more interpretable solutions.
|
| 216 |
+
:type constraints: dict
|
| 217 |
+
:param useFrequency: whether to measure the frequency of complexities, and use that instead of parsimony to explore equation space. Will naturally find equations of all complexities.
|
| 218 |
+
:type useFrequency: bool
|
| 219 |
+
:param julia_optimization: Optimization level (0, 1, 2, 3)
|
| 220 |
+
:type julia_optimization: int
|
| 221 |
+
:param tempdir: directory for the temporary files
|
| 222 |
+
:type tempdir: str/None
|
| 223 |
+
:param delete_tempfiles: whether to delete the temporary files after finishing
|
| 224 |
+
:type delete_tempfiles: bool
|
| 225 |
+
:param julia_project: a Julia environment location containing a Project.toml (and potentially the source code for SymbolicRegression.jl). Default gives the Python package directory, where a Project.toml file should be present from the install.
|
| 226 |
+
:type julia_project: str/None
|
| 227 |
+
:param user_input: Whether to ask for user input or not for installing (to be used for automated scripts). Will choose to install when asked.
|
| 228 |
+
:type user_input: bool
|
| 229 |
+
:param update: Whether to automatically update Julia packages.
|
| 230 |
+
:type update: bool
|
| 231 |
+
:param temp_equation_file: Whether to put the hall of fame file in the temp directory. Deletion is then controlled with the delete_tempfiles argument.
|
| 232 |
+
:type temp_equation_file: bool
|
| 233 |
+
:param output_jax_format: Whether to create a 'jax_format' column in the output, containing jax-callable functions and the default parameters in a jax array.
|
| 234 |
+
:type output_jax_format: bool
|
| 235 |
+
:param output_torch_format: Whether to create a 'torch_format' column in the output, containing a torch module with trainable parameters.
|
| 236 |
+
:type output_torch_format: bool
|
| 237 |
+
:returns: Results dataframe, giving complexity, MSE, and equations (as strings), as well as functional forms. If list, each element corresponds to a dataframe of equations for each output.
|
| 238 |
+
:type: pd.DataFrame/list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
"""
|
| 240 |
if binary_operators is None:
|
| 241 |
binary_operators = '+ * - /'.split(' ')
|