res_reg_lmnt_awikner.classes
1import pandas as pd 2from tqdm import tqdm 3import numpy as np 4import time 5from itertools import product 6import sys 7import os 8import getopt 9from res_reg_lmnt_awikner.helpers import get_windows_path, get_filename 10from res_reg_lmnt_awikner.lorenzrungekutta_numba import lorenzrungekutta 11from res_reg_lmnt_awikner.ks_etdrk4 import kursiv_predict 12from scipy.sparse.linalg import eigs 13from scipy.sparse import csc_matrix 14 15__docformat__ = "google" 16 17 18class RunOpts: 19 """This class contains the various parameters and options that will be used during the time series data generation, 20 reservoir generation, training, testing, and output.""" 21 def __init__(self, argv=None, 22 runflag=True, 23 train_time=20000, 24 test_time=None, 25 sync_time=2000, 26 discard_time=500, 27 res_size=500, 28 res_per_test=1, 29 noise_realizations=1, 30 num_tests=1, 31 num_trains=1, 32 traintype='normal', 33 noisetype='gaussian', 34 system='KS', 35 savepred=False, 36 save_time_rms=False, 37 squarenodes=True, 38 rho=0.6, 39 sigma=0.1, 40 theta=0.1, 41 leakage=1.0, 42 bias_type='new_random', 43 win_type='full_0centered', 44 debug_mode=False, 45 pmap=False, 46 machine='deepthought2', 47 ifray=False, 48 tau=None, 49 num_cpus=1, 50 metric='mean_rms', 51 return_all=True, 52 save_eigenvals=False, 53 max_valid_time=2000, 54 noise_values_array=np.logspace(-4, 3, num=3, base=10), 55 reg_values=np.append(0., np.logspace(-11, -9, 5)), 56 res_start=0, 57 train_start=0, 58 test_start=0, 59 reg_train_times=None, 60 root_folder=None, 61 prior='zero', 62 save_truth=False): 63 """Initializes the RunOpts class. 64 65 Raises: 66 ValueError: If an input instance variable has an unaccepted value. 67 TypeError: If an input instance variable has an incorrect type.""" 68 self.argv = argv 69 """Command line input for generating a RunOpts object. If left as None, then the object will be generated using 70 the other inputs or defaults. If multiple instances of the same variable are given, the class will default 71 to the command line input. See RunOpts.get_run_opts for more information. Default: None""" 72 self.system = system 73 """String denoting which dynamical system we are obtaining time series data from. Options: 'lorenz' for the 74 Lorenz 63 equations, 'KS' for the Kuramoto-Sivashinsky equation with 64 grid points and a periodicity length of 75 22, and KS_d2175 for the Kuramoto-Sivashinksy equation with 64 grid points and a periodicity length of 21.75. 76 Default: 'KS'""" 77 self.tau = tau 78 """Time between each data point in the time series data. If system = 'lorenz', this value must be evenly divided 79 by 0.01 (the integration time step). If left as None, then the class will set tau to the default value for the 80 particular dynamical system. Default: None""" 81 self.train_time = train_time 82 """Number of training data points. Default: 20000""" 83 self.test_time = test_time 84 """Number of testing data points. If left as None, then the class will default to 4000 if system = 'lorenz', or 85 16000 if system = 'KS' or 'KS_d2175'. Default: None""" 86 self.sync_time = sync_time 87 """Number of data points used to synchronize the reservoir to each test data set. Default: 2000""" 88 self.discard_time = discard_time 89 """Number of data points used to synchronize the reservoir to each training data set. Default: 500""" 90 self.res_size = res_size 91 """Number of nodes in the reservoir. Default: 500""" 92 self.rho = rho 93 """Reservoir spectral radius. Default: 0.6""" 94 self.sigma = sigma 95 """Reservoir input scaling. Default: 0.1""" 96 self.theta = theta 97 """Reservoir input bias scaling. Default: 0.1""" 98 self.leakage = leakage 99 """Reservoir leaking rate. Default: 1.0""" 100 self.squarenodes = squarenodes 101 """Boolean denoting whether or not the squared node states are including in the reservoir feature vector. 102 Default: True""" 103 self.bias_type = bias_type 104 """Type of reservoir input bias to be used. See the Reservoir class for available options. 105 Default: new_random""" 106 self.win_type = win_type 107 """Type of input coupling matrix to be used. See the Reservoir class for available options. 108 Default: full_0centered""" 109 self.traintype = traintype 110 """Type of training to be used to determine the reservoir output coupling matrix. There are a number of options 111 available, but those used in the paper are: 112 113 'normal' - Standard reservoir training, potentially with input noise added. 114 115 'gradientk%d' % (Number of noise steps) - Reservoir training with no noise and LMNT regularization for a number of 116 noise steps > 1, or Jacobian regularization for a number of noise steps = 1. 117 118 'regzerok%d' % (Number of noise steps) - Reservoir training with no noise and LMNT/Jacobian regularization computed 119 using a zero-input and zero reservoir state. 120 121 Default: 'normal' 122 """ 123 self.noisetype = noisetype 124 """Type of noise to be added to the reservoir input during training. Options are 'none' and 'gaussian'. 125 Default: 'none'""" 126 self.noise_values_array = noise_values_array 127 """Numpy array containing the variance of the added input noise (if noisetype = 'gaussian') or the LMNT/Jacobian 128 regularization parameter value (if traintype = 'gradientk%d' or 'regzerok%d'). Each value contained in the array 129 will be tested separately using each of the reservoirs, training, and testing data sets. 130 Default: np.logspace(-4, 3, num=3, base=10)""" 131 self.reg_train_times = reg_train_times 132 """Numpy array containing the number of training data points to be used to train the LMNT or Jacobian 133 regularization. If left as None, then the class will default to an array containing only 134 the total number of training data points for standard LMNT/Jacobian or the number of noise steps 135 if using zero-input LMNT. Default: None""" 136 self.noise_realizations = noise_realizations 137 """Number of input noise realizations used to train the reservoir (if training with noise). If not training with 138 noise, set to 1. Default: 1""" 139 self.reg_values = reg_values 140 """Numpy array containing the Tikhonov regularization parameter values. Each value contained in the array 141 will be tested separately using each of the reservoirs, training, and testing data sets. 142 Default: np.append(0., np.logspace(-11, -9, 5))""" 143 self.prior = prior 144 """Prior to be used when computing the output coupling matrix using Tikhonov regularization. Options are: 145 146 'zero' - Standard Tikhonov regularization with a zero prior. 147 148 'input_pass' - Tikhonov regularization with a persistence prior (i.e., set the input pass-through weights to 1). 149 150 Default: 'zero'""" 151 self.max_valid_time = max_valid_time 152 """Maximum valid time for each valid time test during the testing period. This should be set so that 153 test_time / max_valid_time is a whole number greater than 0. Default: 2000""" 154 self.res_per_test = res_per_test 155 """Number of random reservoir realizations to test. Default: 1""" 156 self.num_trains = num_trains 157 """Number of independently generated training data sets to test with. Default: 1""" 158 self.num_tests = num_tests 159 """Number of independently generated testing data sets to test with. Default: 1""" 160 self.res_start = res_start 161 """Starting iterate for generating the random seeds that are used to generate the reservoir. Default: 0""" 162 self.train_start = train_start 163 """Starting iterate for generating the random seeds that are used to generate the training data sets. 164 Default: 0""" 165 self.test_start = test_start 166 """Starting iterate for generating the random seeds that are used to generate the testing data sets. 167 Default: 0""" 168 self.root_folder = root_folder 169 """Location where output data will be stored in the Data folder. If None, then defaults to the current working 170 directory. Default: None""" 171 self.return_all = return_all 172 """Boolean for determining of all results should be returned, or only the results with the obtained using the 173 "best" Tikhonov regularization parameter value based on the selected metric. Default: True""" 174 self.metric = metric 175 """Metric for determining the "best" results. Not used if return_all = True. Options include 'mean_rms', 'max_rms', 176 and 'stable_frac'. Caution: Some options may be deprecated. Default: 'mss-var'""" 177 self.savepred = savepred 178 """Boolean for determining if reservoir prediction time series should be saved. Default: False""" 179 self.save_time_rms = save_time_rms 180 """Boolean for determining if reservoir prediction RMS error should be saved. Default: False""" 181 self.pmap = pmap 182 """Boolean for determining if reservoir prediction Poincare maximum map should be saved. Default: False""" 183 self.save_eigenvals = save_eigenvals 184 """Boolean for determining if the eigenvalues of the LMNT/Jacobian regularization matrices should be saved. 185 Default: False""" 186 self.save_truth = save_truth 187 """Boolean for determining if the true testing data should be saved. Default: False""" 188 self.ifray = ifray 189 """Boolean for determining if ray should be used to compute results for multiple reservoirs and training 190 data sets. Default: False""" 191 self.num_cpus = num_cpus 192 """If using ray for paralellization, this sets the number of cpus to be used. Default: 1""" 193 self.machine = machine 194 """Machine which results are computed on. Leave as personal unless you are connecting to a ray cluster 195 elsewhere. Default: 'personal'""" 196 self.runflag = runflag 197 """True indicates that we are about to compute results, and the appropriate directories should be created. 198 Otherwise, we do not create additional directories. Default: True""" 199 self.debug_mode = debug_mode 200 """Boolean for determining if errors during reservoir training which could arise from non-convergence of the 201 eigenvalue solver should be suppressed. If left as False, will suppress errors im much of the core code, 202 so this should be set to True if making changes. Default: False""" 203 if not isinstance(argv, type(None)): 204 self.get_run_opts(argv) 205 if isinstance(self.tau, type(None)): 206 if self.system == 'lorenz': 207 self.tau = 0.1 208 elif self.system in ['KS', 'KS_d2175']: 209 self.tau = 0.25 210 if isinstance(self.test_time, type(None)): 211 if self.system == 'lorenz': 212 self.test_time = 4000 213 elif 'KS' in self.system: 214 self.test_time = 16000 215 if not isinstance(self.reg_train_times, np.ndarray): 216 if isinstance(self.reg_train_times, type(None)): 217 self.reg_train_times = np.array([self.train_time]) 218 elif isinstance(self.reg_train_times, int): 219 self.reg_train_times = np.array([self.reg_train_times]) 220 else: 221 raise TypeError() 222 if isinstance(self.root_folder, type(None)): 223 self.root_folder = os.getcwd() 224 if isinstance(self.reg_train_times, np.ndarray) or isinstance(self.reg_train_times, list): 225 if (self.reg_train_times[0] != self.train_time or len(self.reg_train_times) != 1) and ( 226 self.traintype in ['normal', 'normalres1', 'normalres2', 'rmean', 'rmeanres1', 227 'rmeanres2', 'rplusq', 'rplusqres1', 228 'rplusqres2'] or 'confined' in self.traintype): 229 print(('Traintypes "normal", "rmean", and "rplusq" are not ' 230 'compatible with fractional regularization training.')) 231 raise ValueError 232 if self.prior not in ['zero', 'input_pass']: 233 print('Prior type not recognized.') 234 raise ValueError 235 self.run_file_name = '' 236 self.run_folder_name = '' 237 self.get_file_name() 238 239 def get_file_name(self): 240 """Creates the folder and final data file name for the tests about to be run. 241 Args: 242 self: RunOpts object. 243 Returns: 244 RunOpts object with initialized folder and file name variables. Also creates the aforementioned folder 245 if not already created. 246 """ 247 if self.prior == 'zero': 248 prior_str = '' 249 else: 250 prior_str = '_prior_%s' % self.prior 251 if self.savepred: 252 predflag = '_wpred' 253 else: 254 predflag = '' 255 if self.save_time_rms: 256 timeflag = '_savetime' 257 else: 258 timeflag = '' 259 if self.squarenodes: 260 squarenodes_flag = '_squarenodes' 261 else: 262 squarenodes_flag = '' 263 if self.save_eigenvals: 264 eigenval_flag = '_wmoregradeigs' 265 else: 266 eigenval_flag = '' 267 if self.pmap: 268 pmap_flag = '_wpmap0' 269 else: 270 pmap_flag = '' 271 data_folder_base = os.path.join(self.root_folder, 'Data') 272 if not os.path.isdir(data_folder_base): 273 os.mkdir(data_folder_base) 274 275 if not self.return_all: 276 data_folder = os.path.join(data_folder_base, '%s_noisetest_noisetype_%s_traintype_%s' % ( 277 self.system, self.noisetype, self.traintype)) 278 run_name = ('%s%s%s%s%s%s_rho%0.1f_sigma%1.1e_leakage%0.3f_win_%s_bias_%s_tau%0.2f' 279 '_%dnodes_%dtrain_%dreals_noisetype_%s_traintype_%s%s_metric_%s') \ 280 % (self.system, predflag, timeflag, eigenval_flag, pmap_flag, squarenodes_flag, self.rho, 281 self.sigma, self.leakage, self.win_type, self.bias_type, self.tau, self.res_size, 282 self.train_time, self.noise_realizations, self.noisetype, self.traintype, prior_str, 283 self.metric) 284 else: 285 data_folder = os.path.join(data_folder_base, '%s_noisetest_noisetype_%s_traintype_%s' % ( 286 self.system, self.noisetype, self.traintype)) 287 run_name = ('%s%s%s%s%s%s_rho%0.1f_sigma%1.1e_leakage%0.3f_win_%s_bias_%s_tau%0.2f' 288 '_%dnodes_%dtrain_%dreals_noisetype_%s_traintype_%s%s') % ( 289 self.system, predflag, timeflag, eigenval_flag, pmap_flag, squarenodes_flag, self.rho, 290 self.sigma, 291 self.leakage, self.win_type, self.bias_type, self.tau, self.res_size, self.train_time, 292 self.noise_realizations, self.noisetype, self.traintype, prior_str) 293 294 if self.runflag: 295 if not os.path.isdir(data_folder): 296 os.mkdir(data_folder) 297 if not os.path.isdir(os.path.join(data_folder, run_name + '_folder')): 298 os.mkdir(os.path.join(data_folder, run_name + '_folder')) 299 self.run_file_name = os.path.join(data_folder, run_name + '.bz2') 300 self.run_folder_name = os.path.join(data_folder, run_name + '_folder') 301 302 def get_run_opts(self): 303 """Processes the command line input into instance variables. 304 Args: 305 self: RunOpts object. 306 Returns: 307 RunOpts object with instance variables set from command line input. 308 Raises: 309 GetoptError: Raises an error of command line arguments no recognized.""" 310 try: 311 opts, args = getopt.getopt(self.argv, "T:N:r:", 312 ['testtime=', 'noisetype=', 'traintype=', 'system=', 'res=', 313 'tests=', 'trains=', 'savepred=', 'tau=', 'rho=', 314 'sigma=', 'theta=','leakage=', 'bias_type=', 'debug=', 'win_type=', 315 'machine=', 'num_cpus=', 'pmap=', 'parallel=', 'metric=', 'returnall=', 316 'savetime=', 'saveeigenvals=', 'noisevals=', 'regvals=', 'maxvt=', 317 'resstart=', 'trainstart=', 'teststart=', 318 'squarenodes=', 'regtraintimes=', 'discardlen=', 319 'prior=', 'synctime=', 'datarootdir=', 'savetruth=']) 320 except getopt.GetoptError: 321 print('Error: Some options not recognized') 322 sys.exit(2) 323 for opt, arg in opts: 324 if opt == '-T': 325 self.train_time = int(arg) 326 print('Training iterations: %d' % self.train_time) 327 elif opt == '-N': 328 self.res_size = int(arg) 329 print('Reservoir nodes: %d' % self.res_size) 330 elif opt == '-r': 331 self.noise_realizations = int(arg) 332 print('Noise Realizations: %d' % self.noise_realizations) 333 elif opt == '--savetruth': 334 if arg == 'True': 335 self.save_truth = True 336 elif arg == 'False': 337 self.save_truth = False 338 else: 339 raise ValueError 340 elif opt == '--datarootdir': 341 self.root_folder = str(arg) 342 print('Root directory for data: %s' % self.root_folder) 343 elif opt == '--synctime': 344 self.sync_time = int(arg) 345 print('Sync time: %d' % self.sync_time) 346 elif opt == '--testtime': 347 self.test_time = int(arg) 348 if self.test_time == 0: 349 print('Testing duration: default') 350 else: 351 print('Testing duration: %d' % self.test_time) 352 elif opt == '--resstart': 353 self.res_start = int(arg) 354 print('Reservoir ensemble start: %d' % self.res_start) 355 elif opt == '--trainstart': 356 self.train_start = int(arg) 357 print('Train ensemble start: %d' % self.train_start) 358 elif opt == '--teststart': 359 self.test_start = int(arg) 360 print('Test ensemble start: %d' % self.test_start) 361 elif opt == '--saveeigenvals': 362 if arg == 'True': 363 self.save_eigenvals = True 364 elif arg == 'False': 365 self.save_eigenvals = False 366 else: 367 raise ValueError 368 print('Save grad reg eigenvalues: %s' % arg) 369 elif opt == '--prior': 370 self.prior = str(arg) 371 print('Prior type: %s' % self.prior) 372 elif opt == '--discardlen': 373 self.discard_time = int(arg) 374 print('Discard iterations: %d' % self.discard_time) 375 elif opt == '--squarenodes': 376 if arg == 'True': 377 self.squarenodes = True 378 elif arg == 'False': 379 self.squarenodes = False 380 else: 381 raise ValueError 382 print('Square reservoir nodes: %s' % arg) 383 elif opt == '--maxvt': 384 self.max_valid_time = int(arg) 385 print('Maximum valid time: %d' % self.max_valid_time) 386 elif opt == '--noisevals': 387 self.noise_values_array = np.array([float(noise) for noise in arg.split(',')]) 388 noise_str = '[ ' 389 for noise in self.noise_values_array: 390 noise_str += '%0.3e, ' % noise 391 noise_str = noise_str[:-2] + ' ]' 392 print('Noise values: %s' % noise_str) 393 elif opt == '--regvals': 394 self.reg_values = np.array([float(reg) for reg in arg.split(',')]) 395 reg_str = '[ ' 396 for reg in self.reg_values: 397 reg_str += '%0.3e, ' % reg 398 reg_str = reg_str[:-2] + ' ]' 399 print('Regularization values: %s' % reg_str) 400 elif opt == '--regtraintimes': 401 if arg != 'None': 402 self.reg_train_times = np.array([int(reg_train) for reg_train in arg.split(',')]) 403 reg_train_str = '[ ' 404 for reg_train in self.reg_train_times: 405 reg_train_str += '%0.3e, ' % reg_train 406 reg_train_str = reg_train_str[:-2] + ' ]' 407 print('Regularization training times: %s' % reg_train_str) 408 elif opt == '--savetime': 409 if str(arg) == 'True': 410 self.save_time_rms = True 411 elif str(arg) == 'False': 412 self.save_time_rms = False 413 else: 414 raise ValueError 415 elif opt == '--metric': 416 self.metric = str(arg) 417 if self.metric not in ['pmap_max_wass_dist', 'mean_rms', 'max_rms', 'mss_var']: 418 raise ValueError 419 print('Stability metric: %s' % self.metric) 420 elif opt == '--returnall': 421 if arg == 'True': 422 self.return_all = True 423 elif arg == 'False': 424 self.return_all = False 425 else: 426 raise ValueError 427 elif opt == '--parallel': 428 parallel_in = str(arg) 429 if parallel_in == 'True': 430 self.ifray = True 431 elif parallel_in == 'False': 432 self.ifray = False 433 else: 434 raise ValueError 435 elif opt == '--pmap': 436 pmap_in = str(arg) 437 if pmap_in == 'True': 438 self.pmap = True 439 elif pmap_in == 'False': 440 self.pmap = False 441 else: 442 raise ValueError 443 elif opt == '--machine': 444 self.machine = str(arg) 445 if self.machine not in ['deepthought2', 'personal']: 446 raise ValueError 447 print('Machine: %s' % self.machine) 448 elif opt == '--num_cpus': 449 self.num_cpus = int(arg) 450 print('Number of CPUS: %d' % self.num_cpus) 451 elif opt == '--rho': 452 self.rho = float(arg) 453 print('Rho: %f' % self.rho) 454 elif opt == '--sigma': 455 self.sigma = float(arg) 456 print('Sigma: %f' % self.sigma) 457 elif opt == '--theta': 458 self.theta = float(arg) 459 print('Theta: %f' % self.theta) 460 elif opt == '--leakage': 461 self.leakage = float(arg) 462 print('Leakage: %f' % self.leakage) 463 elif opt == '--tau': 464 self.tau = float(arg) 465 print('Reservoir timestep: %f' % self.tau) 466 elif opt == '--win_type': 467 self.win_type = str(arg) 468 print('Win Type: %s' % self.win_type) 469 elif opt == '--bias_type': 470 self.bias_type = str(arg) 471 print('Bias Type: %s' % self.bias_type) 472 elif opt == '--res': 473 self.res_per_test = int(arg) 474 print('Number of reservoirs: %d' % self.res_per_test) 475 elif opt == '--tests': 476 self.num_tests = int(arg) 477 print('Number of tests: %d' % self.num_tests) 478 elif opt == '--trains': 479 self.num_trains = int(arg) 480 print('Number of training data sequences: %d' % self.num_trains) 481 elif opt == '--savepred': 482 if arg == 'True': 483 self.savepred = True 484 elif arg == 'False': 485 self.savepred = False 486 print('Saving predictions: %s' % arg) 487 elif opt == '--noisetype': 488 self.noisetype = str(arg) 489 print('Noise type: %s' % self.noisetype) 490 elif opt == '--traintype': 491 self.traintype = str(arg) 492 print('Training type: %s' % self.traintype) 493 elif opt == '--system': 494 self.system = str(arg) 495 print('System: %s' % self.system) 496 elif opt == '--debug': 497 if arg == 'True': 498 self.debug_mode = True 499 elif arg == 'False': 500 self.debug_mode = False 501 print('Debug Mode: %s' % arg) 502 503 504class Reservoir: 505 """Class for initializing and storing the reservoir matrices and internal states.""" 506 def __init__(self, run_opts, res_gen, res_itr, input_size, avg_degree=3): 507 """Initializes the Reservoir object. 508 Args: 509 run_opts: RunOpts object containing parameters used to generate the reservoir. 510 res_gen: A numpy.random.Generator object used to generate the random matrices in the Reservoir. 511 res_itr: Reservoir iteration tag. 512 input_size: Number of elements in reservoir input. 513 avg_degree: Average in-degree of each reservoir node (i.e., the average number of edges that connect into each vertex in the graph). Default: 3 514 Returns: 515 Constructed Reservoir object. 516 Raises: 517 ValueError: If win_type or bias_type is not recognized.""" 518 # Define class for storing reservoir layers generated from input parameters and an input random number generator 519 self.rsvr_size = run_opts.res_size 520 self.res_itr = res_itr 521 522 density = avg_degree / self.rsvr_size 523 524 if run_opts.win_type == 'full_0centered': 525 unnormalized_W = 2 * res_gen.random((self.rsvr_size, self.rsvr_size)) - 1 526 else: 527 unnormalized_W = res_gen.random((self.rsvr_size, self.rsvr_size)) 528 for i in range(unnormalized_W[:, 0].size): 529 for j in range(unnormalized_W[0].size): 530 if res_gen.random(1) > density: 531 unnormalized_W[i][j] = 0 532 533 max_eig = eigs(unnormalized_W, k=1, 534 return_eigenvectors=False, maxiter=10 ** 5, v0=res_gen.random(self.rsvr_size)) 535 536 W_sp = csc_matrix(np.ascontiguousarray( 537 run_opts.rho / np.abs(max_eig[0]) * unnormalized_W)) 538 self.W_data, self.W_indices, self.W_indptr, self.W_shape = \ 539 (W_sp.data, W_sp.indices, W_sp.indptr, np.array(list(W_sp.shape))) 540 # print('Avg. degree of W:') 541 # print(self.W_data.size/rsvr_size) 542 543 # print('Adjacency matrix section:') 544 # print(self.W_data[:4]) 545 546 if run_opts.win_type == 'dense': 547 if run_opts.bias_type != 'new_random': 548 raise ValueError 549 Win = (res_gen.random(self.rsvr_size * (input_size + 1)).reshape(self.rsvr_size, 550 input_size + 1) * 2 - 1) * run_opts.sigma 551 else: 552 if 'full' in run_opts.win_type: 553 input_vars = np.arange(input_size) 554 elif run_opts.win_type == 'x': 555 input_vars = np.array([0]) 556 else: 557 print('Win_type not recognized.') 558 raise ValueError() 559 if run_opts.bias_type == 'old': 560 const_frac = 0.15 561 const_conn = int(self.rsvr_size * const_frac) 562 Win = np.zeros((self.rsvr_size, input_size + 1)) 563 Win[:const_conn, 0] = (res_gen.random( 564 Win[:const_conn, 0].size) * 2 - 1) * run_opts.theta 565 q = int((self.rsvr_size - const_conn) // input_vars.size) 566 for i, var in enumerate(input_vars): 567 Win[const_conn + q * i:const_conn + q * 568 (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma 569 elif run_opts.bias_type == 'new_random': 570 Win = np.zeros((self.rsvr_size, input_size + 1)) 571 Win[:, 0] = (res_gen.random(self.rsvr_size) * 2 - 1) * run_opts.theta 572 q = int(self.rsvr_size // input_vars.size) 573 for i, var in enumerate(input_vars): 574 Win[q * i:q * (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma 575 leftover_nodes = self.rsvr_size - q * input_vars.size 576 for i in range(leftover_nodes): 577 Win[self.rsvr_size - leftover_nodes + i, input_vars[res_gen.integers(input_vars.size)]] = \ 578 (res_gen.random() * 2 - 1) * run_opts.sigma 579 elif run_opts.bias_type == 'new_new_random': 580 Win = np.zeros((self.rsvr_size, input_size + 1)) 581 Win[:, 0] = (res_gen.random(self.rsvr_size) * 2 - 1) * run_opts.theta 582 q = int(self.rsvr_size // input_vars.size) 583 for i, var in enumerate(input_vars): 584 Win[q * i:q * (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma 585 leftover_nodes = self.rsvr_size - q * input_vars.size 586 var = input_vars[res_gen.choice( 587 input_vars.size, size=leftover_nodes, replace=False)] 588 Win[self.rsvr_size - leftover_nodes:, var + 589 1] = (res_gen.random(leftover_nodes) * 2 - 1) * run_opts.sigma 590 elif run_opts.bias_type == 'new_const': 591 Win = np.zeros((self.rsvr_size, input_size + 1)) 592 Win[:, 0] = run_opts.theta 593 q = int(self.rsvr_size // input_vars.size) 594 for i, var in enumerate(input_vars): 595 Win[q * i:q * (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma 596 leftover_nodes = self.rsvr_size - q * input_vars.size 597 var = input_vars[res_gen.integers( 598 input_vars.size, size=leftover_nodes)] 599 Win[self.rsvr_size - leftover_nodes:, var + 600 1] = (res_gen.random(leftover_nodes) * 2 - 1) * run_opts.sigma 601 else: 602 print('bias_type not recognized.') 603 raise ValueError() 604 605 Win_sp = csc_matrix(Win) 606 self.Win_data, self.Win_indices, self.Win_indptr, self.Win_shape = \ 607 np.ascontiguousarray(Win_sp.data), \ 608 np.ascontiguousarray(Win_sp.indices), \ 609 np.ascontiguousarray(Win_sp.indptr), \ 610 np.array(list(Win_sp.shape)) 611 612 self.X = (res_gen.random((self.rsvr_size, run_opts.train_time + run_opts.discard_time + 2)) * 2 - 1) 613 self.Wout = np.array([]) 614 self.leakage = run_opts.leakage 615 616 617class ResOutput: 618 """Class for holding the output from a reservoir computer test. Is typically used to save the output from one of 619 the (potentially parallel) runs.""" 620 def __init__(self, run_opts, noise_array): 621 """Creates the ResOutput object. 622 Args: 623 self: ResOutput object 624 run_opts: RunOpts object for the test. 625 noise_array: Array of noise/Jacobian/LMNT regularization parameter values used in this test. 626 Returns: 627 self: initialized ResOutput object.""" 628 self.stable_frac_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 629 dtype=object) 630 self.mean_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 631 dtype=object) 632 self.max_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 633 dtype=object) 634 self.variances_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 635 dtype=object) 636 self.valid_time_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 637 dtype=object) 638 self.rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 639 dtype=object) 640 self.preds_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 641 dtype=object) 642 self.wass_dist_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 643 dtype=object) 644 self.pmap_max_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 645 dtype=object) 646 self.pmap_max_wass_dist_out = np.zeros( 647 (noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), dtype=object) 648 self.stable_frac_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 649 dtype=object) 650 self.train_mean_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 651 dtype=object) 652 self.train_max_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 653 dtype=object) 654 self.grad_eigenvals_out = np.zeros((noise_array.size, run_opts.reg_train_times.size), dtype=object) 655 self.pred_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 656 dtype=object) 657 658 def save(self, run_opts, noise_array, res_itr, train_seed, test_idxs): 659 """Saves the data in the ResOutput object to a series of .csv files. 660 Args: 661 self: ResOutput object 662 run_opts: RunOpts object for test. 663 noise_array: Array of noise/Jacobian/LMNT regularization parameter values used in this test. 664 res_itr: Index for the reservoir iteration used. 665 train_seed: Index for the training data iteration used. 666 test_idxs: Indices for the testing data iterations used. 667 Returns: 668 Saves .csv files.""" 669 for (i, noise_val), (j, reg_train_time) in product(enumerate(noise_array), enumerate(run_opts.reg_train_times)): 670 print((i, j)) 671 stable_frac = np.zeros(run_opts.reg_values.size) 672 for k, array_elem in enumerate(self.stable_frac_out[i, :, j]): 673 stable_frac[k] = array_elem 674 np.savetxt( 675 get_filename(run_opts.run_folder_name, 'stable_frac', res_itr, train_seed, noise_val, reg_train_time), 676 stable_frac, delimiter=',') 677 678 mean_rms = np.zeros((run_opts.reg_values.size, *self.mean_rms_out[i, 0, j].shape)) 679 for k, array_elem in enumerate(self.mean_rms_out[i, :, j]): 680 mean_rms[k] = array_elem 681 np.savetxt( 682 get_filename(run_opts.run_folder_name, 'mean_rms', res_itr, train_seed, noise_val, reg_train_time), 683 mean_rms, delimiter=',') 684 685 max_rms = np.zeros((run_opts.reg_values.size, *self.max_rms_out[i, 0, j].shape)) 686 for k, array_elem in enumerate(self.max_rms_out[i, :, j]): 687 max_rms[k] = array_elem 688 np.savetxt( 689 get_filename(run_opts.run_folder_name, 'max_rms', res_itr, train_seed, noise_val, reg_train_time), 690 max_rms, delimiter=',') 691 692 variances = np.zeros((run_opts.reg_values.size, *self.variances_out[i, 0, j].shape)) 693 for k, array_elem in enumerate(self.variances_out[i, :, j]): 694 variances[k] = array_elem 695 np.savetxt( 696 get_filename(run_opts.run_folder_name, 'variance', res_itr, train_seed, noise_val, reg_train_time), 697 variances, delimiter=',') 698 699 valid_time = np.zeros((run_opts.reg_values.size, *self.valid_time_out[i, 0, j].shape)) 700 for k, array_elem in enumerate(self.valid_time_out[i, :, j]): 701 valid_time[k] = array_elem 702 for k in range(run_opts.num_tests): 703 np.savetxt( 704 get_filename(run_opts.run_folder_name, 'valid_time', res_itr, train_seed, noise_val, reg_train_time, 705 test_idx=test_idxs[k]), valid_time[:, k], delimiter=',') 706 707 train_mean_rms = np.zeros((run_opts.reg_values.size, *self.train_mean_rms_out[i, 0, j].shape)) 708 for k, array_elem in enumerate(self.train_mean_rms_out[i, :, j]): 709 train_mean_rms[k] = array_elem 710 np.savetxt(get_filename(run_opts.run_folder_name, 'train_mean_rms', res_itr, train_seed, noise_val, 711 reg_train_time), 712 train_mean_rms, delimiter=',') 713 714 train_max_rms = np.zeros((run_opts.reg_values.size, *self.train_max_rms_out[i, 0, j].shape)) 715 for k, array_elem in enumerate(self.train_max_rms_out[i, :, j]): 716 train_max_rms[k] = array_elem 717 np.savetxt( 718 get_filename(run_opts.run_folder_name, 'train_max_rms', res_itr, train_seed, noise_val, reg_train_time), 719 train_max_rms, delimiter=',') 720 721 if run_opts.pmap: 722 pmap_max_wass_dist = np.zeros((run_opts.reg_values.size, *self.pmap_max_wass_dist_out[i, 0, j].shape)) 723 for k, array_elem in enumerate(self.pmap_max_wass_dist_out[i, :, j]): 724 pmap_max_wass_dist[k] = array_elem 725 np.savetxt(get_filename(run_opts.run_folder_name, 'pmap_max_wass_dist', res_itr, train_seed, noise_val, 726 reg_train_time), 727 pmap_max_wass_dist, delimiter=',') 728 729 pmap_max = np.zeros((run_opts.reg_values.size, run_opts.num_tests), dtype=object) 730 for k, l in product(np.arange(run_opts.reg_values.size), np.arange(run_opts.num_tests)): 731 pmap_max[k, l] = self.pmap_max_out[i, k, j][l] 732 pmap_len = [len(arr) for arr in pmap_max[k, l]] 733 max_pmap_len = max(pmap_len) 734 pmap_max_save = np.zeros((len(pmap_max[k, l]), max_pmap_len)) 735 for m in range(len(pmap_max[k, l])): 736 pmap_max_save[m, :pmap_len[m]] = pmap_max[k, l][m] 737 np.savetxt(get_filename(run_opts.run_folder_name, 'pmap_max', res_itr, train_seed, noise_val, 738 reg_train_time, 739 test_idx=test_idxs[l], reg=run_opts.reg_values[k]), 740 pmap_max_save, delimiter=',') 741 if run_opts.save_time_rms: 742 rms = np.zeros((run_opts.reg_values.size, *self.rms_out[i, 0, j].shape)) 743 for k, array_elem in enumerate(self.rms_out[i, :, j]): 744 rms[k] = array_elem 745 for k in range(run_opts.num_tests): 746 np.savetxt( 747 get_filename(run_opts.run_folder_name, 'rms', res_itr, train_seed, noise_val, reg_train_time, 748 test_idx=test_idxs[k]), 749 rms[:, k], delimiter=',') 750 if run_opts.save_eigenvals: 751 eigenvals = self.grad_eigenvals_out[i, j] 752 np.savetxt(get_filename(run_opts.run_folder_name, 'gradreg_eigenvals', res_itr, train_seed, noise_val, 753 reg_train_time), 754 eigenvals, delimiter=',') 755 if run_opts.savepred: 756 pred = np.zeros((run_opts.reg_values.size, *self.pred_out[i, 0, j].shape)) 757 for k, array_elem in enumerate(self.pred_out[i, :, j]): 758 pred[k] = array_elem 759 for l, (k, reg) in product(range(run_opts.num_tests), enumerate(run_opts.reg_values)): 760 np.savetxt( 761 get_filename(run_opts.run_folder_name, 'pred', res_itr, train_seed, noise_val, reg_train_time, 762 test_idx=test_idxs[l], reg=reg), 763 pred[k, l], delimiter=',') 764 765 766class NumericalModel: 767 """Class for generating training or testing data using one of the test numerical models.""" 768 def __init__(self, tau=0.1, int_step=1, T=300, ttsplit=5000, u0=0, system='lorenz', 769 params=np.array([[], []], dtype=np.complex128)): 770 """Creates the NumericalModel object. 771 Args: 772 self: NumericalModel object. 773 tau: Time step between measurements of the system dynamics. 774 int_step: the number of numerical integration steps between each measurement. 775 T: Total number of measurements. 776 ttsplit: Number of measurements to be used in the training data set. 777 u0: Initial condition for integration. 778 system: Name of system to generate data from. Options: 'lorenz', 'KS','KS_d2175' 779 params: Internal parameters for model integration. Currently only used by KS options. 780 Returns: 781 Complete NumericalModel object with precomputed internal parameters.""" 782 783 if system == 'lorenz': 784 u_arr = np.ascontiguousarray(lorenzrungekutta( 785 u0[0], u0[1], u0[2], T, tau)) 786 self.input_size = 3 787 788 u_arr[0] = (u_arr[0] - 0) / 7.929788629895004 789 u_arr[1] = (u_arr[1] - 0) / 8.9932616136662 790 u_arr[2] = (u_arr[2] - 23.596294463016896) / 8.575917849311919 791 self.params = params 792 793 elif system == 'KS': 794 u_arr, self.params = kursiv_predict(u0, tau=tau, T=T, params=params) 795 self.input_size = u_arr.shape[0] 796 u_arr = np.ascontiguousarray(u_arr) / (1.1876770355823614) 797 elif system == 'KS_d2175': 798 u_arr, self.params = kursiv_predict(u0, tau=tau, T=T, d=21.75, params=params) 799 self.input_size = u_arr.shape[0] 800 u_arr = np.ascontiguousarray(u_arr) / (1.2146066380280796) 801 else: 802 raise ValueError 803 804 self.train_length = ttsplit 805 self.u_arr_train = u_arr[:, :ttsplit + 1] 806 807 # u[ttsplit], the (ttsplit + 1)st element, is the last in u_arr_train and the first in u_arr_test 808 self.u_arr_test = u_arr[:, ttsplit:] 809 810 811class ResPreds: 812 """Class for loading and storing prediction time series data generated from reservoir computer tests.""" 813 def __init__(self, run_opts): 814 """Loads the prediction data from .csv files. 815 Args: 816 run_opts: RunOpts object containing the run parameters.""" 817 self.data_filename, self.pred_folder = run_opts.run_file_name, run_opts.run_folder_name 818 self.noise_vals = run_opts.noise_values_array 819 self.reg_train_vals = run_opts.reg_train_times 820 self.reg_vals = run_opts.reg_values 821 print('Starding data read...') 822 # print(self.pred_folder) 823 self.preds = np.zeros((run_opts.res_per_test, run_opts.num_trains, run_opts.num_tests, self.noise_vals.size, 824 self.reg_train_vals.size, self.reg_vals.size), dtype=object) 825 total_vals = self.preds.size 826 with tqdm(total=total_vals) as pbar: 827 for (i, res), (j, train), (k, test), (l, noise), (m, reg_train), (n, reg) in product( 828 enumerate(np.arange(run_opts.res_start, run_opts.res_start + run_opts.res_per_test)), 829 enumerate(np.arange(run_opts.train_start, run_opts.train_start + run_opts.num_trains)), 830 enumerate(np.arange(run_opts.test_start, run_opts.test_start + run_opts.num_tests)), 831 enumerate(self.noise_vals), 832 enumerate(self.reg_train_vals), enumerate(self.reg_vals)): 833 filename = get_filename(self.pred_folder, 'pred', res, train, noise, reg_train, reg = reg, test_idx = test) 834 if os.name == 'nt' and len(filename) >= 260: 835 filename = get_windows_path(filename) 836 self.preds[i, j, k, l, m, n] = np.loadtxt(filename, delimiter=',') 837 pbar.update(1) 838 839 840class ResPmap: 841 """Class for loading and storing Poincare maximum map data from reservoir predictions 842 generated from reservoir computer tests.""" 843 def __init__(self, run_opts): 844 """Loads the Poincare maxmimum map data from .csv files. 845 Args: 846 run_opts: RunOpts object containing the run parameters.""" 847 self.data_filename, self.pred_folder = run_opts.run_file_name, run_opts.run_folder_name 848 self.noise_vals = run_opts.noise_values_array 849 self.reg_train_vals = run_opts.reg_train_times 850 self.reg_vals = run_opts.reg_values 851 print('Starding data read...') 852 # print(self.pred_folder) 853 self.pmap_max = np.zeros((run_opts.res_per_test, run_opts.num_trains, run_opts.num_tests, self.noise_vals.size, 854 self.reg_train_vals.size, self.reg_vals.size), dtype=object) 855 total_vals = self.pmap_max.size 856 with tqdm(total=total_vals) as pbar: 857 for (i, res), (j, train), (k, test), (l, noise), (m, reg_train), (n, reg) in product( 858 enumerate(np.arange(run_opts.res_start, run_opts.res_start + run_opts.res_per_test)), 859 enumerate(np.arange(run_opts.train_start, run_opts.train_start + run_opts.num_trains)), 860 enumerate(np.arange(run_opts.test_start, run_opts.test_start + run_opts.num_tests)), 861 enumerate(self.noise_vals), 862 enumerate(self.reg_train_vals), enumerate(self.reg_vals)): 863 filename = get_filename(self.pred_folder, 'pmap_max', res, train, noise, reg_train, reg = reg, test_idx = test) 864 if os.name == 'nt' and len(filename) >= 260: 865 filename = get_windows_path(filename) 866 pmap_in = np.loadtxt(filename, delimiter=',') 867 pmap_max = [pmap_in[o, pmap_in[o] != 0.] for o in range(pmap_in.shape[0])] 868 self.pmap_max[i, j, k, l, m, n] = pmap_max 869 pbar.update(1) 870 871 872class ResData: 873 """Class for loading and storing prediction analysis data generated from reservoir computer tests.""" 874 def __init__(self, run_opts): 875 """Loads the prediction analysis data from a compressed pandas data file. 876 Args: 877 run_opts: RunOpts object containing the run parameters.""" 878 self.data_filename, self.pred_folder = run_opts.run_file_name, run_opts.run_folder_name 879 print('Starding data read...') 880 tic = time.perf_counter() 881 # print(self.data_filename) 882 if os.name == 'nt' and len(self.data_filename) >= 260: 883 self.data_filename = get_windows_path(self.data_filename) 884 self.data = pd.read_csv(self.data_filename, index_col=0) 885 toc = time.perf_counter() 886 print('Data reading finished in %0.2f sec.' % (toc - tic)) 887 self.res = pd.unique(self.data['res']) 888 self.train = pd.unique(self.data['train']) 889 self.test = pd.unique(self.data['test']) 890 self.noise = pd.unique(self.data['noise']) 891 self.reg = pd.unique(self.data['reg']) 892 self.reg_train = pd.unique(self.data['reg_train']) 893 self.nan = pd.isna(self.data['variance']) 894 895 def shape(self): 896 """Returns the shape of the pandas data frame""" 897 return self.data.shape 898 899 def size(self): 900 """Returns the size of the pandas data frame""" 901 return self.data.size 902 903 def data_slice(self, res=np.array([]), train=np.array([]), test=np.array([]), 904 reg_train=np.array([]), noise=np.array([]), reg=np.array([]), median_flag=False, 905 reduce_axes=[], metric='', gross_frac_metric='valid_time', gross_err_bnd=1e2, 906 reduce_fun=pd.DataFrame.median): 907 """Slices and/or finds the best results using a metric computed by the reduce_fun. 908 Args: 909 self: ResPreds object 910 res: Indices for reservoir results to be returned/optimized over. If left as an empty array, uses all indices. 911 train: Indices for training data set results to be returned/optimized over. If left as an empty array, uses all indices. 912 test: Indices for testing data set results to be returned/optimized over. If left as an empty array, uses all indices. 913 reg_train: Number of training data points for regularization to be returned/optimized over. If left as an empty array, uses all values. 914 noise: Noise/Jacobian/LMNT regularization parameter value results to be returned/optimized over. If left as an empty array, uses all values. 915 reg: Tikhonov regularization paramter value results to be returned/optimized over. If left as an empty array, uses all values. 916 median_flag: Boolean indicating whether the data should be optimized. 917 reduce_axes: List containing the axes to be optimized over. Elements can be 'res', 'train', 'test', 'reg_train', 'noise', or 'reg'. 918 metric: Metric to be used to compute which parameters give the best result. Options are: 919 'gross_frac': Lowest fraction of gross error 920 'mean_rms': Lowest mean map error. 921 'max_rms: Lowest maximum map error. 922 'valid_time': Highest valid prediction time.' 923 gross_frac_metrix: If using 'gross_frac' as a metric, this is the secondary metric that will be used if multiple parameters give equally good prediction results. 924 gross_err_bnd: The cutoff in the mean map error above which predictions are considered to have gross error. 925 reduce_fun: Function for computing the overall performance for a given set of parameters over many tests. 926 Returns: 927 Pandas DataFrame containing the sliced and optimized prediction results. 928 Raises: 929 ValueError: If any of the inputs are not recognized/incompatible.""" 930 input_list = [res, train, test, reg_train, noise, reg] 931 name_list = np.array(['res', 'train', 'test', 'reg_train', 'noise', 'reg']) 932 data_names = [name for name in self.data.columns if name not in name_list] 933 base_list = [self.res, self.train, self.test, self.reg_train, self.noise, self.reg] 934 slice_vals = np.zeros(len(input_list), dtype=object) 935 if median_flag: 936 if not isinstance(reduce_axes, list): 937 print('reduce_axes must be a list.') 938 return ValueError 939 elif len(reduce_axes) == 0: 940 print('median_flag is True, but no axes to compute the median over are specified.') 941 raise ValueError 942 elif not all(axis in ['res', 'train', 'test', 'noise', 'reg','reg_train'] for axis in reduce_axes) or \ 943 len(reduce_axes) != len(set(reduce_axes)): 944 print('reduce_axis max only contain res, train, test, noise, or reg.') 945 raise ValueError 946 elif metric not in ['', 'mean_rms', 'max_rms', 'valid_time', 'pmap_max_wass_dist', 'gross_frac']: 947 print('Metric not recognized.') 948 raise ValueError 949 elif len(metric) == 0 and any(axis in ['noise', 'reg'] for axis in reduce_axes): 950 print('Cannot reduce axes with unspecified metric.') 951 raise ValueError 952 953 for i, item in enumerate(input_list): 954 if isinstance(item, np.ndarray): 955 if item.size == 0: 956 slice_vals[i] = base_list[i] 957 else: 958 slice_vals[i] = item 959 elif isinstance(item, list): 960 if len(item) == 0: 961 slice_vals[i] = self.item 962 else: 963 slice_vals[i] = np.array(item) 964 else: 965 slice_vals[i] = np.array([item]) 966 967 sliced_data = self.data 968 for name, slice_val in zip(name_list, slice_vals): 969 sliced_data = sliced_data[sliced_data[name].isin(slice_val)] 970 971 if not median_flag: 972 return sliced_data 973 elif median_flag: 974 median_slice_data = pd.DataFrame() 975 remaining_vars = [var not in reduce_axes for var in name_list[:3]] 976 remaining_vars.extend([True, True, True]) 977 if np.all(remaining_vars): 978 median_slice_data = sliced_data 979 nans = pd.isna(median_slice_data['mean_rms']) 980 near_nans = median_slice_data['mean_rms'] > gross_err_bnd 981 median_slice_data['gross_count'] = np.zeros(median_slice_data.shape[0]) 982 median_slice_data['gross_frac'] = np.zeros(median_slice_data.shape[0]) 983 median_slice_data.loc[nans | near_nans, 'gross_count'] = 1.0 984 median_slice_data.loc[nans | near_nans, 'gross_frac'] = 1.0 985 else: 986 total_vars = 1 987 for slice_val in slice_vals[remaining_vars]: 988 total_vars *= slice_val.size 989 for vars_set in product(*slice_vals[remaining_vars]): 990 row_dict = {} 991 reduced_sliced_data = sliced_data 992 for var, name in zip(vars_set, name_list[remaining_vars]): 993 row_dict[name] = var 994 reduced_sliced_data = reduced_sliced_data[reduced_sliced_data[name] == var] 995 if reduced_sliced_data.size == 0: 996 for name in data_names: 997 if 'variance' in name: 998 row_dict[name] = 0. 999 elif 'valid_time' not in name: 1000 row_dict[name] = 1e10 1001 row_dict['valid_time'] = 0. 1002 row_dict['gross_count'] = 0. 1003 row_dict['gross_frac'] = 1.0 1004 row_dict['data_present'] = False 1005 else: 1006 nans = pd.isna(reduced_sliced_data['mean_rms']) 1007 near_nans = reduced_sliced_data['mean_rms'] > gross_err_bnd 1008 nan_count = reduced_sliced_data[nans | near_nans].shape[0] 1009 nan_frac = nan_count / reduced_sliced_data.shape[0] 1010 valid_times = np.array([]) 1011 for name in data_names: 1012 if 'variance' in name: 1013 row_dict[name] = reduce_fun(reduced_sliced_data.loc[~nans, name]) 1014 elif 'valid_time' not in name: 1015 reduced_sliced_data.loc[nans, name] = 1e10 1016 row_dict[name] = reduce_fun(reduced_sliced_data[name]) 1017 else: 1018 reduced_sliced_data.loc[pd.isna(reduced_sliced_data[name]), name] = 0.0 1019 valid_times = np.append(valid_times, reduced_sliced_data[name].to_numpy()) 1020 row_dict['valid_time'] = reduce_fun(pd.DataFrame(valid_times)).to_numpy()[0] 1021 row_dict['gross_count'] = nan_count 1022 row_dict['gross_frac'] = nan_frac 1023 row_dict['data_present'] = True 1024 for key in row_dict.keys(): 1025 if not isinstance(row_dict[key], list) and not isinstance(row_dict[key], np.ndarray): 1026 row_dict[key] = [row_dict[key]] 1027 median_slice_data = pd.concat([median_slice_data, pd.DataFrame(row_dict)], ignore_index=True, 1028 verify_integrity=True) 1029 if len(metric) == 0: 1030 return median_slice_data 1031 else: 1032 best_median_slice_data = pd.DataFrame() 1033 remaining_vars = [var not in reduce_axes for var in name_list] 1034 total_vars = 1 1035 for slice_val in slice_vals[remaining_vars]: 1036 total_vars *= slice_val.size 1037 for vars_set in product(*slice_vals[remaining_vars]): 1038 row_dict = {} 1039 best_reduced_slice_data = median_slice_data 1040 for var, name in zip(vars_set, name_list[remaining_vars]): 1041 row_dict[name] = var 1042 best_reduced_slice_data = best_reduced_slice_data[best_reduced_slice_data[name] == var] 1043 if metric == 'valid_time': 1044 best_reduced_slice_data = best_reduced_slice_data[ 1045 best_reduced_slice_data[metric] == best_reduced_slice_data[metric].max()] 1046 elif metric == 'gross_frac': 1047 best_reduced_slice_data = best_reduced_slice_data[ 1048 best_reduced_slice_data[metric] == best_reduced_slice_data[metric].min()] 1049 if len(best_reduced_slice_data.shape) != 1: 1050 if gross_frac_metric == 'valid_time': 1051 best_reduced_slice_data = best_reduced_slice_data[ 1052 best_reduced_slice_data[gross_frac_metric] == best_reduced_slice_data[ 1053 gross_frac_metric].max()] 1054 else: 1055 best_reduced_slice_data = best_reduced_slice_data[ 1056 best_reduced_slice_data[gross_frac_metric] == best_reduced_slice_data[ 1057 gross_frac_metric].min()] 1058 else: 1059 best_reduced_slice_data = best_reduced_slice_data[ 1060 best_reduced_slice_data[metric] == best_reduced_slice_data[metric].min()] 1061 if len(best_reduced_slice_data.shape) != 1: 1062 best_reduced_slice_data = best_reduced_slice_data[best_reduced_slice_data['noise'] == 1063 best_reduced_slice_data['noise'].min()] 1064 if len(best_reduced_slice_data.shape) != 1: 1065 best_reduced_slice_data = best_reduced_slice_data[best_reduced_slice_data['reg'] == 1066 best_reduced_slice_data['reg'].min()] 1067 for key in best_reduced_slice_data.keys(): 1068 if not isinstance(best_reduced_slice_data[key], list) and not isinstance( 1069 best_reduced_slice_data[key], np.ndarray): 1070 best_reduced_slice_data[key] = [best_reduced_slice_data[key]] 1071 best_median_slice_data = pd.concat([best_median_slice_data, best_reduced_slice_data], 1072 ignore_index=True, verify_integrity=True) 1073 return best_median_slice_data
19class RunOpts: 20 """This class contains the various parameters and options that will be used during the time series data generation, 21 reservoir generation, training, testing, and output.""" 22 def __init__(self, argv=None, 23 runflag=True, 24 train_time=20000, 25 test_time=None, 26 sync_time=2000, 27 discard_time=500, 28 res_size=500, 29 res_per_test=1, 30 noise_realizations=1, 31 num_tests=1, 32 num_trains=1, 33 traintype='normal', 34 noisetype='gaussian', 35 system='KS', 36 savepred=False, 37 save_time_rms=False, 38 squarenodes=True, 39 rho=0.6, 40 sigma=0.1, 41 theta=0.1, 42 leakage=1.0, 43 bias_type='new_random', 44 win_type='full_0centered', 45 debug_mode=False, 46 pmap=False, 47 machine='deepthought2', 48 ifray=False, 49 tau=None, 50 num_cpus=1, 51 metric='mean_rms', 52 return_all=True, 53 save_eigenvals=False, 54 max_valid_time=2000, 55 noise_values_array=np.logspace(-4, 3, num=3, base=10), 56 reg_values=np.append(0., np.logspace(-11, -9, 5)), 57 res_start=0, 58 train_start=0, 59 test_start=0, 60 reg_train_times=None, 61 root_folder=None, 62 prior='zero', 63 save_truth=False): 64 """Initializes the RunOpts class. 65 66 Raises: 67 ValueError: If an input instance variable has an unaccepted value. 68 TypeError: If an input instance variable has an incorrect type.""" 69 self.argv = argv 70 """Command line input for generating a RunOpts object. If left as None, then the object will be generated using 71 the other inputs or defaults. If multiple instances of the same variable are given, the class will default 72 to the command line input. See RunOpts.get_run_opts for more information. Default: None""" 73 self.system = system 74 """String denoting which dynamical system we are obtaining time series data from. Options: 'lorenz' for the 75 Lorenz 63 equations, 'KS' for the Kuramoto-Sivashinsky equation with 64 grid points and a periodicity length of 76 22, and KS_d2175 for the Kuramoto-Sivashinksy equation with 64 grid points and a periodicity length of 21.75. 77 Default: 'KS'""" 78 self.tau = tau 79 """Time between each data point in the time series data. If system = 'lorenz', this value must be evenly divided 80 by 0.01 (the integration time step). If left as None, then the class will set tau to the default value for the 81 particular dynamical system. Default: None""" 82 self.train_time = train_time 83 """Number of training data points. Default: 20000""" 84 self.test_time = test_time 85 """Number of testing data points. If left as None, then the class will default to 4000 if system = 'lorenz', or 86 16000 if system = 'KS' or 'KS_d2175'. Default: None""" 87 self.sync_time = sync_time 88 """Number of data points used to synchronize the reservoir to each test data set. Default: 2000""" 89 self.discard_time = discard_time 90 """Number of data points used to synchronize the reservoir to each training data set. Default: 500""" 91 self.res_size = res_size 92 """Number of nodes in the reservoir. Default: 500""" 93 self.rho = rho 94 """Reservoir spectral radius. Default: 0.6""" 95 self.sigma = sigma 96 """Reservoir input scaling. Default: 0.1""" 97 self.theta = theta 98 """Reservoir input bias scaling. Default: 0.1""" 99 self.leakage = leakage 100 """Reservoir leaking rate. Default: 1.0""" 101 self.squarenodes = squarenodes 102 """Boolean denoting whether or not the squared node states are including in the reservoir feature vector. 103 Default: True""" 104 self.bias_type = bias_type 105 """Type of reservoir input bias to be used. See the Reservoir class for available options. 106 Default: new_random""" 107 self.win_type = win_type 108 """Type of input coupling matrix to be used. See the Reservoir class for available options. 109 Default: full_0centered""" 110 self.traintype = traintype 111 """Type of training to be used to determine the reservoir output coupling matrix. There are a number of options 112 available, but those used in the paper are: 113 114 'normal' - Standard reservoir training, potentially with input noise added. 115 116 'gradientk%d' % (Number of noise steps) - Reservoir training with no noise and LMNT regularization for a number of 117 noise steps > 1, or Jacobian regularization for a number of noise steps = 1. 118 119 'regzerok%d' % (Number of noise steps) - Reservoir training with no noise and LMNT/Jacobian regularization computed 120 using a zero-input and zero reservoir state. 121 122 Default: 'normal' 123 """ 124 self.noisetype = noisetype 125 """Type of noise to be added to the reservoir input during training. Options are 'none' and 'gaussian'. 126 Default: 'none'""" 127 self.noise_values_array = noise_values_array 128 """Numpy array containing the variance of the added input noise (if noisetype = 'gaussian') or the LMNT/Jacobian 129 regularization parameter value (if traintype = 'gradientk%d' or 'regzerok%d'). Each value contained in the array 130 will be tested separately using each of the reservoirs, training, and testing data sets. 131 Default: np.logspace(-4, 3, num=3, base=10)""" 132 self.reg_train_times = reg_train_times 133 """Numpy array containing the number of training data points to be used to train the LMNT or Jacobian 134 regularization. If left as None, then the class will default to an array containing only 135 the total number of training data points for standard LMNT/Jacobian or the number of noise steps 136 if using zero-input LMNT. Default: None""" 137 self.noise_realizations = noise_realizations 138 """Number of input noise realizations used to train the reservoir (if training with noise). If not training with 139 noise, set to 1. Default: 1""" 140 self.reg_values = reg_values 141 """Numpy array containing the Tikhonov regularization parameter values. Each value contained in the array 142 will be tested separately using each of the reservoirs, training, and testing data sets. 143 Default: np.append(0., np.logspace(-11, -9, 5))""" 144 self.prior = prior 145 """Prior to be used when computing the output coupling matrix using Tikhonov regularization. Options are: 146 147 'zero' - Standard Tikhonov regularization with a zero prior. 148 149 'input_pass' - Tikhonov regularization with a persistence prior (i.e., set the input pass-through weights to 1). 150 151 Default: 'zero'""" 152 self.max_valid_time = max_valid_time 153 """Maximum valid time for each valid time test during the testing period. This should be set so that 154 test_time / max_valid_time is a whole number greater than 0. Default: 2000""" 155 self.res_per_test = res_per_test 156 """Number of random reservoir realizations to test. Default: 1""" 157 self.num_trains = num_trains 158 """Number of independently generated training data sets to test with. Default: 1""" 159 self.num_tests = num_tests 160 """Number of independently generated testing data sets to test with. Default: 1""" 161 self.res_start = res_start 162 """Starting iterate for generating the random seeds that are used to generate the reservoir. Default: 0""" 163 self.train_start = train_start 164 """Starting iterate for generating the random seeds that are used to generate the training data sets. 165 Default: 0""" 166 self.test_start = test_start 167 """Starting iterate for generating the random seeds that are used to generate the testing data sets. 168 Default: 0""" 169 self.root_folder = root_folder 170 """Location where output data will be stored in the Data folder. If None, then defaults to the current working 171 directory. Default: None""" 172 self.return_all = return_all 173 """Boolean for determining of all results should be returned, or only the results with the obtained using the 174 "best" Tikhonov regularization parameter value based on the selected metric. Default: True""" 175 self.metric = metric 176 """Metric for determining the "best" results. Not used if return_all = True. Options include 'mean_rms', 'max_rms', 177 and 'stable_frac'. Caution: Some options may be deprecated. Default: 'mss-var'""" 178 self.savepred = savepred 179 """Boolean for determining if reservoir prediction time series should be saved. Default: False""" 180 self.save_time_rms = save_time_rms 181 """Boolean for determining if reservoir prediction RMS error should be saved. Default: False""" 182 self.pmap = pmap 183 """Boolean for determining if reservoir prediction Poincare maximum map should be saved. Default: False""" 184 self.save_eigenvals = save_eigenvals 185 """Boolean for determining if the eigenvalues of the LMNT/Jacobian regularization matrices should be saved. 186 Default: False""" 187 self.save_truth = save_truth 188 """Boolean for determining if the true testing data should be saved. Default: False""" 189 self.ifray = ifray 190 """Boolean for determining if ray should be used to compute results for multiple reservoirs and training 191 data sets. Default: False""" 192 self.num_cpus = num_cpus 193 """If using ray for paralellization, this sets the number of cpus to be used. Default: 1""" 194 self.machine = machine 195 """Machine which results are computed on. Leave as personal unless you are connecting to a ray cluster 196 elsewhere. Default: 'personal'""" 197 self.runflag = runflag 198 """True indicates that we are about to compute results, and the appropriate directories should be created. 199 Otherwise, we do not create additional directories. Default: True""" 200 self.debug_mode = debug_mode 201 """Boolean for determining if errors during reservoir training which could arise from non-convergence of the 202 eigenvalue solver should be suppressed. If left as False, will suppress errors im much of the core code, 203 so this should be set to True if making changes. Default: False""" 204 if not isinstance(argv, type(None)): 205 self.get_run_opts(argv) 206 if isinstance(self.tau, type(None)): 207 if self.system == 'lorenz': 208 self.tau = 0.1 209 elif self.system in ['KS', 'KS_d2175']: 210 self.tau = 0.25 211 if isinstance(self.test_time, type(None)): 212 if self.system == 'lorenz': 213 self.test_time = 4000 214 elif 'KS' in self.system: 215 self.test_time = 16000 216 if not isinstance(self.reg_train_times, np.ndarray): 217 if isinstance(self.reg_train_times, type(None)): 218 self.reg_train_times = np.array([self.train_time]) 219 elif isinstance(self.reg_train_times, int): 220 self.reg_train_times = np.array([self.reg_train_times]) 221 else: 222 raise TypeError() 223 if isinstance(self.root_folder, type(None)): 224 self.root_folder = os.getcwd() 225 if isinstance(self.reg_train_times, np.ndarray) or isinstance(self.reg_train_times, list): 226 if (self.reg_train_times[0] != self.train_time or len(self.reg_train_times) != 1) and ( 227 self.traintype in ['normal', 'normalres1', 'normalres2', 'rmean', 'rmeanres1', 228 'rmeanres2', 'rplusq', 'rplusqres1', 229 'rplusqres2'] or 'confined' in self.traintype): 230 print(('Traintypes "normal", "rmean", and "rplusq" are not ' 231 'compatible with fractional regularization training.')) 232 raise ValueError 233 if self.prior not in ['zero', 'input_pass']: 234 print('Prior type not recognized.') 235 raise ValueError 236 self.run_file_name = '' 237 self.run_folder_name = '' 238 self.get_file_name() 239 240 def get_file_name(self): 241 """Creates the folder and final data file name for the tests about to be run. 242 Args: 243 self: RunOpts object. 244 Returns: 245 RunOpts object with initialized folder and file name variables. Also creates the aforementioned folder 246 if not already created. 247 """ 248 if self.prior == 'zero': 249 prior_str = '' 250 else: 251 prior_str = '_prior_%s' % self.prior 252 if self.savepred: 253 predflag = '_wpred' 254 else: 255 predflag = '' 256 if self.save_time_rms: 257 timeflag = '_savetime' 258 else: 259 timeflag = '' 260 if self.squarenodes: 261 squarenodes_flag = '_squarenodes' 262 else: 263 squarenodes_flag = '' 264 if self.save_eigenvals: 265 eigenval_flag = '_wmoregradeigs' 266 else: 267 eigenval_flag = '' 268 if self.pmap: 269 pmap_flag = '_wpmap0' 270 else: 271 pmap_flag = '' 272 data_folder_base = os.path.join(self.root_folder, 'Data') 273 if not os.path.isdir(data_folder_base): 274 os.mkdir(data_folder_base) 275 276 if not self.return_all: 277 data_folder = os.path.join(data_folder_base, '%s_noisetest_noisetype_%s_traintype_%s' % ( 278 self.system, self.noisetype, self.traintype)) 279 run_name = ('%s%s%s%s%s%s_rho%0.1f_sigma%1.1e_leakage%0.3f_win_%s_bias_%s_tau%0.2f' 280 '_%dnodes_%dtrain_%dreals_noisetype_%s_traintype_%s%s_metric_%s') \ 281 % (self.system, predflag, timeflag, eigenval_flag, pmap_flag, squarenodes_flag, self.rho, 282 self.sigma, self.leakage, self.win_type, self.bias_type, self.tau, self.res_size, 283 self.train_time, self.noise_realizations, self.noisetype, self.traintype, prior_str, 284 self.metric) 285 else: 286 data_folder = os.path.join(data_folder_base, '%s_noisetest_noisetype_%s_traintype_%s' % ( 287 self.system, self.noisetype, self.traintype)) 288 run_name = ('%s%s%s%s%s%s_rho%0.1f_sigma%1.1e_leakage%0.3f_win_%s_bias_%s_tau%0.2f' 289 '_%dnodes_%dtrain_%dreals_noisetype_%s_traintype_%s%s') % ( 290 self.system, predflag, timeflag, eigenval_flag, pmap_flag, squarenodes_flag, self.rho, 291 self.sigma, 292 self.leakage, self.win_type, self.bias_type, self.tau, self.res_size, self.train_time, 293 self.noise_realizations, self.noisetype, self.traintype, prior_str) 294 295 if self.runflag: 296 if not os.path.isdir(data_folder): 297 os.mkdir(data_folder) 298 if not os.path.isdir(os.path.join(data_folder, run_name + '_folder')): 299 os.mkdir(os.path.join(data_folder, run_name + '_folder')) 300 self.run_file_name = os.path.join(data_folder, run_name + '.bz2') 301 self.run_folder_name = os.path.join(data_folder, run_name + '_folder') 302 303 def get_run_opts(self): 304 """Processes the command line input into instance variables. 305 Args: 306 self: RunOpts object. 307 Returns: 308 RunOpts object with instance variables set from command line input. 309 Raises: 310 GetoptError: Raises an error of command line arguments no recognized.""" 311 try: 312 opts, args = getopt.getopt(self.argv, "T:N:r:", 313 ['testtime=', 'noisetype=', 'traintype=', 'system=', 'res=', 314 'tests=', 'trains=', 'savepred=', 'tau=', 'rho=', 315 'sigma=', 'theta=','leakage=', 'bias_type=', 'debug=', 'win_type=', 316 'machine=', 'num_cpus=', 'pmap=', 'parallel=', 'metric=', 'returnall=', 317 'savetime=', 'saveeigenvals=', 'noisevals=', 'regvals=', 'maxvt=', 318 'resstart=', 'trainstart=', 'teststart=', 319 'squarenodes=', 'regtraintimes=', 'discardlen=', 320 'prior=', 'synctime=', 'datarootdir=', 'savetruth=']) 321 except getopt.GetoptError: 322 print('Error: Some options not recognized') 323 sys.exit(2) 324 for opt, arg in opts: 325 if opt == '-T': 326 self.train_time = int(arg) 327 print('Training iterations: %d' % self.train_time) 328 elif opt == '-N': 329 self.res_size = int(arg) 330 print('Reservoir nodes: %d' % self.res_size) 331 elif opt == '-r': 332 self.noise_realizations = int(arg) 333 print('Noise Realizations: %d' % self.noise_realizations) 334 elif opt == '--savetruth': 335 if arg == 'True': 336 self.save_truth = True 337 elif arg == 'False': 338 self.save_truth = False 339 else: 340 raise ValueError 341 elif opt == '--datarootdir': 342 self.root_folder = str(arg) 343 print('Root directory for data: %s' % self.root_folder) 344 elif opt == '--synctime': 345 self.sync_time = int(arg) 346 print('Sync time: %d' % self.sync_time) 347 elif opt == '--testtime': 348 self.test_time = int(arg) 349 if self.test_time == 0: 350 print('Testing duration: default') 351 else: 352 print('Testing duration: %d' % self.test_time) 353 elif opt == '--resstart': 354 self.res_start = int(arg) 355 print('Reservoir ensemble start: %d' % self.res_start) 356 elif opt == '--trainstart': 357 self.train_start = int(arg) 358 print('Train ensemble start: %d' % self.train_start) 359 elif opt == '--teststart': 360 self.test_start = int(arg) 361 print('Test ensemble start: %d' % self.test_start) 362 elif opt == '--saveeigenvals': 363 if arg == 'True': 364 self.save_eigenvals = True 365 elif arg == 'False': 366 self.save_eigenvals = False 367 else: 368 raise ValueError 369 print('Save grad reg eigenvalues: %s' % arg) 370 elif opt == '--prior': 371 self.prior = str(arg) 372 print('Prior type: %s' % self.prior) 373 elif opt == '--discardlen': 374 self.discard_time = int(arg) 375 print('Discard iterations: %d' % self.discard_time) 376 elif opt == '--squarenodes': 377 if arg == 'True': 378 self.squarenodes = True 379 elif arg == 'False': 380 self.squarenodes = False 381 else: 382 raise ValueError 383 print('Square reservoir nodes: %s' % arg) 384 elif opt == '--maxvt': 385 self.max_valid_time = int(arg) 386 print('Maximum valid time: %d' % self.max_valid_time) 387 elif opt == '--noisevals': 388 self.noise_values_array = np.array([float(noise) for noise in arg.split(',')]) 389 noise_str = '[ ' 390 for noise in self.noise_values_array: 391 noise_str += '%0.3e, ' % noise 392 noise_str = noise_str[:-2] + ' ]' 393 print('Noise values: %s' % noise_str) 394 elif opt == '--regvals': 395 self.reg_values = np.array([float(reg) for reg in arg.split(',')]) 396 reg_str = '[ ' 397 for reg in self.reg_values: 398 reg_str += '%0.3e, ' % reg 399 reg_str = reg_str[:-2] + ' ]' 400 print('Regularization values: %s' % reg_str) 401 elif opt == '--regtraintimes': 402 if arg != 'None': 403 self.reg_train_times = np.array([int(reg_train) for reg_train in arg.split(',')]) 404 reg_train_str = '[ ' 405 for reg_train in self.reg_train_times: 406 reg_train_str += '%0.3e, ' % reg_train 407 reg_train_str = reg_train_str[:-2] + ' ]' 408 print('Regularization training times: %s' % reg_train_str) 409 elif opt == '--savetime': 410 if str(arg) == 'True': 411 self.save_time_rms = True 412 elif str(arg) == 'False': 413 self.save_time_rms = False 414 else: 415 raise ValueError 416 elif opt == '--metric': 417 self.metric = str(arg) 418 if self.metric not in ['pmap_max_wass_dist', 'mean_rms', 'max_rms', 'mss_var']: 419 raise ValueError 420 print('Stability metric: %s' % self.metric) 421 elif opt == '--returnall': 422 if arg == 'True': 423 self.return_all = True 424 elif arg == 'False': 425 self.return_all = False 426 else: 427 raise ValueError 428 elif opt == '--parallel': 429 parallel_in = str(arg) 430 if parallel_in == 'True': 431 self.ifray = True 432 elif parallel_in == 'False': 433 self.ifray = False 434 else: 435 raise ValueError 436 elif opt == '--pmap': 437 pmap_in = str(arg) 438 if pmap_in == 'True': 439 self.pmap = True 440 elif pmap_in == 'False': 441 self.pmap = False 442 else: 443 raise ValueError 444 elif opt == '--machine': 445 self.machine = str(arg) 446 if self.machine not in ['deepthought2', 'personal']: 447 raise ValueError 448 print('Machine: %s' % self.machine) 449 elif opt == '--num_cpus': 450 self.num_cpus = int(arg) 451 print('Number of CPUS: %d' % self.num_cpus) 452 elif opt == '--rho': 453 self.rho = float(arg) 454 print('Rho: %f' % self.rho) 455 elif opt == '--sigma': 456 self.sigma = float(arg) 457 print('Sigma: %f' % self.sigma) 458 elif opt == '--theta': 459 self.theta = float(arg) 460 print('Theta: %f' % self.theta) 461 elif opt == '--leakage': 462 self.leakage = float(arg) 463 print('Leakage: %f' % self.leakage) 464 elif opt == '--tau': 465 self.tau = float(arg) 466 print('Reservoir timestep: %f' % self.tau) 467 elif opt == '--win_type': 468 self.win_type = str(arg) 469 print('Win Type: %s' % self.win_type) 470 elif opt == '--bias_type': 471 self.bias_type = str(arg) 472 print('Bias Type: %s' % self.bias_type) 473 elif opt == '--res': 474 self.res_per_test = int(arg) 475 print('Number of reservoirs: %d' % self.res_per_test) 476 elif opt == '--tests': 477 self.num_tests = int(arg) 478 print('Number of tests: %d' % self.num_tests) 479 elif opt == '--trains': 480 self.num_trains = int(arg) 481 print('Number of training data sequences: %d' % self.num_trains) 482 elif opt == '--savepred': 483 if arg == 'True': 484 self.savepred = True 485 elif arg == 'False': 486 self.savepred = False 487 print('Saving predictions: %s' % arg) 488 elif opt == '--noisetype': 489 self.noisetype = str(arg) 490 print('Noise type: %s' % self.noisetype) 491 elif opt == '--traintype': 492 self.traintype = str(arg) 493 print('Training type: %s' % self.traintype) 494 elif opt == '--system': 495 self.system = str(arg) 496 print('System: %s' % self.system) 497 elif opt == '--debug': 498 if arg == 'True': 499 self.debug_mode = True 500 elif arg == 'False': 501 self.debug_mode = False 502 print('Debug Mode: %s' % arg)
This class contains the various parameters and options that will be used during the time series data generation, reservoir generation, training, testing, and output.
22 def __init__(self, argv=None, 23 runflag=True, 24 train_time=20000, 25 test_time=None, 26 sync_time=2000, 27 discard_time=500, 28 res_size=500, 29 res_per_test=1, 30 noise_realizations=1, 31 num_tests=1, 32 num_trains=1, 33 traintype='normal', 34 noisetype='gaussian', 35 system='KS', 36 savepred=False, 37 save_time_rms=False, 38 squarenodes=True, 39 rho=0.6, 40 sigma=0.1, 41 theta=0.1, 42 leakage=1.0, 43 bias_type='new_random', 44 win_type='full_0centered', 45 debug_mode=False, 46 pmap=False, 47 machine='deepthought2', 48 ifray=False, 49 tau=None, 50 num_cpus=1, 51 metric='mean_rms', 52 return_all=True, 53 save_eigenvals=False, 54 max_valid_time=2000, 55 noise_values_array=np.logspace(-4, 3, num=3, base=10), 56 reg_values=np.append(0., np.logspace(-11, -9, 5)), 57 res_start=0, 58 train_start=0, 59 test_start=0, 60 reg_train_times=None, 61 root_folder=None, 62 prior='zero', 63 save_truth=False): 64 """Initializes the RunOpts class. 65 66 Raises: 67 ValueError: If an input instance variable has an unaccepted value. 68 TypeError: If an input instance variable has an incorrect type.""" 69 self.argv = argv 70 """Command line input for generating a RunOpts object. If left as None, then the object will be generated using 71 the other inputs or defaults. If multiple instances of the same variable are given, the class will default 72 to the command line input. See RunOpts.get_run_opts for more information. Default: None""" 73 self.system = system 74 """String denoting which dynamical system we are obtaining time series data from. Options: 'lorenz' for the 75 Lorenz 63 equations, 'KS' for the Kuramoto-Sivashinsky equation with 64 grid points and a periodicity length of 76 22, and KS_d2175 for the Kuramoto-Sivashinksy equation with 64 grid points and a periodicity length of 21.75. 77 Default: 'KS'""" 78 self.tau = tau 79 """Time between each data point in the time series data. If system = 'lorenz', this value must be evenly divided 80 by 0.01 (the integration time step). If left as None, then the class will set tau to the default value for the 81 particular dynamical system. Default: None""" 82 self.train_time = train_time 83 """Number of training data points. Default: 20000""" 84 self.test_time = test_time 85 """Number of testing data points. If left as None, then the class will default to 4000 if system = 'lorenz', or 86 16000 if system = 'KS' or 'KS_d2175'. Default: None""" 87 self.sync_time = sync_time 88 """Number of data points used to synchronize the reservoir to each test data set. Default: 2000""" 89 self.discard_time = discard_time 90 """Number of data points used to synchronize the reservoir to each training data set. Default: 500""" 91 self.res_size = res_size 92 """Number of nodes in the reservoir. Default: 500""" 93 self.rho = rho 94 """Reservoir spectral radius. Default: 0.6""" 95 self.sigma = sigma 96 """Reservoir input scaling. Default: 0.1""" 97 self.theta = theta 98 """Reservoir input bias scaling. Default: 0.1""" 99 self.leakage = leakage 100 """Reservoir leaking rate. Default: 1.0""" 101 self.squarenodes = squarenodes 102 """Boolean denoting whether or not the squared node states are including in the reservoir feature vector. 103 Default: True""" 104 self.bias_type = bias_type 105 """Type of reservoir input bias to be used. See the Reservoir class for available options. 106 Default: new_random""" 107 self.win_type = win_type 108 """Type of input coupling matrix to be used. See the Reservoir class for available options. 109 Default: full_0centered""" 110 self.traintype = traintype 111 """Type of training to be used to determine the reservoir output coupling matrix. There are a number of options 112 available, but those used in the paper are: 113 114 'normal' - Standard reservoir training, potentially with input noise added. 115 116 'gradientk%d' % (Number of noise steps) - Reservoir training with no noise and LMNT regularization for a number of 117 noise steps > 1, or Jacobian regularization for a number of noise steps = 1. 118 119 'regzerok%d' % (Number of noise steps) - Reservoir training with no noise and LMNT/Jacobian regularization computed 120 using a zero-input and zero reservoir state. 121 122 Default: 'normal' 123 """ 124 self.noisetype = noisetype 125 """Type of noise to be added to the reservoir input during training. Options are 'none' and 'gaussian'. 126 Default: 'none'""" 127 self.noise_values_array = noise_values_array 128 """Numpy array containing the variance of the added input noise (if noisetype = 'gaussian') or the LMNT/Jacobian 129 regularization parameter value (if traintype = 'gradientk%d' or 'regzerok%d'). Each value contained in the array 130 will be tested separately using each of the reservoirs, training, and testing data sets. 131 Default: np.logspace(-4, 3, num=3, base=10)""" 132 self.reg_train_times = reg_train_times 133 """Numpy array containing the number of training data points to be used to train the LMNT or Jacobian 134 regularization. If left as None, then the class will default to an array containing only 135 the total number of training data points for standard LMNT/Jacobian or the number of noise steps 136 if using zero-input LMNT. Default: None""" 137 self.noise_realizations = noise_realizations 138 """Number of input noise realizations used to train the reservoir (if training with noise). If not training with 139 noise, set to 1. Default: 1""" 140 self.reg_values = reg_values 141 """Numpy array containing the Tikhonov regularization parameter values. Each value contained in the array 142 will be tested separately using each of the reservoirs, training, and testing data sets. 143 Default: np.append(0., np.logspace(-11, -9, 5))""" 144 self.prior = prior 145 """Prior to be used when computing the output coupling matrix using Tikhonov regularization. Options are: 146 147 'zero' - Standard Tikhonov regularization with a zero prior. 148 149 'input_pass' - Tikhonov regularization with a persistence prior (i.e., set the input pass-through weights to 1). 150 151 Default: 'zero'""" 152 self.max_valid_time = max_valid_time 153 """Maximum valid time for each valid time test during the testing period. This should be set so that 154 test_time / max_valid_time is a whole number greater than 0. Default: 2000""" 155 self.res_per_test = res_per_test 156 """Number of random reservoir realizations to test. Default: 1""" 157 self.num_trains = num_trains 158 """Number of independently generated training data sets to test with. Default: 1""" 159 self.num_tests = num_tests 160 """Number of independently generated testing data sets to test with. Default: 1""" 161 self.res_start = res_start 162 """Starting iterate for generating the random seeds that are used to generate the reservoir. Default: 0""" 163 self.train_start = train_start 164 """Starting iterate for generating the random seeds that are used to generate the training data sets. 165 Default: 0""" 166 self.test_start = test_start 167 """Starting iterate for generating the random seeds that are used to generate the testing data sets. 168 Default: 0""" 169 self.root_folder = root_folder 170 """Location where output data will be stored in the Data folder. If None, then defaults to the current working 171 directory. Default: None""" 172 self.return_all = return_all 173 """Boolean for determining of all results should be returned, or only the results with the obtained using the 174 "best" Tikhonov regularization parameter value based on the selected metric. Default: True""" 175 self.metric = metric 176 """Metric for determining the "best" results. Not used if return_all = True. Options include 'mean_rms', 'max_rms', 177 and 'stable_frac'. Caution: Some options may be deprecated. Default: 'mss-var'""" 178 self.savepred = savepred 179 """Boolean for determining if reservoir prediction time series should be saved. Default: False""" 180 self.save_time_rms = save_time_rms 181 """Boolean for determining if reservoir prediction RMS error should be saved. Default: False""" 182 self.pmap = pmap 183 """Boolean for determining if reservoir prediction Poincare maximum map should be saved. Default: False""" 184 self.save_eigenvals = save_eigenvals 185 """Boolean for determining if the eigenvalues of the LMNT/Jacobian regularization matrices should be saved. 186 Default: False""" 187 self.save_truth = save_truth 188 """Boolean for determining if the true testing data should be saved. Default: False""" 189 self.ifray = ifray 190 """Boolean for determining if ray should be used to compute results for multiple reservoirs and training 191 data sets. Default: False""" 192 self.num_cpus = num_cpus 193 """If using ray for paralellization, this sets the number of cpus to be used. Default: 1""" 194 self.machine = machine 195 """Machine which results are computed on. Leave as personal unless you are connecting to a ray cluster 196 elsewhere. Default: 'personal'""" 197 self.runflag = runflag 198 """True indicates that we are about to compute results, and the appropriate directories should be created. 199 Otherwise, we do not create additional directories. Default: True""" 200 self.debug_mode = debug_mode 201 """Boolean for determining if errors during reservoir training which could arise from non-convergence of the 202 eigenvalue solver should be suppressed. If left as False, will suppress errors im much of the core code, 203 so this should be set to True if making changes. Default: False""" 204 if not isinstance(argv, type(None)): 205 self.get_run_opts(argv) 206 if isinstance(self.tau, type(None)): 207 if self.system == 'lorenz': 208 self.tau = 0.1 209 elif self.system in ['KS', 'KS_d2175']: 210 self.tau = 0.25 211 if isinstance(self.test_time, type(None)): 212 if self.system == 'lorenz': 213 self.test_time = 4000 214 elif 'KS' in self.system: 215 self.test_time = 16000 216 if not isinstance(self.reg_train_times, np.ndarray): 217 if isinstance(self.reg_train_times, type(None)): 218 self.reg_train_times = np.array([self.train_time]) 219 elif isinstance(self.reg_train_times, int): 220 self.reg_train_times = np.array([self.reg_train_times]) 221 else: 222 raise TypeError() 223 if isinstance(self.root_folder, type(None)): 224 self.root_folder = os.getcwd() 225 if isinstance(self.reg_train_times, np.ndarray) or isinstance(self.reg_train_times, list): 226 if (self.reg_train_times[0] != self.train_time or len(self.reg_train_times) != 1) and ( 227 self.traintype in ['normal', 'normalres1', 'normalres2', 'rmean', 'rmeanres1', 228 'rmeanres2', 'rplusq', 'rplusqres1', 229 'rplusqres2'] or 'confined' in self.traintype): 230 print(('Traintypes "normal", "rmean", and "rplusq" are not ' 231 'compatible with fractional regularization training.')) 232 raise ValueError 233 if self.prior not in ['zero', 'input_pass']: 234 print('Prior type not recognized.') 235 raise ValueError 236 self.run_file_name = '' 237 self.run_folder_name = '' 238 self.get_file_name()
Initializes the RunOpts class.
Raises
- ValueError: If an input instance variable has an unaccepted value.
- TypeError: If an input instance variable has an incorrect type.
Command line input for generating a RunOpts object. If left as None, then the object will be generated using the other inputs or defaults. If multiple instances of the same variable are given, the class will default to the command line input. See RunOpts.get_run_opts for more information. Default: None
String denoting which dynamical system we are obtaining time series data from. Options: 'lorenz' for the Lorenz 63 equations, 'KS' for the Kuramoto-Sivashinsky equation with 64 grid points and a periodicity length of 22, and KS_d2175 for the Kuramoto-Sivashinksy equation with 64 grid points and a periodicity length of 21.75. Default: 'KS'
Time between each data point in the time series data. If system = 'lorenz', this value must be evenly divided by 0.01 (the integration time step). If left as None, then the class will set tau to the default value for the particular dynamical system. Default: None
Number of testing data points. If left as None, then the class will default to 4000 if system = 'lorenz', or 16000 if system = 'KS' or 'KS_d2175'. Default: None
Number of data points used to synchronize the reservoir to each test data set. Default: 2000
Number of data points used to synchronize the reservoir to each training data set. Default: 500
Boolean denoting whether or not the squared node states are including in the reservoir feature vector. Default: True
Type of reservoir input bias to be used. See the Reservoir class for available options. Default: new_random
Type of input coupling matrix to be used. See the Reservoir class for available options. Default: full_0centered
Type of training to be used to determine the reservoir output coupling matrix. There are a number of options available, but those used in the paper are:
'normal' - Standard reservoir training, potentially with input noise added.
'gradientk%d' % (Number of noise steps) - Reservoir training with no noise and LMNT regularization for a number of noise steps > 1, or Jacobian regularization for a number of noise steps = 1.
'regzerok%d' % (Number of noise steps) - Reservoir training with no noise and LMNT/Jacobian regularization computed using a zero-input and zero reservoir state.
Default: 'normal'
Type of noise to be added to the reservoir input during training. Options are 'none' and 'gaussian'. Default: 'none'
Numpy array containing the variance of the added input noise (if noisetype = 'gaussian') or the LMNT/Jacobian regularization parameter value (if traintype = 'gradientk%d' or 'regzerok%d'). Each value contained in the array will be tested separately using each of the reservoirs, training, and testing data sets. Default: np.logspace(-4, 3, num=3, base=10)
Numpy array containing the number of training data points to be used to train the LMNT or Jacobian regularization. If left as None, then the class will default to an array containing only the total number of training data points for standard LMNT/Jacobian or the number of noise steps if using zero-input LMNT. Default: None
Number of input noise realizations used to train the reservoir (if training with noise). If not training with noise, set to 1. Default: 1
Numpy array containing the Tikhonov regularization parameter values. Each value contained in the array will be tested separately using each of the reservoirs, training, and testing data sets. Default: np.append(0., np.logspace(-11, -9, 5))
Prior to be used when computing the output coupling matrix using Tikhonov regularization. Options are:
'zero' - Standard Tikhonov regularization with a zero prior.
'input_pass' - Tikhonov regularization with a persistence prior (i.e., set the input pass-through weights to 1).
Default: 'zero'
Maximum valid time for each valid time test during the testing period. This should be set so that test_time / max_valid_time is a whole number greater than 0. Default: 2000
Starting iterate for generating the random seeds that are used to generate the reservoir. Default: 0
Starting iterate for generating the random seeds that are used to generate the training data sets. Default: 0
Starting iterate for generating the random seeds that are used to generate the testing data sets. Default: 0
Location where output data will be stored in the Data folder. If None, then defaults to the current working directory. Default: None
Boolean for determining of all results should be returned, or only the results with the obtained using the "best" Tikhonov regularization parameter value based on the selected metric. Default: True
Metric for determining the "best" results. Not used if return_all = True. Options include 'mean_rms', 'max_rms', and 'stable_frac'. Caution: Some options may be deprecated. Default: 'mss-var'
Boolean for determining if reservoir prediction time series should be saved. Default: False
Boolean for determining if reservoir prediction RMS error should be saved. Default: False
Boolean for determining if reservoir prediction Poincare maximum map should be saved. Default: False
Boolean for determining if the eigenvalues of the LMNT/Jacobian regularization matrices should be saved. Default: False
Boolean for determining if ray should be used to compute results for multiple reservoirs and training data sets. Default: False
Machine which results are computed on. Leave as personal unless you are connecting to a ray cluster elsewhere. Default: 'personal'
True indicates that we are about to compute results, and the appropriate directories should be created. Otherwise, we do not create additional directories. Default: True
Boolean for determining if errors during reservoir training which could arise from non-convergence of the eigenvalue solver should be suppressed. If left as False, will suppress errors im much of the core code, so this should be set to True if making changes. Default: False
240 def get_file_name(self): 241 """Creates the folder and final data file name for the tests about to be run. 242 Args: 243 self: RunOpts object. 244 Returns: 245 RunOpts object with initialized folder and file name variables. Also creates the aforementioned folder 246 if not already created. 247 """ 248 if self.prior == 'zero': 249 prior_str = '' 250 else: 251 prior_str = '_prior_%s' % self.prior 252 if self.savepred: 253 predflag = '_wpred' 254 else: 255 predflag = '' 256 if self.save_time_rms: 257 timeflag = '_savetime' 258 else: 259 timeflag = '' 260 if self.squarenodes: 261 squarenodes_flag = '_squarenodes' 262 else: 263 squarenodes_flag = '' 264 if self.save_eigenvals: 265 eigenval_flag = '_wmoregradeigs' 266 else: 267 eigenval_flag = '' 268 if self.pmap: 269 pmap_flag = '_wpmap0' 270 else: 271 pmap_flag = '' 272 data_folder_base = os.path.join(self.root_folder, 'Data') 273 if not os.path.isdir(data_folder_base): 274 os.mkdir(data_folder_base) 275 276 if not self.return_all: 277 data_folder = os.path.join(data_folder_base, '%s_noisetest_noisetype_%s_traintype_%s' % ( 278 self.system, self.noisetype, self.traintype)) 279 run_name = ('%s%s%s%s%s%s_rho%0.1f_sigma%1.1e_leakage%0.3f_win_%s_bias_%s_tau%0.2f' 280 '_%dnodes_%dtrain_%dreals_noisetype_%s_traintype_%s%s_metric_%s') \ 281 % (self.system, predflag, timeflag, eigenval_flag, pmap_flag, squarenodes_flag, self.rho, 282 self.sigma, self.leakage, self.win_type, self.bias_type, self.tau, self.res_size, 283 self.train_time, self.noise_realizations, self.noisetype, self.traintype, prior_str, 284 self.metric) 285 else: 286 data_folder = os.path.join(data_folder_base, '%s_noisetest_noisetype_%s_traintype_%s' % ( 287 self.system, self.noisetype, self.traintype)) 288 run_name = ('%s%s%s%s%s%s_rho%0.1f_sigma%1.1e_leakage%0.3f_win_%s_bias_%s_tau%0.2f' 289 '_%dnodes_%dtrain_%dreals_noisetype_%s_traintype_%s%s') % ( 290 self.system, predflag, timeflag, eigenval_flag, pmap_flag, squarenodes_flag, self.rho, 291 self.sigma, 292 self.leakage, self.win_type, self.bias_type, self.tau, self.res_size, self.train_time, 293 self.noise_realizations, self.noisetype, self.traintype, prior_str) 294 295 if self.runflag: 296 if not os.path.isdir(data_folder): 297 os.mkdir(data_folder) 298 if not os.path.isdir(os.path.join(data_folder, run_name + '_folder')): 299 os.mkdir(os.path.join(data_folder, run_name + '_folder')) 300 self.run_file_name = os.path.join(data_folder, run_name + '.bz2') 301 self.run_folder_name = os.path.join(data_folder, run_name + '_folder')
Creates the folder and final data file name for the tests about to be run.
Args
- self: RunOpts object.
Returns
RunOpts object with initialized folder and file name variables. Also creates the aforementioned folder if not already created.
303 def get_run_opts(self): 304 """Processes the command line input into instance variables. 305 Args: 306 self: RunOpts object. 307 Returns: 308 RunOpts object with instance variables set from command line input. 309 Raises: 310 GetoptError: Raises an error of command line arguments no recognized.""" 311 try: 312 opts, args = getopt.getopt(self.argv, "T:N:r:", 313 ['testtime=', 'noisetype=', 'traintype=', 'system=', 'res=', 314 'tests=', 'trains=', 'savepred=', 'tau=', 'rho=', 315 'sigma=', 'theta=','leakage=', 'bias_type=', 'debug=', 'win_type=', 316 'machine=', 'num_cpus=', 'pmap=', 'parallel=', 'metric=', 'returnall=', 317 'savetime=', 'saveeigenvals=', 'noisevals=', 'regvals=', 'maxvt=', 318 'resstart=', 'trainstart=', 'teststart=', 319 'squarenodes=', 'regtraintimes=', 'discardlen=', 320 'prior=', 'synctime=', 'datarootdir=', 'savetruth=']) 321 except getopt.GetoptError: 322 print('Error: Some options not recognized') 323 sys.exit(2) 324 for opt, arg in opts: 325 if opt == '-T': 326 self.train_time = int(arg) 327 print('Training iterations: %d' % self.train_time) 328 elif opt == '-N': 329 self.res_size = int(arg) 330 print('Reservoir nodes: %d' % self.res_size) 331 elif opt == '-r': 332 self.noise_realizations = int(arg) 333 print('Noise Realizations: %d' % self.noise_realizations) 334 elif opt == '--savetruth': 335 if arg == 'True': 336 self.save_truth = True 337 elif arg == 'False': 338 self.save_truth = False 339 else: 340 raise ValueError 341 elif opt == '--datarootdir': 342 self.root_folder = str(arg) 343 print('Root directory for data: %s' % self.root_folder) 344 elif opt == '--synctime': 345 self.sync_time = int(arg) 346 print('Sync time: %d' % self.sync_time) 347 elif opt == '--testtime': 348 self.test_time = int(arg) 349 if self.test_time == 0: 350 print('Testing duration: default') 351 else: 352 print('Testing duration: %d' % self.test_time) 353 elif opt == '--resstart': 354 self.res_start = int(arg) 355 print('Reservoir ensemble start: %d' % self.res_start) 356 elif opt == '--trainstart': 357 self.train_start = int(arg) 358 print('Train ensemble start: %d' % self.train_start) 359 elif opt == '--teststart': 360 self.test_start = int(arg) 361 print('Test ensemble start: %d' % self.test_start) 362 elif opt == '--saveeigenvals': 363 if arg == 'True': 364 self.save_eigenvals = True 365 elif arg == 'False': 366 self.save_eigenvals = False 367 else: 368 raise ValueError 369 print('Save grad reg eigenvalues: %s' % arg) 370 elif opt == '--prior': 371 self.prior = str(arg) 372 print('Prior type: %s' % self.prior) 373 elif opt == '--discardlen': 374 self.discard_time = int(arg) 375 print('Discard iterations: %d' % self.discard_time) 376 elif opt == '--squarenodes': 377 if arg == 'True': 378 self.squarenodes = True 379 elif arg == 'False': 380 self.squarenodes = False 381 else: 382 raise ValueError 383 print('Square reservoir nodes: %s' % arg) 384 elif opt == '--maxvt': 385 self.max_valid_time = int(arg) 386 print('Maximum valid time: %d' % self.max_valid_time) 387 elif opt == '--noisevals': 388 self.noise_values_array = np.array([float(noise) for noise in arg.split(',')]) 389 noise_str = '[ ' 390 for noise in self.noise_values_array: 391 noise_str += '%0.3e, ' % noise 392 noise_str = noise_str[:-2] + ' ]' 393 print('Noise values: %s' % noise_str) 394 elif opt == '--regvals': 395 self.reg_values = np.array([float(reg) for reg in arg.split(',')]) 396 reg_str = '[ ' 397 for reg in self.reg_values: 398 reg_str += '%0.3e, ' % reg 399 reg_str = reg_str[:-2] + ' ]' 400 print('Regularization values: %s' % reg_str) 401 elif opt == '--regtraintimes': 402 if arg != 'None': 403 self.reg_train_times = np.array([int(reg_train) for reg_train in arg.split(',')]) 404 reg_train_str = '[ ' 405 for reg_train in self.reg_train_times: 406 reg_train_str += '%0.3e, ' % reg_train 407 reg_train_str = reg_train_str[:-2] + ' ]' 408 print('Regularization training times: %s' % reg_train_str) 409 elif opt == '--savetime': 410 if str(arg) == 'True': 411 self.save_time_rms = True 412 elif str(arg) == 'False': 413 self.save_time_rms = False 414 else: 415 raise ValueError 416 elif opt == '--metric': 417 self.metric = str(arg) 418 if self.metric not in ['pmap_max_wass_dist', 'mean_rms', 'max_rms', 'mss_var']: 419 raise ValueError 420 print('Stability metric: %s' % self.metric) 421 elif opt == '--returnall': 422 if arg == 'True': 423 self.return_all = True 424 elif arg == 'False': 425 self.return_all = False 426 else: 427 raise ValueError 428 elif opt == '--parallel': 429 parallel_in = str(arg) 430 if parallel_in == 'True': 431 self.ifray = True 432 elif parallel_in == 'False': 433 self.ifray = False 434 else: 435 raise ValueError 436 elif opt == '--pmap': 437 pmap_in = str(arg) 438 if pmap_in == 'True': 439 self.pmap = True 440 elif pmap_in == 'False': 441 self.pmap = False 442 else: 443 raise ValueError 444 elif opt == '--machine': 445 self.machine = str(arg) 446 if self.machine not in ['deepthought2', 'personal']: 447 raise ValueError 448 print('Machine: %s' % self.machine) 449 elif opt == '--num_cpus': 450 self.num_cpus = int(arg) 451 print('Number of CPUS: %d' % self.num_cpus) 452 elif opt == '--rho': 453 self.rho = float(arg) 454 print('Rho: %f' % self.rho) 455 elif opt == '--sigma': 456 self.sigma = float(arg) 457 print('Sigma: %f' % self.sigma) 458 elif opt == '--theta': 459 self.theta = float(arg) 460 print('Theta: %f' % self.theta) 461 elif opt == '--leakage': 462 self.leakage = float(arg) 463 print('Leakage: %f' % self.leakage) 464 elif opt == '--tau': 465 self.tau = float(arg) 466 print('Reservoir timestep: %f' % self.tau) 467 elif opt == '--win_type': 468 self.win_type = str(arg) 469 print('Win Type: %s' % self.win_type) 470 elif opt == '--bias_type': 471 self.bias_type = str(arg) 472 print('Bias Type: %s' % self.bias_type) 473 elif opt == '--res': 474 self.res_per_test = int(arg) 475 print('Number of reservoirs: %d' % self.res_per_test) 476 elif opt == '--tests': 477 self.num_tests = int(arg) 478 print('Number of tests: %d' % self.num_tests) 479 elif opt == '--trains': 480 self.num_trains = int(arg) 481 print('Number of training data sequences: %d' % self.num_trains) 482 elif opt == '--savepred': 483 if arg == 'True': 484 self.savepred = True 485 elif arg == 'False': 486 self.savepred = False 487 print('Saving predictions: %s' % arg) 488 elif opt == '--noisetype': 489 self.noisetype = str(arg) 490 print('Noise type: %s' % self.noisetype) 491 elif opt == '--traintype': 492 self.traintype = str(arg) 493 print('Training type: %s' % self.traintype) 494 elif opt == '--system': 495 self.system = str(arg) 496 print('System: %s' % self.system) 497 elif opt == '--debug': 498 if arg == 'True': 499 self.debug_mode = True 500 elif arg == 'False': 501 self.debug_mode = False 502 print('Debug Mode: %s' % arg)
Processes the command line input into instance variables.
Args
- self: RunOpts object.
Returns
RunOpts object with instance variables set from command line input.
Raises
- GetoptError: Raises an error of command line arguments no recognized.
505class Reservoir: 506 """Class for initializing and storing the reservoir matrices and internal states.""" 507 def __init__(self, run_opts, res_gen, res_itr, input_size, avg_degree=3): 508 """Initializes the Reservoir object. 509 Args: 510 run_opts: RunOpts object containing parameters used to generate the reservoir. 511 res_gen: A numpy.random.Generator object used to generate the random matrices in the Reservoir. 512 res_itr: Reservoir iteration tag. 513 input_size: Number of elements in reservoir input. 514 avg_degree: Average in-degree of each reservoir node (i.e., the average number of edges that connect into each vertex in the graph). Default: 3 515 Returns: 516 Constructed Reservoir object. 517 Raises: 518 ValueError: If win_type or bias_type is not recognized.""" 519 # Define class for storing reservoir layers generated from input parameters and an input random number generator 520 self.rsvr_size = run_opts.res_size 521 self.res_itr = res_itr 522 523 density = avg_degree / self.rsvr_size 524 525 if run_opts.win_type == 'full_0centered': 526 unnormalized_W = 2 * res_gen.random((self.rsvr_size, self.rsvr_size)) - 1 527 else: 528 unnormalized_W = res_gen.random((self.rsvr_size, self.rsvr_size)) 529 for i in range(unnormalized_W[:, 0].size): 530 for j in range(unnormalized_W[0].size): 531 if res_gen.random(1) > density: 532 unnormalized_W[i][j] = 0 533 534 max_eig = eigs(unnormalized_W, k=1, 535 return_eigenvectors=False, maxiter=10 ** 5, v0=res_gen.random(self.rsvr_size)) 536 537 W_sp = csc_matrix(np.ascontiguousarray( 538 run_opts.rho / np.abs(max_eig[0]) * unnormalized_W)) 539 self.W_data, self.W_indices, self.W_indptr, self.W_shape = \ 540 (W_sp.data, W_sp.indices, W_sp.indptr, np.array(list(W_sp.shape))) 541 # print('Avg. degree of W:') 542 # print(self.W_data.size/rsvr_size) 543 544 # print('Adjacency matrix section:') 545 # print(self.W_data[:4]) 546 547 if run_opts.win_type == 'dense': 548 if run_opts.bias_type != 'new_random': 549 raise ValueError 550 Win = (res_gen.random(self.rsvr_size * (input_size + 1)).reshape(self.rsvr_size, 551 input_size + 1) * 2 - 1) * run_opts.sigma 552 else: 553 if 'full' in run_opts.win_type: 554 input_vars = np.arange(input_size) 555 elif run_opts.win_type == 'x': 556 input_vars = np.array([0]) 557 else: 558 print('Win_type not recognized.') 559 raise ValueError() 560 if run_opts.bias_type == 'old': 561 const_frac = 0.15 562 const_conn = int(self.rsvr_size * const_frac) 563 Win = np.zeros((self.rsvr_size, input_size + 1)) 564 Win[:const_conn, 0] = (res_gen.random( 565 Win[:const_conn, 0].size) * 2 - 1) * run_opts.theta 566 q = int((self.rsvr_size - const_conn) // input_vars.size) 567 for i, var in enumerate(input_vars): 568 Win[const_conn + q * i:const_conn + q * 569 (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma 570 elif run_opts.bias_type == 'new_random': 571 Win = np.zeros((self.rsvr_size, input_size + 1)) 572 Win[:, 0] = (res_gen.random(self.rsvr_size) * 2 - 1) * run_opts.theta 573 q = int(self.rsvr_size // input_vars.size) 574 for i, var in enumerate(input_vars): 575 Win[q * i:q * (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma 576 leftover_nodes = self.rsvr_size - q * input_vars.size 577 for i in range(leftover_nodes): 578 Win[self.rsvr_size - leftover_nodes + i, input_vars[res_gen.integers(input_vars.size)]] = \ 579 (res_gen.random() * 2 - 1) * run_opts.sigma 580 elif run_opts.bias_type == 'new_new_random': 581 Win = np.zeros((self.rsvr_size, input_size + 1)) 582 Win[:, 0] = (res_gen.random(self.rsvr_size) * 2 - 1) * run_opts.theta 583 q = int(self.rsvr_size // input_vars.size) 584 for i, var in enumerate(input_vars): 585 Win[q * i:q * (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma 586 leftover_nodes = self.rsvr_size - q * input_vars.size 587 var = input_vars[res_gen.choice( 588 input_vars.size, size=leftover_nodes, replace=False)] 589 Win[self.rsvr_size - leftover_nodes:, var + 590 1] = (res_gen.random(leftover_nodes) * 2 - 1) * run_opts.sigma 591 elif run_opts.bias_type == 'new_const': 592 Win = np.zeros((self.rsvr_size, input_size + 1)) 593 Win[:, 0] = run_opts.theta 594 q = int(self.rsvr_size // input_vars.size) 595 for i, var in enumerate(input_vars): 596 Win[q * i:q * (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma 597 leftover_nodes = self.rsvr_size - q * input_vars.size 598 var = input_vars[res_gen.integers( 599 input_vars.size, size=leftover_nodes)] 600 Win[self.rsvr_size - leftover_nodes:, var + 601 1] = (res_gen.random(leftover_nodes) * 2 - 1) * run_opts.sigma 602 else: 603 print('bias_type not recognized.') 604 raise ValueError() 605 606 Win_sp = csc_matrix(Win) 607 self.Win_data, self.Win_indices, self.Win_indptr, self.Win_shape = \ 608 np.ascontiguousarray(Win_sp.data), \ 609 np.ascontiguousarray(Win_sp.indices), \ 610 np.ascontiguousarray(Win_sp.indptr), \ 611 np.array(list(Win_sp.shape)) 612 613 self.X = (res_gen.random((self.rsvr_size, run_opts.train_time + run_opts.discard_time + 2)) * 2 - 1) 614 self.Wout = np.array([]) 615 self.leakage = run_opts.leakage
Class for initializing and storing the reservoir matrices and internal states.
507 def __init__(self, run_opts, res_gen, res_itr, input_size, avg_degree=3): 508 """Initializes the Reservoir object. 509 Args: 510 run_opts: RunOpts object containing parameters used to generate the reservoir. 511 res_gen: A numpy.random.Generator object used to generate the random matrices in the Reservoir. 512 res_itr: Reservoir iteration tag. 513 input_size: Number of elements in reservoir input. 514 avg_degree: Average in-degree of each reservoir node (i.e., the average number of edges that connect into each vertex in the graph). Default: 3 515 Returns: 516 Constructed Reservoir object. 517 Raises: 518 ValueError: If win_type or bias_type is not recognized.""" 519 # Define class for storing reservoir layers generated from input parameters and an input random number generator 520 self.rsvr_size = run_opts.res_size 521 self.res_itr = res_itr 522 523 density = avg_degree / self.rsvr_size 524 525 if run_opts.win_type == 'full_0centered': 526 unnormalized_W = 2 * res_gen.random((self.rsvr_size, self.rsvr_size)) - 1 527 else: 528 unnormalized_W = res_gen.random((self.rsvr_size, self.rsvr_size)) 529 for i in range(unnormalized_W[:, 0].size): 530 for j in range(unnormalized_W[0].size): 531 if res_gen.random(1) > density: 532 unnormalized_W[i][j] = 0 533 534 max_eig = eigs(unnormalized_W, k=1, 535 return_eigenvectors=False, maxiter=10 ** 5, v0=res_gen.random(self.rsvr_size)) 536 537 W_sp = csc_matrix(np.ascontiguousarray( 538 run_opts.rho / np.abs(max_eig[0]) * unnormalized_W)) 539 self.W_data, self.W_indices, self.W_indptr, self.W_shape = \ 540 (W_sp.data, W_sp.indices, W_sp.indptr, np.array(list(W_sp.shape))) 541 # print('Avg. degree of W:') 542 # print(self.W_data.size/rsvr_size) 543 544 # print('Adjacency matrix section:') 545 # print(self.W_data[:4]) 546 547 if run_opts.win_type == 'dense': 548 if run_opts.bias_type != 'new_random': 549 raise ValueError 550 Win = (res_gen.random(self.rsvr_size * (input_size + 1)).reshape(self.rsvr_size, 551 input_size + 1) * 2 - 1) * run_opts.sigma 552 else: 553 if 'full' in run_opts.win_type: 554 input_vars = np.arange(input_size) 555 elif run_opts.win_type == 'x': 556 input_vars = np.array([0]) 557 else: 558 print('Win_type not recognized.') 559 raise ValueError() 560 if run_opts.bias_type == 'old': 561 const_frac = 0.15 562 const_conn = int(self.rsvr_size * const_frac) 563 Win = np.zeros((self.rsvr_size, input_size + 1)) 564 Win[:const_conn, 0] = (res_gen.random( 565 Win[:const_conn, 0].size) * 2 - 1) * run_opts.theta 566 q = int((self.rsvr_size - const_conn) // input_vars.size) 567 for i, var in enumerate(input_vars): 568 Win[const_conn + q * i:const_conn + q * 569 (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma 570 elif run_opts.bias_type == 'new_random': 571 Win = np.zeros((self.rsvr_size, input_size + 1)) 572 Win[:, 0] = (res_gen.random(self.rsvr_size) * 2 - 1) * run_opts.theta 573 q = int(self.rsvr_size // input_vars.size) 574 for i, var in enumerate(input_vars): 575 Win[q * i:q * (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma 576 leftover_nodes = self.rsvr_size - q * input_vars.size 577 for i in range(leftover_nodes): 578 Win[self.rsvr_size - leftover_nodes + i, input_vars[res_gen.integers(input_vars.size)]] = \ 579 (res_gen.random() * 2 - 1) * run_opts.sigma 580 elif run_opts.bias_type == 'new_new_random': 581 Win = np.zeros((self.rsvr_size, input_size + 1)) 582 Win[:, 0] = (res_gen.random(self.rsvr_size) * 2 - 1) * run_opts.theta 583 q = int(self.rsvr_size // input_vars.size) 584 for i, var in enumerate(input_vars): 585 Win[q * i:q * (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma 586 leftover_nodes = self.rsvr_size - q * input_vars.size 587 var = input_vars[res_gen.choice( 588 input_vars.size, size=leftover_nodes, replace=False)] 589 Win[self.rsvr_size - leftover_nodes:, var + 590 1] = (res_gen.random(leftover_nodes) * 2 - 1) * run_opts.sigma 591 elif run_opts.bias_type == 'new_const': 592 Win = np.zeros((self.rsvr_size, input_size + 1)) 593 Win[:, 0] = run_opts.theta 594 q = int(self.rsvr_size // input_vars.size) 595 for i, var in enumerate(input_vars): 596 Win[q * i:q * (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma 597 leftover_nodes = self.rsvr_size - q * input_vars.size 598 var = input_vars[res_gen.integers( 599 input_vars.size, size=leftover_nodes)] 600 Win[self.rsvr_size - leftover_nodes:, var + 601 1] = (res_gen.random(leftover_nodes) * 2 - 1) * run_opts.sigma 602 else: 603 print('bias_type not recognized.') 604 raise ValueError() 605 606 Win_sp = csc_matrix(Win) 607 self.Win_data, self.Win_indices, self.Win_indptr, self.Win_shape = \ 608 np.ascontiguousarray(Win_sp.data), \ 609 np.ascontiguousarray(Win_sp.indices), \ 610 np.ascontiguousarray(Win_sp.indptr), \ 611 np.array(list(Win_sp.shape)) 612 613 self.X = (res_gen.random((self.rsvr_size, run_opts.train_time + run_opts.discard_time + 2)) * 2 - 1) 614 self.Wout = np.array([]) 615 self.leakage = run_opts.leakage
Initializes the Reservoir object.
Args
- run_opts: RunOpts object containing parameters used to generate the reservoir.
- res_gen: A numpy.random.Generator object used to generate the random matrices in the Reservoir.
- res_itr: Reservoir iteration tag.
- input_size: Number of elements in reservoir input.
- avg_degree: Average in-degree of each reservoir node (i.e., the average number of edges that connect into each vertex in the graph). Default: 3
Returns
Constructed Reservoir object.
Raises
- ValueError: If win_type or bias_type is not recognized.
618class ResOutput: 619 """Class for holding the output from a reservoir computer test. Is typically used to save the output from one of 620 the (potentially parallel) runs.""" 621 def __init__(self, run_opts, noise_array): 622 """Creates the ResOutput object. 623 Args: 624 self: ResOutput object 625 run_opts: RunOpts object for the test. 626 noise_array: Array of noise/Jacobian/LMNT regularization parameter values used in this test. 627 Returns: 628 self: initialized ResOutput object.""" 629 self.stable_frac_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 630 dtype=object) 631 self.mean_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 632 dtype=object) 633 self.max_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 634 dtype=object) 635 self.variances_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 636 dtype=object) 637 self.valid_time_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 638 dtype=object) 639 self.rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 640 dtype=object) 641 self.preds_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 642 dtype=object) 643 self.wass_dist_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 644 dtype=object) 645 self.pmap_max_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 646 dtype=object) 647 self.pmap_max_wass_dist_out = np.zeros( 648 (noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), dtype=object) 649 self.stable_frac_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 650 dtype=object) 651 self.train_mean_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 652 dtype=object) 653 self.train_max_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 654 dtype=object) 655 self.grad_eigenvals_out = np.zeros((noise_array.size, run_opts.reg_train_times.size), dtype=object) 656 self.pred_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 657 dtype=object) 658 659 def save(self, run_opts, noise_array, res_itr, train_seed, test_idxs): 660 """Saves the data in the ResOutput object to a series of .csv files. 661 Args: 662 self: ResOutput object 663 run_opts: RunOpts object for test. 664 noise_array: Array of noise/Jacobian/LMNT regularization parameter values used in this test. 665 res_itr: Index for the reservoir iteration used. 666 train_seed: Index for the training data iteration used. 667 test_idxs: Indices for the testing data iterations used. 668 Returns: 669 Saves .csv files.""" 670 for (i, noise_val), (j, reg_train_time) in product(enumerate(noise_array), enumerate(run_opts.reg_train_times)): 671 print((i, j)) 672 stable_frac = np.zeros(run_opts.reg_values.size) 673 for k, array_elem in enumerate(self.stable_frac_out[i, :, j]): 674 stable_frac[k] = array_elem 675 np.savetxt( 676 get_filename(run_opts.run_folder_name, 'stable_frac', res_itr, train_seed, noise_val, reg_train_time), 677 stable_frac, delimiter=',') 678 679 mean_rms = np.zeros((run_opts.reg_values.size, *self.mean_rms_out[i, 0, j].shape)) 680 for k, array_elem in enumerate(self.mean_rms_out[i, :, j]): 681 mean_rms[k] = array_elem 682 np.savetxt( 683 get_filename(run_opts.run_folder_name, 'mean_rms', res_itr, train_seed, noise_val, reg_train_time), 684 mean_rms, delimiter=',') 685 686 max_rms = np.zeros((run_opts.reg_values.size, *self.max_rms_out[i, 0, j].shape)) 687 for k, array_elem in enumerate(self.max_rms_out[i, :, j]): 688 max_rms[k] = array_elem 689 np.savetxt( 690 get_filename(run_opts.run_folder_name, 'max_rms', res_itr, train_seed, noise_val, reg_train_time), 691 max_rms, delimiter=',') 692 693 variances = np.zeros((run_opts.reg_values.size, *self.variances_out[i, 0, j].shape)) 694 for k, array_elem in enumerate(self.variances_out[i, :, j]): 695 variances[k] = array_elem 696 np.savetxt( 697 get_filename(run_opts.run_folder_name, 'variance', res_itr, train_seed, noise_val, reg_train_time), 698 variances, delimiter=',') 699 700 valid_time = np.zeros((run_opts.reg_values.size, *self.valid_time_out[i, 0, j].shape)) 701 for k, array_elem in enumerate(self.valid_time_out[i, :, j]): 702 valid_time[k] = array_elem 703 for k in range(run_opts.num_tests): 704 np.savetxt( 705 get_filename(run_opts.run_folder_name, 'valid_time', res_itr, train_seed, noise_val, reg_train_time, 706 test_idx=test_idxs[k]), valid_time[:, k], delimiter=',') 707 708 train_mean_rms = np.zeros((run_opts.reg_values.size, *self.train_mean_rms_out[i, 0, j].shape)) 709 for k, array_elem in enumerate(self.train_mean_rms_out[i, :, j]): 710 train_mean_rms[k] = array_elem 711 np.savetxt(get_filename(run_opts.run_folder_name, 'train_mean_rms', res_itr, train_seed, noise_val, 712 reg_train_time), 713 train_mean_rms, delimiter=',') 714 715 train_max_rms = np.zeros((run_opts.reg_values.size, *self.train_max_rms_out[i, 0, j].shape)) 716 for k, array_elem in enumerate(self.train_max_rms_out[i, :, j]): 717 train_max_rms[k] = array_elem 718 np.savetxt( 719 get_filename(run_opts.run_folder_name, 'train_max_rms', res_itr, train_seed, noise_val, reg_train_time), 720 train_max_rms, delimiter=',') 721 722 if run_opts.pmap: 723 pmap_max_wass_dist = np.zeros((run_opts.reg_values.size, *self.pmap_max_wass_dist_out[i, 0, j].shape)) 724 for k, array_elem in enumerate(self.pmap_max_wass_dist_out[i, :, j]): 725 pmap_max_wass_dist[k] = array_elem 726 np.savetxt(get_filename(run_opts.run_folder_name, 'pmap_max_wass_dist', res_itr, train_seed, noise_val, 727 reg_train_time), 728 pmap_max_wass_dist, delimiter=',') 729 730 pmap_max = np.zeros((run_opts.reg_values.size, run_opts.num_tests), dtype=object) 731 for k, l in product(np.arange(run_opts.reg_values.size), np.arange(run_opts.num_tests)): 732 pmap_max[k, l] = self.pmap_max_out[i, k, j][l] 733 pmap_len = [len(arr) for arr in pmap_max[k, l]] 734 max_pmap_len = max(pmap_len) 735 pmap_max_save = np.zeros((len(pmap_max[k, l]), max_pmap_len)) 736 for m in range(len(pmap_max[k, l])): 737 pmap_max_save[m, :pmap_len[m]] = pmap_max[k, l][m] 738 np.savetxt(get_filename(run_opts.run_folder_name, 'pmap_max', res_itr, train_seed, noise_val, 739 reg_train_time, 740 test_idx=test_idxs[l], reg=run_opts.reg_values[k]), 741 pmap_max_save, delimiter=',') 742 if run_opts.save_time_rms: 743 rms = np.zeros((run_opts.reg_values.size, *self.rms_out[i, 0, j].shape)) 744 for k, array_elem in enumerate(self.rms_out[i, :, j]): 745 rms[k] = array_elem 746 for k in range(run_opts.num_tests): 747 np.savetxt( 748 get_filename(run_opts.run_folder_name, 'rms', res_itr, train_seed, noise_val, reg_train_time, 749 test_idx=test_idxs[k]), 750 rms[:, k], delimiter=',') 751 if run_opts.save_eigenvals: 752 eigenvals = self.grad_eigenvals_out[i, j] 753 np.savetxt(get_filename(run_opts.run_folder_name, 'gradreg_eigenvals', res_itr, train_seed, noise_val, 754 reg_train_time), 755 eigenvals, delimiter=',') 756 if run_opts.savepred: 757 pred = np.zeros((run_opts.reg_values.size, *self.pred_out[i, 0, j].shape)) 758 for k, array_elem in enumerate(self.pred_out[i, :, j]): 759 pred[k] = array_elem 760 for l, (k, reg) in product(range(run_opts.num_tests), enumerate(run_opts.reg_values)): 761 np.savetxt( 762 get_filename(run_opts.run_folder_name, 'pred', res_itr, train_seed, noise_val, reg_train_time, 763 test_idx=test_idxs[l], reg=reg), 764 pred[k, l], delimiter=',')
Class for holding the output from a reservoir computer test. Is typically used to save the output from one of the (potentially parallel) runs.
621 def __init__(self, run_opts, noise_array): 622 """Creates the ResOutput object. 623 Args: 624 self: ResOutput object 625 run_opts: RunOpts object for the test. 626 noise_array: Array of noise/Jacobian/LMNT regularization parameter values used in this test. 627 Returns: 628 self: initialized ResOutput object.""" 629 self.stable_frac_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 630 dtype=object) 631 self.mean_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 632 dtype=object) 633 self.max_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 634 dtype=object) 635 self.variances_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 636 dtype=object) 637 self.valid_time_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 638 dtype=object) 639 self.rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 640 dtype=object) 641 self.preds_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 642 dtype=object) 643 self.wass_dist_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 644 dtype=object) 645 self.pmap_max_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 646 dtype=object) 647 self.pmap_max_wass_dist_out = np.zeros( 648 (noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), dtype=object) 649 self.stable_frac_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 650 dtype=object) 651 self.train_mean_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 652 dtype=object) 653 self.train_max_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 654 dtype=object) 655 self.grad_eigenvals_out = np.zeros((noise_array.size, run_opts.reg_train_times.size), dtype=object) 656 self.pred_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), 657 dtype=object)
Creates the ResOutput object.
Args
- self: ResOutput object
- run_opts: RunOpts object for the test.
- noise_array: Array of noise/Jacobian/LMNT regularization parameter values used in this test.
Returns
self: initialized ResOutput object.
659 def save(self, run_opts, noise_array, res_itr, train_seed, test_idxs): 660 """Saves the data in the ResOutput object to a series of .csv files. 661 Args: 662 self: ResOutput object 663 run_opts: RunOpts object for test. 664 noise_array: Array of noise/Jacobian/LMNT regularization parameter values used in this test. 665 res_itr: Index for the reservoir iteration used. 666 train_seed: Index for the training data iteration used. 667 test_idxs: Indices for the testing data iterations used. 668 Returns: 669 Saves .csv files.""" 670 for (i, noise_val), (j, reg_train_time) in product(enumerate(noise_array), enumerate(run_opts.reg_train_times)): 671 print((i, j)) 672 stable_frac = np.zeros(run_opts.reg_values.size) 673 for k, array_elem in enumerate(self.stable_frac_out[i, :, j]): 674 stable_frac[k] = array_elem 675 np.savetxt( 676 get_filename(run_opts.run_folder_name, 'stable_frac', res_itr, train_seed, noise_val, reg_train_time), 677 stable_frac, delimiter=',') 678 679 mean_rms = np.zeros((run_opts.reg_values.size, *self.mean_rms_out[i, 0, j].shape)) 680 for k, array_elem in enumerate(self.mean_rms_out[i, :, j]): 681 mean_rms[k] = array_elem 682 np.savetxt( 683 get_filename(run_opts.run_folder_name, 'mean_rms', res_itr, train_seed, noise_val, reg_train_time), 684 mean_rms, delimiter=',') 685 686 max_rms = np.zeros((run_opts.reg_values.size, *self.max_rms_out[i, 0, j].shape)) 687 for k, array_elem in enumerate(self.max_rms_out[i, :, j]): 688 max_rms[k] = array_elem 689 np.savetxt( 690 get_filename(run_opts.run_folder_name, 'max_rms', res_itr, train_seed, noise_val, reg_train_time), 691 max_rms, delimiter=',') 692 693 variances = np.zeros((run_opts.reg_values.size, *self.variances_out[i, 0, j].shape)) 694 for k, array_elem in enumerate(self.variances_out[i, :, j]): 695 variances[k] = array_elem 696 np.savetxt( 697 get_filename(run_opts.run_folder_name, 'variance', res_itr, train_seed, noise_val, reg_train_time), 698 variances, delimiter=',') 699 700 valid_time = np.zeros((run_opts.reg_values.size, *self.valid_time_out[i, 0, j].shape)) 701 for k, array_elem in enumerate(self.valid_time_out[i, :, j]): 702 valid_time[k] = array_elem 703 for k in range(run_opts.num_tests): 704 np.savetxt( 705 get_filename(run_opts.run_folder_name, 'valid_time', res_itr, train_seed, noise_val, reg_train_time, 706 test_idx=test_idxs[k]), valid_time[:, k], delimiter=',') 707 708 train_mean_rms = np.zeros((run_opts.reg_values.size, *self.train_mean_rms_out[i, 0, j].shape)) 709 for k, array_elem in enumerate(self.train_mean_rms_out[i, :, j]): 710 train_mean_rms[k] = array_elem 711 np.savetxt(get_filename(run_opts.run_folder_name, 'train_mean_rms', res_itr, train_seed, noise_val, 712 reg_train_time), 713 train_mean_rms, delimiter=',') 714 715 train_max_rms = np.zeros((run_opts.reg_values.size, *self.train_max_rms_out[i, 0, j].shape)) 716 for k, array_elem in enumerate(self.train_max_rms_out[i, :, j]): 717 train_max_rms[k] = array_elem 718 np.savetxt( 719 get_filename(run_opts.run_folder_name, 'train_max_rms', res_itr, train_seed, noise_val, reg_train_time), 720 train_max_rms, delimiter=',') 721 722 if run_opts.pmap: 723 pmap_max_wass_dist = np.zeros((run_opts.reg_values.size, *self.pmap_max_wass_dist_out[i, 0, j].shape)) 724 for k, array_elem in enumerate(self.pmap_max_wass_dist_out[i, :, j]): 725 pmap_max_wass_dist[k] = array_elem 726 np.savetxt(get_filename(run_opts.run_folder_name, 'pmap_max_wass_dist', res_itr, train_seed, noise_val, 727 reg_train_time), 728 pmap_max_wass_dist, delimiter=',') 729 730 pmap_max = np.zeros((run_opts.reg_values.size, run_opts.num_tests), dtype=object) 731 for k, l in product(np.arange(run_opts.reg_values.size), np.arange(run_opts.num_tests)): 732 pmap_max[k, l] = self.pmap_max_out[i, k, j][l] 733 pmap_len = [len(arr) for arr in pmap_max[k, l]] 734 max_pmap_len = max(pmap_len) 735 pmap_max_save = np.zeros((len(pmap_max[k, l]), max_pmap_len)) 736 for m in range(len(pmap_max[k, l])): 737 pmap_max_save[m, :pmap_len[m]] = pmap_max[k, l][m] 738 np.savetxt(get_filename(run_opts.run_folder_name, 'pmap_max', res_itr, train_seed, noise_val, 739 reg_train_time, 740 test_idx=test_idxs[l], reg=run_opts.reg_values[k]), 741 pmap_max_save, delimiter=',') 742 if run_opts.save_time_rms: 743 rms = np.zeros((run_opts.reg_values.size, *self.rms_out[i, 0, j].shape)) 744 for k, array_elem in enumerate(self.rms_out[i, :, j]): 745 rms[k] = array_elem 746 for k in range(run_opts.num_tests): 747 np.savetxt( 748 get_filename(run_opts.run_folder_name, 'rms', res_itr, train_seed, noise_val, reg_train_time, 749 test_idx=test_idxs[k]), 750 rms[:, k], delimiter=',') 751 if run_opts.save_eigenvals: 752 eigenvals = self.grad_eigenvals_out[i, j] 753 np.savetxt(get_filename(run_opts.run_folder_name, 'gradreg_eigenvals', res_itr, train_seed, noise_val, 754 reg_train_time), 755 eigenvals, delimiter=',') 756 if run_opts.savepred: 757 pred = np.zeros((run_opts.reg_values.size, *self.pred_out[i, 0, j].shape)) 758 for k, array_elem in enumerate(self.pred_out[i, :, j]): 759 pred[k] = array_elem 760 for l, (k, reg) in product(range(run_opts.num_tests), enumerate(run_opts.reg_values)): 761 np.savetxt( 762 get_filename(run_opts.run_folder_name, 'pred', res_itr, train_seed, noise_val, reg_train_time, 763 test_idx=test_idxs[l], reg=reg), 764 pred[k, l], delimiter=',')
Saves the data in the ResOutput object to a series of .csv files.
Args
- self: ResOutput object
- run_opts: RunOpts object for test.
- noise_array: Array of noise/Jacobian/LMNT regularization parameter values used in this test.
- res_itr: Index for the reservoir iteration used.
- train_seed: Index for the training data iteration used.
- test_idxs: Indices for the testing data iterations used.
Returns
Saves .csv files.
767class NumericalModel: 768 """Class for generating training or testing data using one of the test numerical models.""" 769 def __init__(self, tau=0.1, int_step=1, T=300, ttsplit=5000, u0=0, system='lorenz', 770 params=np.array([[], []], dtype=np.complex128)): 771 """Creates the NumericalModel object. 772 Args: 773 self: NumericalModel object. 774 tau: Time step between measurements of the system dynamics. 775 int_step: the number of numerical integration steps between each measurement. 776 T: Total number of measurements. 777 ttsplit: Number of measurements to be used in the training data set. 778 u0: Initial condition for integration. 779 system: Name of system to generate data from. Options: 'lorenz', 'KS','KS_d2175' 780 params: Internal parameters for model integration. Currently only used by KS options. 781 Returns: 782 Complete NumericalModel object with precomputed internal parameters.""" 783 784 if system == 'lorenz': 785 u_arr = np.ascontiguousarray(lorenzrungekutta( 786 u0[0], u0[1], u0[2], T, tau)) 787 self.input_size = 3 788 789 u_arr[0] = (u_arr[0] - 0) / 7.929788629895004 790 u_arr[1] = (u_arr[1] - 0) / 8.9932616136662 791 u_arr[2] = (u_arr[2] - 23.596294463016896) / 8.575917849311919 792 self.params = params 793 794 elif system == 'KS': 795 u_arr, self.params = kursiv_predict(u0, tau=tau, T=T, params=params) 796 self.input_size = u_arr.shape[0] 797 u_arr = np.ascontiguousarray(u_arr) / (1.1876770355823614) 798 elif system == 'KS_d2175': 799 u_arr, self.params = kursiv_predict(u0, tau=tau, T=T, d=21.75, params=params) 800 self.input_size = u_arr.shape[0] 801 u_arr = np.ascontiguousarray(u_arr) / (1.2146066380280796) 802 else: 803 raise ValueError 804 805 self.train_length = ttsplit 806 self.u_arr_train = u_arr[:, :ttsplit + 1] 807 808 # u[ttsplit], the (ttsplit + 1)st element, is the last in u_arr_train and the first in u_arr_test 809 self.u_arr_test = u_arr[:, ttsplit:]
Class for generating training or testing data using one of the test numerical models.
769 def __init__(self, tau=0.1, int_step=1, T=300, ttsplit=5000, u0=0, system='lorenz', 770 params=np.array([[], []], dtype=np.complex128)): 771 """Creates the NumericalModel object. 772 Args: 773 self: NumericalModel object. 774 tau: Time step between measurements of the system dynamics. 775 int_step: the number of numerical integration steps between each measurement. 776 T: Total number of measurements. 777 ttsplit: Number of measurements to be used in the training data set. 778 u0: Initial condition for integration. 779 system: Name of system to generate data from. Options: 'lorenz', 'KS','KS_d2175' 780 params: Internal parameters for model integration. Currently only used by KS options. 781 Returns: 782 Complete NumericalModel object with precomputed internal parameters.""" 783 784 if system == 'lorenz': 785 u_arr = np.ascontiguousarray(lorenzrungekutta( 786 u0[0], u0[1], u0[2], T, tau)) 787 self.input_size = 3 788 789 u_arr[0] = (u_arr[0] - 0) / 7.929788629895004 790 u_arr[1] = (u_arr[1] - 0) / 8.9932616136662 791 u_arr[2] = (u_arr[2] - 23.596294463016896) / 8.575917849311919 792 self.params = params 793 794 elif system == 'KS': 795 u_arr, self.params = kursiv_predict(u0, tau=tau, T=T, params=params) 796 self.input_size = u_arr.shape[0] 797 u_arr = np.ascontiguousarray(u_arr) / (1.1876770355823614) 798 elif system == 'KS_d2175': 799 u_arr, self.params = kursiv_predict(u0, tau=tau, T=T, d=21.75, params=params) 800 self.input_size = u_arr.shape[0] 801 u_arr = np.ascontiguousarray(u_arr) / (1.2146066380280796) 802 else: 803 raise ValueError 804 805 self.train_length = ttsplit 806 self.u_arr_train = u_arr[:, :ttsplit + 1] 807 808 # u[ttsplit], the (ttsplit + 1)st element, is the last in u_arr_train and the first in u_arr_test 809 self.u_arr_test = u_arr[:, ttsplit:]
Creates the NumericalModel object.
Args
- self: NumericalModel object.
- tau: Time step between measurements of the system dynamics.
- int_step: the number of numerical integration steps between each measurement.
- T: Total number of measurements.
- ttsplit: Number of measurements to be used in the training data set.
- u0: Initial condition for integration.
- system: Name of system to generate data from. Options: 'lorenz', 'KS','KS_d2175'
- params: Internal parameters for model integration. Currently only used by KS options.
Returns
Complete NumericalModel object with precomputed internal parameters.
812class ResPreds: 813 """Class for loading and storing prediction time series data generated from reservoir computer tests.""" 814 def __init__(self, run_opts): 815 """Loads the prediction data from .csv files. 816 Args: 817 run_opts: RunOpts object containing the run parameters.""" 818 self.data_filename, self.pred_folder = run_opts.run_file_name, run_opts.run_folder_name 819 self.noise_vals = run_opts.noise_values_array 820 self.reg_train_vals = run_opts.reg_train_times 821 self.reg_vals = run_opts.reg_values 822 print('Starding data read...') 823 # print(self.pred_folder) 824 self.preds = np.zeros((run_opts.res_per_test, run_opts.num_trains, run_opts.num_tests, self.noise_vals.size, 825 self.reg_train_vals.size, self.reg_vals.size), dtype=object) 826 total_vals = self.preds.size 827 with tqdm(total=total_vals) as pbar: 828 for (i, res), (j, train), (k, test), (l, noise), (m, reg_train), (n, reg) in product( 829 enumerate(np.arange(run_opts.res_start, run_opts.res_start + run_opts.res_per_test)), 830 enumerate(np.arange(run_opts.train_start, run_opts.train_start + run_opts.num_trains)), 831 enumerate(np.arange(run_opts.test_start, run_opts.test_start + run_opts.num_tests)), 832 enumerate(self.noise_vals), 833 enumerate(self.reg_train_vals), enumerate(self.reg_vals)): 834 filename = get_filename(self.pred_folder, 'pred', res, train, noise, reg_train, reg = reg, test_idx = test) 835 if os.name == 'nt' and len(filename) >= 260: 836 filename = get_windows_path(filename) 837 self.preds[i, j, k, l, m, n] = np.loadtxt(filename, delimiter=',') 838 pbar.update(1)
Class for loading and storing prediction time series data generated from reservoir computer tests.
814 def __init__(self, run_opts): 815 """Loads the prediction data from .csv files. 816 Args: 817 run_opts: RunOpts object containing the run parameters.""" 818 self.data_filename, self.pred_folder = run_opts.run_file_name, run_opts.run_folder_name 819 self.noise_vals = run_opts.noise_values_array 820 self.reg_train_vals = run_opts.reg_train_times 821 self.reg_vals = run_opts.reg_values 822 print('Starding data read...') 823 # print(self.pred_folder) 824 self.preds = np.zeros((run_opts.res_per_test, run_opts.num_trains, run_opts.num_tests, self.noise_vals.size, 825 self.reg_train_vals.size, self.reg_vals.size), dtype=object) 826 total_vals = self.preds.size 827 with tqdm(total=total_vals) as pbar: 828 for (i, res), (j, train), (k, test), (l, noise), (m, reg_train), (n, reg) in product( 829 enumerate(np.arange(run_opts.res_start, run_opts.res_start + run_opts.res_per_test)), 830 enumerate(np.arange(run_opts.train_start, run_opts.train_start + run_opts.num_trains)), 831 enumerate(np.arange(run_opts.test_start, run_opts.test_start + run_opts.num_tests)), 832 enumerate(self.noise_vals), 833 enumerate(self.reg_train_vals), enumerate(self.reg_vals)): 834 filename = get_filename(self.pred_folder, 'pred', res, train, noise, reg_train, reg = reg, test_idx = test) 835 if os.name == 'nt' and len(filename) >= 260: 836 filename = get_windows_path(filename) 837 self.preds[i, j, k, l, m, n] = np.loadtxt(filename, delimiter=',') 838 pbar.update(1)
Loads the prediction data from .csv files.
Args
- run_opts: RunOpts object containing the run parameters.
841class ResPmap: 842 """Class for loading and storing Poincare maximum map data from reservoir predictions 843 generated from reservoir computer tests.""" 844 def __init__(self, run_opts): 845 """Loads the Poincare maxmimum map data from .csv files. 846 Args: 847 run_opts: RunOpts object containing the run parameters.""" 848 self.data_filename, self.pred_folder = run_opts.run_file_name, run_opts.run_folder_name 849 self.noise_vals = run_opts.noise_values_array 850 self.reg_train_vals = run_opts.reg_train_times 851 self.reg_vals = run_opts.reg_values 852 print('Starding data read...') 853 # print(self.pred_folder) 854 self.pmap_max = np.zeros((run_opts.res_per_test, run_opts.num_trains, run_opts.num_tests, self.noise_vals.size, 855 self.reg_train_vals.size, self.reg_vals.size), dtype=object) 856 total_vals = self.pmap_max.size 857 with tqdm(total=total_vals) as pbar: 858 for (i, res), (j, train), (k, test), (l, noise), (m, reg_train), (n, reg) in product( 859 enumerate(np.arange(run_opts.res_start, run_opts.res_start + run_opts.res_per_test)), 860 enumerate(np.arange(run_opts.train_start, run_opts.train_start + run_opts.num_trains)), 861 enumerate(np.arange(run_opts.test_start, run_opts.test_start + run_opts.num_tests)), 862 enumerate(self.noise_vals), 863 enumerate(self.reg_train_vals), enumerate(self.reg_vals)): 864 filename = get_filename(self.pred_folder, 'pmap_max', res, train, noise, reg_train, reg = reg, test_idx = test) 865 if os.name == 'nt' and len(filename) >= 260: 866 filename = get_windows_path(filename) 867 pmap_in = np.loadtxt(filename, delimiter=',') 868 pmap_max = [pmap_in[o, pmap_in[o] != 0.] for o in range(pmap_in.shape[0])] 869 self.pmap_max[i, j, k, l, m, n] = pmap_max 870 pbar.update(1)
Class for loading and storing Poincare maximum map data from reservoir predictions generated from reservoir computer tests.
844 def __init__(self, run_opts): 845 """Loads the Poincare maxmimum map data from .csv files. 846 Args: 847 run_opts: RunOpts object containing the run parameters.""" 848 self.data_filename, self.pred_folder = run_opts.run_file_name, run_opts.run_folder_name 849 self.noise_vals = run_opts.noise_values_array 850 self.reg_train_vals = run_opts.reg_train_times 851 self.reg_vals = run_opts.reg_values 852 print('Starding data read...') 853 # print(self.pred_folder) 854 self.pmap_max = np.zeros((run_opts.res_per_test, run_opts.num_trains, run_opts.num_tests, self.noise_vals.size, 855 self.reg_train_vals.size, self.reg_vals.size), dtype=object) 856 total_vals = self.pmap_max.size 857 with tqdm(total=total_vals) as pbar: 858 for (i, res), (j, train), (k, test), (l, noise), (m, reg_train), (n, reg) in product( 859 enumerate(np.arange(run_opts.res_start, run_opts.res_start + run_opts.res_per_test)), 860 enumerate(np.arange(run_opts.train_start, run_opts.train_start + run_opts.num_trains)), 861 enumerate(np.arange(run_opts.test_start, run_opts.test_start + run_opts.num_tests)), 862 enumerate(self.noise_vals), 863 enumerate(self.reg_train_vals), enumerate(self.reg_vals)): 864 filename = get_filename(self.pred_folder, 'pmap_max', res, train, noise, reg_train, reg = reg, test_idx = test) 865 if os.name == 'nt' and len(filename) >= 260: 866 filename = get_windows_path(filename) 867 pmap_in = np.loadtxt(filename, delimiter=',') 868 pmap_max = [pmap_in[o, pmap_in[o] != 0.] for o in range(pmap_in.shape[0])] 869 self.pmap_max[i, j, k, l, m, n] = pmap_max 870 pbar.update(1)
Loads the Poincare maxmimum map data from .csv files.
Args
- run_opts: RunOpts object containing the run parameters.
873class ResData: 874 """Class for loading and storing prediction analysis data generated from reservoir computer tests.""" 875 def __init__(self, run_opts): 876 """Loads the prediction analysis data from a compressed pandas data file. 877 Args: 878 run_opts: RunOpts object containing the run parameters.""" 879 self.data_filename, self.pred_folder = run_opts.run_file_name, run_opts.run_folder_name 880 print('Starding data read...') 881 tic = time.perf_counter() 882 # print(self.data_filename) 883 if os.name == 'nt' and len(self.data_filename) >= 260: 884 self.data_filename = get_windows_path(self.data_filename) 885 self.data = pd.read_csv(self.data_filename, index_col=0) 886 toc = time.perf_counter() 887 print('Data reading finished in %0.2f sec.' % (toc - tic)) 888 self.res = pd.unique(self.data['res']) 889 self.train = pd.unique(self.data['train']) 890 self.test = pd.unique(self.data['test']) 891 self.noise = pd.unique(self.data['noise']) 892 self.reg = pd.unique(self.data['reg']) 893 self.reg_train = pd.unique(self.data['reg_train']) 894 self.nan = pd.isna(self.data['variance']) 895 896 def shape(self): 897 """Returns the shape of the pandas data frame""" 898 return self.data.shape 899 900 def size(self): 901 """Returns the size of the pandas data frame""" 902 return self.data.size 903 904 def data_slice(self, res=np.array([]), train=np.array([]), test=np.array([]), 905 reg_train=np.array([]), noise=np.array([]), reg=np.array([]), median_flag=False, 906 reduce_axes=[], metric='', gross_frac_metric='valid_time', gross_err_bnd=1e2, 907 reduce_fun=pd.DataFrame.median): 908 """Slices and/or finds the best results using a metric computed by the reduce_fun. 909 Args: 910 self: ResPreds object 911 res: Indices for reservoir results to be returned/optimized over. If left as an empty array, uses all indices. 912 train: Indices for training data set results to be returned/optimized over. If left as an empty array, uses all indices. 913 test: Indices for testing data set results to be returned/optimized over. If left as an empty array, uses all indices. 914 reg_train: Number of training data points for regularization to be returned/optimized over. If left as an empty array, uses all values. 915 noise: Noise/Jacobian/LMNT regularization parameter value results to be returned/optimized over. If left as an empty array, uses all values. 916 reg: Tikhonov regularization paramter value results to be returned/optimized over. If left as an empty array, uses all values. 917 median_flag: Boolean indicating whether the data should be optimized. 918 reduce_axes: List containing the axes to be optimized over. Elements can be 'res', 'train', 'test', 'reg_train', 'noise', or 'reg'. 919 metric: Metric to be used to compute which parameters give the best result. Options are: 920 'gross_frac': Lowest fraction of gross error 921 'mean_rms': Lowest mean map error. 922 'max_rms: Lowest maximum map error. 923 'valid_time': Highest valid prediction time.' 924 gross_frac_metrix: If using 'gross_frac' as a metric, this is the secondary metric that will be used if multiple parameters give equally good prediction results. 925 gross_err_bnd: The cutoff in the mean map error above which predictions are considered to have gross error. 926 reduce_fun: Function for computing the overall performance for a given set of parameters over many tests. 927 Returns: 928 Pandas DataFrame containing the sliced and optimized prediction results. 929 Raises: 930 ValueError: If any of the inputs are not recognized/incompatible.""" 931 input_list = [res, train, test, reg_train, noise, reg] 932 name_list = np.array(['res', 'train', 'test', 'reg_train', 'noise', 'reg']) 933 data_names = [name for name in self.data.columns if name not in name_list] 934 base_list = [self.res, self.train, self.test, self.reg_train, self.noise, self.reg] 935 slice_vals = np.zeros(len(input_list), dtype=object) 936 if median_flag: 937 if not isinstance(reduce_axes, list): 938 print('reduce_axes must be a list.') 939 return ValueError 940 elif len(reduce_axes) == 0: 941 print('median_flag is True, but no axes to compute the median over are specified.') 942 raise ValueError 943 elif not all(axis in ['res', 'train', 'test', 'noise', 'reg','reg_train'] for axis in reduce_axes) or \ 944 len(reduce_axes) != len(set(reduce_axes)): 945 print('reduce_axis max only contain res, train, test, noise, or reg.') 946 raise ValueError 947 elif metric not in ['', 'mean_rms', 'max_rms', 'valid_time', 'pmap_max_wass_dist', 'gross_frac']: 948 print('Metric not recognized.') 949 raise ValueError 950 elif len(metric) == 0 and any(axis in ['noise', 'reg'] for axis in reduce_axes): 951 print('Cannot reduce axes with unspecified metric.') 952 raise ValueError 953 954 for i, item in enumerate(input_list): 955 if isinstance(item, np.ndarray): 956 if item.size == 0: 957 slice_vals[i] = base_list[i] 958 else: 959 slice_vals[i] = item 960 elif isinstance(item, list): 961 if len(item) == 0: 962 slice_vals[i] = self.item 963 else: 964 slice_vals[i] = np.array(item) 965 else: 966 slice_vals[i] = np.array([item]) 967 968 sliced_data = self.data 969 for name, slice_val in zip(name_list, slice_vals): 970 sliced_data = sliced_data[sliced_data[name].isin(slice_val)] 971 972 if not median_flag: 973 return sliced_data 974 elif median_flag: 975 median_slice_data = pd.DataFrame() 976 remaining_vars = [var not in reduce_axes for var in name_list[:3]] 977 remaining_vars.extend([True, True, True]) 978 if np.all(remaining_vars): 979 median_slice_data = sliced_data 980 nans = pd.isna(median_slice_data['mean_rms']) 981 near_nans = median_slice_data['mean_rms'] > gross_err_bnd 982 median_slice_data['gross_count'] = np.zeros(median_slice_data.shape[0]) 983 median_slice_data['gross_frac'] = np.zeros(median_slice_data.shape[0]) 984 median_slice_data.loc[nans | near_nans, 'gross_count'] = 1.0 985 median_slice_data.loc[nans | near_nans, 'gross_frac'] = 1.0 986 else: 987 total_vars = 1 988 for slice_val in slice_vals[remaining_vars]: 989 total_vars *= slice_val.size 990 for vars_set in product(*slice_vals[remaining_vars]): 991 row_dict = {} 992 reduced_sliced_data = sliced_data 993 for var, name in zip(vars_set, name_list[remaining_vars]): 994 row_dict[name] = var 995 reduced_sliced_data = reduced_sliced_data[reduced_sliced_data[name] == var] 996 if reduced_sliced_data.size == 0: 997 for name in data_names: 998 if 'variance' in name: 999 row_dict[name] = 0. 1000 elif 'valid_time' not in name: 1001 row_dict[name] = 1e10 1002 row_dict['valid_time'] = 0. 1003 row_dict['gross_count'] = 0. 1004 row_dict['gross_frac'] = 1.0 1005 row_dict['data_present'] = False 1006 else: 1007 nans = pd.isna(reduced_sliced_data['mean_rms']) 1008 near_nans = reduced_sliced_data['mean_rms'] > gross_err_bnd 1009 nan_count = reduced_sliced_data[nans | near_nans].shape[0] 1010 nan_frac = nan_count / reduced_sliced_data.shape[0] 1011 valid_times = np.array([]) 1012 for name in data_names: 1013 if 'variance' in name: 1014 row_dict[name] = reduce_fun(reduced_sliced_data.loc[~nans, name]) 1015 elif 'valid_time' not in name: 1016 reduced_sliced_data.loc[nans, name] = 1e10 1017 row_dict[name] = reduce_fun(reduced_sliced_data[name]) 1018 else: 1019 reduced_sliced_data.loc[pd.isna(reduced_sliced_data[name]), name] = 0.0 1020 valid_times = np.append(valid_times, reduced_sliced_data[name].to_numpy()) 1021 row_dict['valid_time'] = reduce_fun(pd.DataFrame(valid_times)).to_numpy()[0] 1022 row_dict['gross_count'] = nan_count 1023 row_dict['gross_frac'] = nan_frac 1024 row_dict['data_present'] = True 1025 for key in row_dict.keys(): 1026 if not isinstance(row_dict[key], list) and not isinstance(row_dict[key], np.ndarray): 1027 row_dict[key] = [row_dict[key]] 1028 median_slice_data = pd.concat([median_slice_data, pd.DataFrame(row_dict)], ignore_index=True, 1029 verify_integrity=True) 1030 if len(metric) == 0: 1031 return median_slice_data 1032 else: 1033 best_median_slice_data = pd.DataFrame() 1034 remaining_vars = [var not in reduce_axes for var in name_list] 1035 total_vars = 1 1036 for slice_val in slice_vals[remaining_vars]: 1037 total_vars *= slice_val.size 1038 for vars_set in product(*slice_vals[remaining_vars]): 1039 row_dict = {} 1040 best_reduced_slice_data = median_slice_data 1041 for var, name in zip(vars_set, name_list[remaining_vars]): 1042 row_dict[name] = var 1043 best_reduced_slice_data = best_reduced_slice_data[best_reduced_slice_data[name] == var] 1044 if metric == 'valid_time': 1045 best_reduced_slice_data = best_reduced_slice_data[ 1046 best_reduced_slice_data[metric] == best_reduced_slice_data[metric].max()] 1047 elif metric == 'gross_frac': 1048 best_reduced_slice_data = best_reduced_slice_data[ 1049 best_reduced_slice_data[metric] == best_reduced_slice_data[metric].min()] 1050 if len(best_reduced_slice_data.shape) != 1: 1051 if gross_frac_metric == 'valid_time': 1052 best_reduced_slice_data = best_reduced_slice_data[ 1053 best_reduced_slice_data[gross_frac_metric] == best_reduced_slice_data[ 1054 gross_frac_metric].max()] 1055 else: 1056 best_reduced_slice_data = best_reduced_slice_data[ 1057 best_reduced_slice_data[gross_frac_metric] == best_reduced_slice_data[ 1058 gross_frac_metric].min()] 1059 else: 1060 best_reduced_slice_data = best_reduced_slice_data[ 1061 best_reduced_slice_data[metric] == best_reduced_slice_data[metric].min()] 1062 if len(best_reduced_slice_data.shape) != 1: 1063 best_reduced_slice_data = best_reduced_slice_data[best_reduced_slice_data['noise'] == 1064 best_reduced_slice_data['noise'].min()] 1065 if len(best_reduced_slice_data.shape) != 1: 1066 best_reduced_slice_data = best_reduced_slice_data[best_reduced_slice_data['reg'] == 1067 best_reduced_slice_data['reg'].min()] 1068 for key in best_reduced_slice_data.keys(): 1069 if not isinstance(best_reduced_slice_data[key], list) and not isinstance( 1070 best_reduced_slice_data[key], np.ndarray): 1071 best_reduced_slice_data[key] = [best_reduced_slice_data[key]] 1072 best_median_slice_data = pd.concat([best_median_slice_data, best_reduced_slice_data], 1073 ignore_index=True, verify_integrity=True) 1074 return best_median_slice_data
Class for loading and storing prediction analysis data generated from reservoir computer tests.
875 def __init__(self, run_opts): 876 """Loads the prediction analysis data from a compressed pandas data file. 877 Args: 878 run_opts: RunOpts object containing the run parameters.""" 879 self.data_filename, self.pred_folder = run_opts.run_file_name, run_opts.run_folder_name 880 print('Starding data read...') 881 tic = time.perf_counter() 882 # print(self.data_filename) 883 if os.name == 'nt' and len(self.data_filename) >= 260: 884 self.data_filename = get_windows_path(self.data_filename) 885 self.data = pd.read_csv(self.data_filename, index_col=0) 886 toc = time.perf_counter() 887 print('Data reading finished in %0.2f sec.' % (toc - tic)) 888 self.res = pd.unique(self.data['res']) 889 self.train = pd.unique(self.data['train']) 890 self.test = pd.unique(self.data['test']) 891 self.noise = pd.unique(self.data['noise']) 892 self.reg = pd.unique(self.data['reg']) 893 self.reg_train = pd.unique(self.data['reg_train']) 894 self.nan = pd.isna(self.data['variance'])
Loads the prediction analysis data from a compressed pandas data file.
Args
- run_opts: RunOpts object containing the run parameters.
896 def shape(self): 897 """Returns the shape of the pandas data frame""" 898 return self.data.shape
Returns the shape of the pandas data frame
904 def data_slice(self, res=np.array([]), train=np.array([]), test=np.array([]), 905 reg_train=np.array([]), noise=np.array([]), reg=np.array([]), median_flag=False, 906 reduce_axes=[], metric='', gross_frac_metric='valid_time', gross_err_bnd=1e2, 907 reduce_fun=pd.DataFrame.median): 908 """Slices and/or finds the best results using a metric computed by the reduce_fun. 909 Args: 910 self: ResPreds object 911 res: Indices for reservoir results to be returned/optimized over. If left as an empty array, uses all indices. 912 train: Indices for training data set results to be returned/optimized over. If left as an empty array, uses all indices. 913 test: Indices for testing data set results to be returned/optimized over. If left as an empty array, uses all indices. 914 reg_train: Number of training data points for regularization to be returned/optimized over. If left as an empty array, uses all values. 915 noise: Noise/Jacobian/LMNT regularization parameter value results to be returned/optimized over. If left as an empty array, uses all values. 916 reg: Tikhonov regularization paramter value results to be returned/optimized over. If left as an empty array, uses all values. 917 median_flag: Boolean indicating whether the data should be optimized. 918 reduce_axes: List containing the axes to be optimized over. Elements can be 'res', 'train', 'test', 'reg_train', 'noise', or 'reg'. 919 metric: Metric to be used to compute which parameters give the best result. Options are: 920 'gross_frac': Lowest fraction of gross error 921 'mean_rms': Lowest mean map error. 922 'max_rms: Lowest maximum map error. 923 'valid_time': Highest valid prediction time.' 924 gross_frac_metrix: If using 'gross_frac' as a metric, this is the secondary metric that will be used if multiple parameters give equally good prediction results. 925 gross_err_bnd: The cutoff in the mean map error above which predictions are considered to have gross error. 926 reduce_fun: Function for computing the overall performance for a given set of parameters over many tests. 927 Returns: 928 Pandas DataFrame containing the sliced and optimized prediction results. 929 Raises: 930 ValueError: If any of the inputs are not recognized/incompatible.""" 931 input_list = [res, train, test, reg_train, noise, reg] 932 name_list = np.array(['res', 'train', 'test', 'reg_train', 'noise', 'reg']) 933 data_names = [name for name in self.data.columns if name not in name_list] 934 base_list = [self.res, self.train, self.test, self.reg_train, self.noise, self.reg] 935 slice_vals = np.zeros(len(input_list), dtype=object) 936 if median_flag: 937 if not isinstance(reduce_axes, list): 938 print('reduce_axes must be a list.') 939 return ValueError 940 elif len(reduce_axes) == 0: 941 print('median_flag is True, but no axes to compute the median over are specified.') 942 raise ValueError 943 elif not all(axis in ['res', 'train', 'test', 'noise', 'reg','reg_train'] for axis in reduce_axes) or \ 944 len(reduce_axes) != len(set(reduce_axes)): 945 print('reduce_axis max only contain res, train, test, noise, or reg.') 946 raise ValueError 947 elif metric not in ['', 'mean_rms', 'max_rms', 'valid_time', 'pmap_max_wass_dist', 'gross_frac']: 948 print('Metric not recognized.') 949 raise ValueError 950 elif len(metric) == 0 and any(axis in ['noise', 'reg'] for axis in reduce_axes): 951 print('Cannot reduce axes with unspecified metric.') 952 raise ValueError 953 954 for i, item in enumerate(input_list): 955 if isinstance(item, np.ndarray): 956 if item.size == 0: 957 slice_vals[i] = base_list[i] 958 else: 959 slice_vals[i] = item 960 elif isinstance(item, list): 961 if len(item) == 0: 962 slice_vals[i] = self.item 963 else: 964 slice_vals[i] = np.array(item) 965 else: 966 slice_vals[i] = np.array([item]) 967 968 sliced_data = self.data 969 for name, slice_val in zip(name_list, slice_vals): 970 sliced_data = sliced_data[sliced_data[name].isin(slice_val)] 971 972 if not median_flag: 973 return sliced_data 974 elif median_flag: 975 median_slice_data = pd.DataFrame() 976 remaining_vars = [var not in reduce_axes for var in name_list[:3]] 977 remaining_vars.extend([True, True, True]) 978 if np.all(remaining_vars): 979 median_slice_data = sliced_data 980 nans = pd.isna(median_slice_data['mean_rms']) 981 near_nans = median_slice_data['mean_rms'] > gross_err_bnd 982 median_slice_data['gross_count'] = np.zeros(median_slice_data.shape[0]) 983 median_slice_data['gross_frac'] = np.zeros(median_slice_data.shape[0]) 984 median_slice_data.loc[nans | near_nans, 'gross_count'] = 1.0 985 median_slice_data.loc[nans | near_nans, 'gross_frac'] = 1.0 986 else: 987 total_vars = 1 988 for slice_val in slice_vals[remaining_vars]: 989 total_vars *= slice_val.size 990 for vars_set in product(*slice_vals[remaining_vars]): 991 row_dict = {} 992 reduced_sliced_data = sliced_data 993 for var, name in zip(vars_set, name_list[remaining_vars]): 994 row_dict[name] = var 995 reduced_sliced_data = reduced_sliced_data[reduced_sliced_data[name] == var] 996 if reduced_sliced_data.size == 0: 997 for name in data_names: 998 if 'variance' in name: 999 row_dict[name] = 0. 1000 elif 'valid_time' not in name: 1001 row_dict[name] = 1e10 1002 row_dict['valid_time'] = 0. 1003 row_dict['gross_count'] = 0. 1004 row_dict['gross_frac'] = 1.0 1005 row_dict['data_present'] = False 1006 else: 1007 nans = pd.isna(reduced_sliced_data['mean_rms']) 1008 near_nans = reduced_sliced_data['mean_rms'] > gross_err_bnd 1009 nan_count = reduced_sliced_data[nans | near_nans].shape[0] 1010 nan_frac = nan_count / reduced_sliced_data.shape[0] 1011 valid_times = np.array([]) 1012 for name in data_names: 1013 if 'variance' in name: 1014 row_dict[name] = reduce_fun(reduced_sliced_data.loc[~nans, name]) 1015 elif 'valid_time' not in name: 1016 reduced_sliced_data.loc[nans, name] = 1e10 1017 row_dict[name] = reduce_fun(reduced_sliced_data[name]) 1018 else: 1019 reduced_sliced_data.loc[pd.isna(reduced_sliced_data[name]), name] = 0.0 1020 valid_times = np.append(valid_times, reduced_sliced_data[name].to_numpy()) 1021 row_dict['valid_time'] = reduce_fun(pd.DataFrame(valid_times)).to_numpy()[0] 1022 row_dict['gross_count'] = nan_count 1023 row_dict['gross_frac'] = nan_frac 1024 row_dict['data_present'] = True 1025 for key in row_dict.keys(): 1026 if not isinstance(row_dict[key], list) and not isinstance(row_dict[key], np.ndarray): 1027 row_dict[key] = [row_dict[key]] 1028 median_slice_data = pd.concat([median_slice_data, pd.DataFrame(row_dict)], ignore_index=True, 1029 verify_integrity=True) 1030 if len(metric) == 0: 1031 return median_slice_data 1032 else: 1033 best_median_slice_data = pd.DataFrame() 1034 remaining_vars = [var not in reduce_axes for var in name_list] 1035 total_vars = 1 1036 for slice_val in slice_vals[remaining_vars]: 1037 total_vars *= slice_val.size 1038 for vars_set in product(*slice_vals[remaining_vars]): 1039 row_dict = {} 1040 best_reduced_slice_data = median_slice_data 1041 for var, name in zip(vars_set, name_list[remaining_vars]): 1042 row_dict[name] = var 1043 best_reduced_slice_data = best_reduced_slice_data[best_reduced_slice_data[name] == var] 1044 if metric == 'valid_time': 1045 best_reduced_slice_data = best_reduced_slice_data[ 1046 best_reduced_slice_data[metric] == best_reduced_slice_data[metric].max()] 1047 elif metric == 'gross_frac': 1048 best_reduced_slice_data = best_reduced_slice_data[ 1049 best_reduced_slice_data[metric] == best_reduced_slice_data[metric].min()] 1050 if len(best_reduced_slice_data.shape) != 1: 1051 if gross_frac_metric == 'valid_time': 1052 best_reduced_slice_data = best_reduced_slice_data[ 1053 best_reduced_slice_data[gross_frac_metric] == best_reduced_slice_data[ 1054 gross_frac_metric].max()] 1055 else: 1056 best_reduced_slice_data = best_reduced_slice_data[ 1057 best_reduced_slice_data[gross_frac_metric] == best_reduced_slice_data[ 1058 gross_frac_metric].min()] 1059 else: 1060 best_reduced_slice_data = best_reduced_slice_data[ 1061 best_reduced_slice_data[metric] == best_reduced_slice_data[metric].min()] 1062 if len(best_reduced_slice_data.shape) != 1: 1063 best_reduced_slice_data = best_reduced_slice_data[best_reduced_slice_data['noise'] == 1064 best_reduced_slice_data['noise'].min()] 1065 if len(best_reduced_slice_data.shape) != 1: 1066 best_reduced_slice_data = best_reduced_slice_data[best_reduced_slice_data['reg'] == 1067 best_reduced_slice_data['reg'].min()] 1068 for key in best_reduced_slice_data.keys(): 1069 if not isinstance(best_reduced_slice_data[key], list) and not isinstance( 1070 best_reduced_slice_data[key], np.ndarray): 1071 best_reduced_slice_data[key] = [best_reduced_slice_data[key]] 1072 best_median_slice_data = pd.concat([best_median_slice_data, best_reduced_slice_data], 1073 ignore_index=True, verify_integrity=True) 1074 return best_median_slice_data
Slices and/or finds the best results using a metric computed by the reduce_fun.
Args
- self: ResPreds object
- res: Indices for reservoir results to be returned/optimized over. If left as an empty array, uses all indices.
- train: Indices for training data set results to be returned/optimized over. If left as an empty array, uses all indices.
- test: Indices for testing data set results to be returned/optimized over. If left as an empty array, uses all indices.
- reg_train: Number of training data points for regularization to be returned/optimized over. If left as an empty array, uses all values.
- noise: Noise/Jacobian/LMNT regularization parameter value results to be returned/optimized over. If left as an empty array, uses all values.
- reg: Tikhonov regularization paramter value results to be returned/optimized over. If left as an empty array, uses all values.
- median_flag: Boolean indicating whether the data should be optimized.
- reduce_axes: List containing the axes to be optimized over. Elements can be 'res', 'train', 'test', 'reg_train', 'noise', or 'reg'.
- metric: Metric to be used to compute which parameters give the best result. Options are: 'gross_frac': Lowest fraction of gross error 'mean_rms': Lowest mean map error. 'max_rms: Lowest maximum map error. 'valid_time': Highest valid prediction time.'
- gross_frac_metrix: If using 'gross_frac' as a metric, this is the secondary metric that will be used if multiple parameters give equally good prediction results.
- gross_err_bnd: The cutoff in the mean map error above which predictions are considered to have gross error.
- reduce_fun: Function for computing the overall performance for a given set of parameters over many tests.
Returns
Pandas DataFrame containing the sliced and optimized prediction results.
Raises
- ValueError: If any of the inputs are not recognized/incompatible.