res_reg_lmnt_awikner.classes

   1import pandas as pd
   2from tqdm import tqdm
   3import numpy as np
   4import time
   5from itertools import product
   6import sys
   7import os
   8import getopt
   9from res_reg_lmnt_awikner.helpers import get_windows_path, get_filename
  10from res_reg_lmnt_awikner.lorenzrungekutta_numba import lorenzrungekutta
  11from res_reg_lmnt_awikner.ks_etdrk4 import kursiv_predict
  12from scipy.sparse.linalg import eigs
  13from scipy.sparse import csc_matrix
  14
  15__docformat__ = "google"
  16
  17
  18class RunOpts:
  19    """This class contains the various parameters and options that will be used during the time series data generation,
  20    reservoir generation, training, testing, and output."""
  21    def __init__(self, argv=None,
  22                 runflag=True,
  23                 train_time=20000,
  24                 test_time=None,
  25                 sync_time=2000,
  26                 discard_time=500,
  27                 res_size=500,
  28                 res_per_test=1,
  29                 noise_realizations=1,
  30                 num_tests=1,
  31                 num_trains=1,
  32                 traintype='normal',
  33                 noisetype='gaussian',
  34                 system='KS',
  35                 savepred=False,
  36                 save_time_rms=False,
  37                 squarenodes=True,
  38                 rho=0.6,
  39                 sigma=0.1,
  40                 theta=0.1,
  41                 leakage=1.0,
  42                 bias_type='new_random',
  43                 win_type='full_0centered',
  44                 debug_mode=False,
  45                 pmap=False,
  46                 machine='deepthought2',
  47                 ifray=False,
  48                 tau=None,
  49                 num_cpus=1,
  50                 metric='mean_rms',
  51                 return_all=True,
  52                 save_eigenvals=False,
  53                 max_valid_time=2000,
  54                 noise_values_array=np.logspace(-4, 3, num=3, base=10),
  55                 reg_values=np.append(0., np.logspace(-11, -9, 5)),
  56                 res_start=0,
  57                 train_start=0,
  58                 test_start=0,
  59                 reg_train_times=None,
  60                 root_folder=None,
  61                 prior='zero',
  62                 save_truth=False):
  63        """Initializes the RunOpts class.
  64
  65        Raises:
  66            ValueError: If an input instance variable has an unaccepted value.
  67            TypeError: If an input instance variable has an incorrect type."""
  68        self.argv = argv
  69        """Command line input for generating a RunOpts object. If left as None, then the object will be generated using
  70            the other inputs or defaults. If multiple instances of the same variable are given, the class will default
  71            to the command line input. See RunOpts.get_run_opts for more information. Default: None"""
  72        self.system = system
  73        """String denoting which dynamical system we are obtaining time series data from. Options: 'lorenz' for the 
  74        Lorenz 63 equations, 'KS' for the Kuramoto-Sivashinsky equation with 64 grid points and a periodicity length of
  75        22, and KS_d2175 for the Kuramoto-Sivashinksy equation with 64 grid points and a periodicity length of 21.75.
  76        Default: 'KS'"""
  77        self.tau = tau
  78        """Time between each data point in the time series data. If system = 'lorenz', this value must be evenly divided
  79        by 0.01 (the integration time step). If left as None, then the class will set tau to the default value for the
  80        particular dynamical system. Default: None"""
  81        self.train_time = train_time
  82        """Number of training data points. Default: 20000"""
  83        self.test_time = test_time
  84        """Number of testing data points. If left as None, then the class will default to 4000 if system = 'lorenz', or 
  85        16000 if system = 'KS' or 'KS_d2175'. Default: None"""
  86        self.sync_time = sync_time
  87        """Number of data points used to synchronize the reservoir to each test data set. Default: 2000"""
  88        self.discard_time = discard_time
  89        """Number of data points used to synchronize the reservoir to each training data set. Default: 500"""
  90        self.res_size = res_size
  91        """Number of nodes in the reservoir. Default: 500"""
  92        self.rho = rho
  93        """Reservoir spectral radius. Default: 0.6"""
  94        self.sigma = sigma
  95        """Reservoir input scaling. Default: 0.1"""
  96        self.theta = theta
  97        """Reservoir input bias scaling. Default: 0.1"""
  98        self.leakage = leakage
  99        """Reservoir leaking rate. Default: 1.0"""
 100        self.squarenodes = squarenodes
 101        """Boolean denoting whether or not the squared node states are including in the reservoir feature vector.
 102        Default: True"""
 103        self.bias_type = bias_type
 104        """Type of reservoir input bias to be used. See the Reservoir class for available options. 
 105        Default: new_random"""
 106        self.win_type = win_type
 107        """Type of input coupling matrix to be used. See the Reservoir class for available options. 
 108        Default: full_0centered"""
 109        self.traintype = traintype
 110        """Type of training to be used to determine the reservoir output coupling matrix. There are a number of options
 111        available, but those used in the paper are:
 112
 113        'normal' - Standard reservoir training, potentially with input noise added.
 114
 115        'gradientk%d' % (Number of noise steps) - Reservoir training with no noise and LMNT regularization for a number of 
 116        noise steps > 1, or Jacobian regularization for a number of noise steps = 1.
 117
 118        'regzerok%d' % (Number of noise steps) - Reservoir training with no noise and LMNT/Jacobian regularization computed
 119        using a zero-input and zero reservoir state.
 120        
 121        Default: 'normal'
 122        """
 123        self.noisetype = noisetype
 124        """Type of noise to be added to the reservoir input during training. Options are 'none' and 'gaussian'.
 125        Default: 'none'"""
 126        self.noise_values_array = noise_values_array
 127        """Numpy array containing the variance of the added input noise (if noisetype = 'gaussian') or the LMNT/Jacobian
 128        regularization parameter value (if traintype = 'gradientk%d' or 'regzerok%d'). Each value contained in the array
 129        will be tested separately using each of the reservoirs, training, and testing data sets.
 130        Default: np.logspace(-4, 3, num=3, base=10)"""
 131        self.reg_train_times = reg_train_times
 132        """Numpy array containing the number of training data points to be used to train the LMNT or Jacobian 
 133        regularization. If left as None, then the class will default to an array containing only
 134        the total number of training data points for standard LMNT/Jacobian or the number of noise steps 
 135        if using zero-input LMNT. Default: None"""
 136        self.noise_realizations = noise_realizations
 137        """Number of input noise realizations used to train the reservoir (if training with noise). If not training with
 138        noise, set to 1. Default: 1"""
 139        self.reg_values = reg_values
 140        """Numpy array containing the Tikhonov regularization parameter values. Each value contained in the array
 141        will be tested separately using each of the reservoirs, training, and testing data sets.
 142        Default: np.append(0., np.logspace(-11, -9, 5))"""
 143        self.prior = prior
 144        """Prior to be used when computing the output coupling matrix using Tikhonov regularization. Options are:
 145
 146        'zero' - Standard Tikhonov regularization with a zero prior.
 147
 148        'input_pass' - Tikhonov regularization with a persistence prior (i.e., set the input pass-through weights to 1).
 149        
 150        Default: 'zero'"""
 151        self.max_valid_time = max_valid_time
 152        """Maximum valid time for each valid time test during the testing period. This should be set so that
 153        test_time / max_valid_time is a whole number greater than 0. Default: 2000"""
 154        self.res_per_test = res_per_test
 155        """Number of random reservoir realizations to test. Default: 1"""
 156        self.num_trains = num_trains
 157        """Number of independently generated training data sets to test with. Default: 1"""
 158        self.num_tests = num_tests
 159        """Number of independently generated testing data sets to test with. Default: 1"""
 160        self.res_start = res_start
 161        """Starting iterate for generating the random seeds that are used to generate the reservoir. Default: 0"""
 162        self.train_start = train_start
 163        """Starting iterate for generating the random seeds that are used to generate the training data sets.
 164        Default: 0"""
 165        self.test_start = test_start
 166        """Starting iterate for generating the random seeds that are used to generate the testing data sets.
 167        Default: 0"""
 168        self.root_folder = root_folder
 169        """Location where output data will be stored in the Data folder. If None, then defaults to the current working 
 170        directory. Default: None"""
 171        self.return_all = return_all
 172        """Boolean for determining of all results should be returned, or only the results with the obtained using the
 173        "best" Tikhonov regularization parameter value based on the selected metric. Default: True"""
 174        self.metric = metric
 175        """Metric for determining the "best" results. Not used if return_all = True. Options include 'mean_rms', 'max_rms',
 176        and 'stable_frac'. Caution: Some options may be deprecated. Default: 'mss-var'"""
 177        self.savepred = savepred
 178        """Boolean for determining if reservoir prediction time series should be saved. Default: False"""
 179        self.save_time_rms = save_time_rms
 180        """Boolean for determining if reservoir prediction RMS error should be saved. Default: False"""
 181        self.pmap = pmap
 182        """Boolean for determining if reservoir prediction Poincare maximum map should be saved. Default: False"""
 183        self.save_eigenvals = save_eigenvals
 184        """Boolean for determining if the eigenvalues of the LMNT/Jacobian regularization matrices should be saved.
 185        Default: False"""
 186        self.save_truth = save_truth
 187        """Boolean for determining if the true testing data should be saved. Default: False"""
 188        self.ifray = ifray
 189        """Boolean for determining if ray should be used to compute results for multiple reservoirs and training 
 190        data sets. Default: False"""
 191        self.num_cpus = num_cpus
 192        """If using ray for paralellization, this sets the number of cpus to be used. Default: 1"""
 193        self.machine = machine
 194        """Machine which results are computed on. Leave as personal unless you are connecting to a ray cluster 
 195        elsewhere. Default: 'personal'"""
 196        self.runflag = runflag
 197        """True indicates that we are about to compute results, and the appropriate directories should be created.
 198        Otherwise, we do not create additional directories. Default: True"""
 199        self.debug_mode = debug_mode
 200        """Boolean for determining if errors during reservoir training which could arise from non-convergence of the 
 201        eigenvalue solver should be suppressed. If left as False, will suppress errors im much of the core code,
 202        so this should be set to True if making changes. Default: False"""
 203        if not isinstance(argv, type(None)):
 204            self.get_run_opts(argv)
 205        if isinstance(self.tau, type(None)):
 206            if self.system == 'lorenz':
 207                self.tau = 0.1
 208            elif self.system in ['KS', 'KS_d2175']:
 209                self.tau = 0.25
 210        if isinstance(self.test_time, type(None)):
 211            if self.system == 'lorenz':
 212                self.test_time = 4000
 213            elif 'KS' in self.system:
 214                self.test_time = 16000
 215        if not isinstance(self.reg_train_times, np.ndarray):
 216            if isinstance(self.reg_train_times, type(None)):
 217                self.reg_train_times = np.array([self.train_time])
 218            elif isinstance(self.reg_train_times, int):
 219                self.reg_train_times = np.array([self.reg_train_times])
 220            else:
 221                raise TypeError()
 222        if isinstance(self.root_folder, type(None)):
 223            self.root_folder = os.getcwd()
 224        if isinstance(self.reg_train_times, np.ndarray) or isinstance(self.reg_train_times, list):
 225            if (self.reg_train_times[0] != self.train_time or len(self.reg_train_times) != 1) and (
 226                    self.traintype in ['normal', 'normalres1', 'normalres2', 'rmean', 'rmeanres1',
 227                                       'rmeanres2', 'rplusq', 'rplusqres1',
 228                                       'rplusqres2'] or 'confined' in self.traintype):
 229                print(('Traintypes "normal", "rmean", and "rplusq" are not '
 230                       'compatible with fractional regularization training.'))
 231                raise ValueError
 232        if self.prior not in ['zero', 'input_pass']:
 233            print('Prior type not recognized.')
 234            raise ValueError
 235        self.run_file_name = ''
 236        self.run_folder_name = ''
 237        self.get_file_name()
 238
 239    def get_file_name(self):
 240        """Creates the folder and final data file name for the tests about to be run.
 241        Args:
 242            self: RunOpts object.
 243        Returns:
 244            RunOpts object with initialized folder and file name variables. Also creates the aforementioned folder
 245            if not already created.
 246        """
 247        if self.prior == 'zero':
 248            prior_str = ''
 249        else:
 250            prior_str = '_prior_%s' % self.prior
 251        if self.savepred:
 252            predflag = '_wpred'
 253        else:
 254            predflag = ''
 255        if self.save_time_rms:
 256            timeflag = '_savetime'
 257        else:
 258            timeflag = ''
 259        if self.squarenodes:
 260            squarenodes_flag = '_squarenodes'
 261        else:
 262            squarenodes_flag = ''
 263        if self.save_eigenvals:
 264            eigenval_flag = '_wmoregradeigs'
 265        else:
 266            eigenval_flag = ''
 267        if self.pmap:
 268            pmap_flag = '_wpmap0'
 269        else:
 270            pmap_flag = ''
 271        data_folder_base = os.path.join(self.root_folder, 'Data')
 272        if not os.path.isdir(data_folder_base):
 273            os.mkdir(data_folder_base)
 274
 275        if not self.return_all:
 276            data_folder = os.path.join(data_folder_base, '%s_noisetest_noisetype_%s_traintype_%s' % (
 277                self.system, self.noisetype, self.traintype))
 278            run_name = ('%s%s%s%s%s%s_rho%0.1f_sigma%1.1e_leakage%0.3f_win_%s_bias_%s_tau%0.2f'
 279                        '_%dnodes_%dtrain_%dreals_noisetype_%s_traintype_%s%s_metric_%s') \
 280                       % (self.system, predflag, timeflag, eigenval_flag, pmap_flag, squarenodes_flag, self.rho,
 281                          self.sigma, self.leakage, self.win_type, self.bias_type, self.tau, self.res_size,
 282                          self.train_time, self.noise_realizations, self.noisetype, self.traintype, prior_str,
 283                          self.metric)
 284        else:
 285            data_folder = os.path.join(data_folder_base, '%s_noisetest_noisetype_%s_traintype_%s' % (
 286                self.system, self.noisetype, self.traintype))
 287            run_name = ('%s%s%s%s%s%s_rho%0.1f_sigma%1.1e_leakage%0.3f_win_%s_bias_%s_tau%0.2f'
 288                        '_%dnodes_%dtrain_%dreals_noisetype_%s_traintype_%s%s') % (
 289                           self.system, predflag, timeflag, eigenval_flag, pmap_flag, squarenodes_flag, self.rho,
 290                           self.sigma,
 291                           self.leakage, self.win_type, self.bias_type, self.tau, self.res_size, self.train_time,
 292                           self.noise_realizations, self.noisetype, self.traintype, prior_str)
 293
 294        if self.runflag:
 295            if not os.path.isdir(data_folder):
 296                os.mkdir(data_folder)
 297            if not os.path.isdir(os.path.join(data_folder, run_name + '_folder')):
 298                os.mkdir(os.path.join(data_folder, run_name + '_folder'))
 299        self.run_file_name = os.path.join(data_folder, run_name + '.bz2')
 300        self.run_folder_name = os.path.join(data_folder, run_name + '_folder')
 301
 302    def get_run_opts(self):
 303        """Processes the command line input into instance variables.
 304        Args:
 305            self: RunOpts object.
 306        Returns:
 307            RunOpts object with instance variables set from command line input.
 308        Raises:
 309            GetoptError: Raises an error of command line arguments no recognized."""
 310        try:
 311            opts, args = getopt.getopt(self.argv, "T:N:r:",
 312                                       ['testtime=', 'noisetype=', 'traintype=', 'system=', 'res=',
 313                                        'tests=', 'trains=', 'savepred=', 'tau=', 'rho=',
 314                                        'sigma=', 'theta=','leakage=', 'bias_type=', 'debug=', 'win_type=',
 315                                        'machine=', 'num_cpus=', 'pmap=', 'parallel=', 'metric=', 'returnall=',
 316                                        'savetime=', 'saveeigenvals=', 'noisevals=', 'regvals=', 'maxvt=',
 317                                        'resstart=', 'trainstart=', 'teststart=',
 318                                        'squarenodes=', 'regtraintimes=', 'discardlen=',
 319                                        'prior=', 'synctime=', 'datarootdir=', 'savetruth='])
 320        except getopt.GetoptError:
 321            print('Error: Some options not recognized')
 322            sys.exit(2)
 323        for opt, arg in opts:
 324            if opt == '-T':
 325                self.train_time = int(arg)
 326                print('Training iterations: %d' % self.train_time)
 327            elif opt == '-N':
 328                self.res_size = int(arg)
 329                print('Reservoir nodes: %d' % self.res_size)
 330            elif opt == '-r':
 331                self.noise_realizations = int(arg)
 332                print('Noise Realizations: %d' % self.noise_realizations)
 333            elif opt == '--savetruth':
 334                if arg == 'True':
 335                    self.save_truth = True
 336                elif arg == 'False':
 337                    self.save_truth = False
 338                else:
 339                    raise ValueError
 340            elif opt == '--datarootdir':
 341                self.root_folder = str(arg)
 342                print('Root directory for data: %s' % self.root_folder)
 343            elif opt == '--synctime':
 344                self.sync_time = int(arg)
 345                print('Sync time: %d' % self.sync_time)
 346            elif opt == '--testtime':
 347                self.test_time = int(arg)
 348                if self.test_time == 0:
 349                    print('Testing duration: default')
 350                else:
 351                    print('Testing duration: %d' % self.test_time)
 352            elif opt == '--resstart':
 353                self.res_start = int(arg)
 354                print('Reservoir ensemble start: %d' % self.res_start)
 355            elif opt == '--trainstart':
 356                self.train_start = int(arg)
 357                print('Train ensemble start: %d' % self.train_start)
 358            elif opt == '--teststart':
 359                self.test_start = int(arg)
 360                print('Test ensemble start: %d' % self.test_start)
 361            elif opt == '--saveeigenvals':
 362                if arg == 'True':
 363                    self.save_eigenvals = True
 364                elif arg == 'False':
 365                    self.save_eigenvals = False
 366                else:
 367                    raise ValueError
 368                print('Save grad reg eigenvalues: %s' % arg)
 369            elif opt == '--prior':
 370                self.prior = str(arg)
 371                print('Prior type: %s' % self.prior)
 372            elif opt == '--discardlen':
 373                self.discard_time = int(arg)
 374                print('Discard iterations: %d' % self.discard_time)
 375            elif opt == '--squarenodes':
 376                if arg == 'True':
 377                    self.squarenodes = True
 378                elif arg == 'False':
 379                    self.squarenodes = False
 380                else:
 381                    raise ValueError
 382                print('Square reservoir nodes: %s' % arg)
 383            elif opt == '--maxvt':
 384                self.max_valid_time = int(arg)
 385                print('Maximum valid time: %d' % self.max_valid_time)
 386            elif opt == '--noisevals':
 387                self.noise_values_array = np.array([float(noise) for noise in arg.split(',')])
 388                noise_str = '[ '
 389                for noise in self.noise_values_array:
 390                    noise_str += '%0.3e, ' % noise
 391                noise_str = noise_str[:-2] + ' ]'
 392                print('Noise values: %s' % noise_str)
 393            elif opt == '--regvals':
 394                self.reg_values = np.array([float(reg) for reg in arg.split(',')])
 395                reg_str = '[ '
 396                for reg in self.reg_values:
 397                    reg_str += '%0.3e, ' % reg
 398                reg_str = reg_str[:-2] + ' ]'
 399                print('Regularization values: %s' % reg_str)
 400            elif opt == '--regtraintimes':
 401                if arg != 'None':
 402                    self.reg_train_times = np.array([int(reg_train) for reg_train in arg.split(',')])
 403                    reg_train_str = '[ '
 404                    for reg_train in self.reg_train_times:
 405                        reg_train_str += '%0.3e, ' % reg_train
 406                    reg_train_str = reg_train_str[:-2] + ' ]'
 407                    print('Regularization training times: %s' % reg_train_str)
 408            elif opt == '--savetime':
 409                if str(arg) == 'True':
 410                    self.save_time_rms = True
 411                elif str(arg) == 'False':
 412                    self.save_time_rms = False
 413                else:
 414                    raise ValueError
 415            elif opt == '--metric':
 416                self.metric = str(arg)
 417                if self.metric not in ['pmap_max_wass_dist', 'mean_rms', 'max_rms', 'mss_var']:
 418                    raise ValueError
 419                print('Stability metric: %s' % self.metric)
 420            elif opt == '--returnall':
 421                if arg == 'True':
 422                    self.return_all = True
 423                elif arg == 'False':
 424                    self.return_all = False
 425                else:
 426                    raise ValueError
 427            elif opt == '--parallel':
 428                parallel_in = str(arg)
 429                if parallel_in == 'True':
 430                    self.ifray = True
 431                elif parallel_in == 'False':
 432                    self.ifray = False
 433                else:
 434                    raise ValueError
 435            elif opt == '--pmap':
 436                pmap_in = str(arg)
 437                if pmap_in == 'True':
 438                    self.pmap = True
 439                elif pmap_in == 'False':
 440                    self.pmap = False
 441                else:
 442                    raise ValueError
 443            elif opt == '--machine':
 444                self.machine = str(arg)
 445                if self.machine not in ['deepthought2', 'personal']:
 446                    raise ValueError
 447                print('Machine: %s' % self.machine)
 448            elif opt == '--num_cpus':
 449                self.num_cpus = int(arg)
 450                print('Number of CPUS: %d' % self.num_cpus)
 451            elif opt == '--rho':
 452                self.rho = float(arg)
 453                print('Rho: %f' % self.rho)
 454            elif opt == '--sigma':
 455                self.sigma = float(arg)
 456                print('Sigma: %f' % self.sigma)
 457            elif opt == '--theta':
 458                self.theta = float(arg)
 459                print('Theta: %f' % self.theta)
 460            elif opt == '--leakage':
 461                self.leakage = float(arg)
 462                print('Leakage: %f' % self.leakage)
 463            elif opt == '--tau':
 464                self.tau = float(arg)
 465                print('Reservoir timestep: %f' % self.tau)
 466            elif opt == '--win_type':
 467                self.win_type = str(arg)
 468                print('Win Type: %s' % self.win_type)
 469            elif opt == '--bias_type':
 470                self.bias_type = str(arg)
 471                print('Bias Type: %s' % self.bias_type)
 472            elif opt == '--res':
 473                self.res_per_test = int(arg)
 474                print('Number of reservoirs: %d' % self.res_per_test)
 475            elif opt == '--tests':
 476                self.num_tests = int(arg)
 477                print('Number of tests: %d' % self.num_tests)
 478            elif opt == '--trains':
 479                self.num_trains = int(arg)
 480                print('Number of training data sequences: %d' % self.num_trains)
 481            elif opt == '--savepred':
 482                if arg == 'True':
 483                    self.savepred = True
 484                elif arg == 'False':
 485                    self.savepred = False
 486                print('Saving predictions: %s' % arg)
 487            elif opt == '--noisetype':
 488                self.noisetype = str(arg)
 489                print('Noise type: %s' % self.noisetype)
 490            elif opt == '--traintype':
 491                self.traintype = str(arg)
 492                print('Training type: %s' % self.traintype)
 493            elif opt == '--system':
 494                self.system = str(arg)
 495                print('System: %s' % self.system)
 496            elif opt == '--debug':
 497                if arg == 'True':
 498                    self.debug_mode = True
 499                elif arg == 'False':
 500                    self.debug_mode = False
 501                print('Debug Mode: %s' % arg)
 502
 503
 504class Reservoir:
 505    """Class for initializing and storing the reservoir matrices and internal states."""
 506    def __init__(self, run_opts, res_gen, res_itr, input_size, avg_degree=3):
 507        """Initializes the Reservoir object.
 508        Args:
 509            run_opts: RunOpts object containing parameters used to generate the reservoir.
 510            res_gen: A numpy.random.Generator object used to generate the random matrices in the Reservoir.
 511            res_itr: Reservoir iteration tag.
 512            input_size: Number of elements in reservoir input.
 513            avg_degree: Average in-degree of each reservoir node (i.e., the average number of edges that connect into each vertex in the graph). Default: 3
 514        Returns:
 515            Constructed Reservoir object.
 516        Raises:
 517            ValueError: If win_type or bias_type is not recognized."""
 518        # Define class for storing reservoir layers generated from input parameters and an input random number generator
 519        self.rsvr_size = run_opts.res_size
 520        self.res_itr = res_itr
 521
 522        density = avg_degree / self.rsvr_size
 523
 524        if run_opts.win_type == 'full_0centered':
 525            unnormalized_W = 2 * res_gen.random((self.rsvr_size, self.rsvr_size)) - 1
 526        else:
 527            unnormalized_W = res_gen.random((self.rsvr_size, self.rsvr_size))
 528        for i in range(unnormalized_W[:, 0].size):
 529            for j in range(unnormalized_W[0].size):
 530                if res_gen.random(1) > density:
 531                    unnormalized_W[i][j] = 0
 532
 533        max_eig = eigs(unnormalized_W, k=1,
 534                       return_eigenvectors=False, maxiter=10 ** 5, v0=res_gen.random(self.rsvr_size))
 535
 536        W_sp = csc_matrix(np.ascontiguousarray(
 537            run_opts.rho / np.abs(max_eig[0]) * unnormalized_W))
 538        self.W_data, self.W_indices, self.W_indptr, self.W_shape = \
 539            (W_sp.data, W_sp.indices, W_sp.indptr, np.array(list(W_sp.shape)))
 540        # print('Avg. degree of W:')
 541        # print(self.W_data.size/rsvr_size)
 542
 543        # print('Adjacency matrix section:')
 544        # print(self.W_data[:4])
 545
 546        if run_opts.win_type == 'dense':
 547            if run_opts.bias_type != 'new_random':
 548                raise ValueError
 549            Win = (res_gen.random(self.rsvr_size * (input_size + 1)).reshape(self.rsvr_size,
 550                                                                             input_size + 1) * 2 - 1) * run_opts.sigma
 551        else:
 552            if 'full' in run_opts.win_type:
 553                input_vars = np.arange(input_size)
 554            elif run_opts.win_type == 'x':
 555                input_vars = np.array([0])
 556            else:
 557                print('Win_type not recognized.')
 558                raise ValueError()
 559            if run_opts.bias_type == 'old':
 560                const_frac = 0.15
 561                const_conn = int(self.rsvr_size * const_frac)
 562                Win = np.zeros((self.rsvr_size, input_size + 1))
 563                Win[:const_conn, 0] = (res_gen.random(
 564                    Win[:const_conn, 0].size) * 2 - 1) * run_opts.theta
 565                q = int((self.rsvr_size - const_conn) // input_vars.size)
 566                for i, var in enumerate(input_vars):
 567                    Win[const_conn + q * i:const_conn + q *
 568                                           (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma
 569            elif run_opts.bias_type == 'new_random':
 570                Win = np.zeros((self.rsvr_size, input_size + 1))
 571                Win[:, 0] = (res_gen.random(self.rsvr_size) * 2 - 1) * run_opts.theta
 572                q = int(self.rsvr_size // input_vars.size)
 573                for i, var in enumerate(input_vars):
 574                    Win[q * i:q * (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma
 575                leftover_nodes = self.rsvr_size - q * input_vars.size
 576                for i in range(leftover_nodes):
 577                    Win[self.rsvr_size - leftover_nodes + i, input_vars[res_gen.integers(input_vars.size)]] = \
 578                        (res_gen.random() * 2 - 1) * run_opts.sigma
 579            elif run_opts.bias_type == 'new_new_random':
 580                Win = np.zeros((self.rsvr_size, input_size + 1))
 581                Win[:, 0] = (res_gen.random(self.rsvr_size) * 2 - 1) * run_opts.theta
 582                q = int(self.rsvr_size // input_vars.size)
 583                for i, var in enumerate(input_vars):
 584                    Win[q * i:q * (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma
 585                leftover_nodes = self.rsvr_size - q * input_vars.size
 586                var = input_vars[res_gen.choice(
 587                    input_vars.size, size=leftover_nodes, replace=False)]
 588                Win[self.rsvr_size - leftover_nodes:, var +
 589                                                      1] = (res_gen.random(leftover_nodes) * 2 - 1) * run_opts.sigma
 590            elif run_opts.bias_type == 'new_const':
 591                Win = np.zeros((self.rsvr_size, input_size + 1))
 592                Win[:, 0] = run_opts.theta
 593                q = int(self.rsvr_size // input_vars.size)
 594                for i, var in enumerate(input_vars):
 595                    Win[q * i:q * (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma
 596                leftover_nodes = self.rsvr_size - q * input_vars.size
 597                var = input_vars[res_gen.integers(
 598                    input_vars.size, size=leftover_nodes)]
 599                Win[self.rsvr_size - leftover_nodes:, var +
 600                                                      1] = (res_gen.random(leftover_nodes) * 2 - 1) * run_opts.sigma
 601            else:
 602                print('bias_type not recognized.')
 603                raise ValueError()
 604
 605        Win_sp = csc_matrix(Win)
 606        self.Win_data, self.Win_indices, self.Win_indptr, self.Win_shape = \
 607            np.ascontiguousarray(Win_sp.data), \
 608            np.ascontiguousarray(Win_sp.indices), \
 609            np.ascontiguousarray(Win_sp.indptr), \
 610            np.array(list(Win_sp.shape))
 611
 612        self.X = (res_gen.random((self.rsvr_size, run_opts.train_time + run_opts.discard_time + 2)) * 2 - 1)
 613        self.Wout = np.array([])
 614        self.leakage = run_opts.leakage
 615
 616
 617class ResOutput:
 618    """Class for holding the output from a reservoir computer test. Is typically used to save the output from one of
 619    the (potentially parallel) runs."""
 620    def __init__(self, run_opts, noise_array):
 621        """Creates the ResOutput object.
 622        Args:
 623            self: ResOutput object
 624            run_opts: RunOpts object for the test.
 625            noise_array: Array of noise/Jacobian/LMNT regularization parameter values used in this test.
 626        Returns:
 627            self: initialized ResOutput object."""
 628        self.stable_frac_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
 629                                        dtype=object)
 630        self.mean_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
 631                                     dtype=object)
 632        self.max_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
 633                                    dtype=object)
 634        self.variances_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
 635                                      dtype=object)
 636        self.valid_time_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
 637                                       dtype=object)
 638        self.rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
 639                                dtype=object)
 640        self.preds_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
 641                                  dtype=object)
 642        self.wass_dist_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
 643                                      dtype=object)
 644        self.pmap_max_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
 645                                     dtype=object)
 646        self.pmap_max_wass_dist_out = np.zeros(
 647            (noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), dtype=object)
 648        self.stable_frac_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
 649                                        dtype=object)
 650        self.train_mean_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
 651                                           dtype=object)
 652        self.train_max_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
 653                                          dtype=object)
 654        self.grad_eigenvals_out = np.zeros((noise_array.size, run_opts.reg_train_times.size), dtype=object)
 655        self.pred_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
 656                                 dtype=object)
 657
 658    def save(self, run_opts, noise_array, res_itr, train_seed, test_idxs):
 659        """Saves the data in the ResOutput object to a series of .csv files.
 660        Args:
 661            self: ResOutput object
 662            run_opts: RunOpts object for test.
 663            noise_array: Array of noise/Jacobian/LMNT regularization parameter values used in this test.
 664            res_itr: Index for the reservoir iteration used.
 665            train_seed: Index for the training data iteration used.
 666            test_idxs: Indices for the testing data iterations used.
 667        Returns:
 668            Saves .csv files."""
 669        for (i, noise_val), (j, reg_train_time) in product(enumerate(noise_array), enumerate(run_opts.reg_train_times)):
 670            print((i, j))
 671            stable_frac = np.zeros(run_opts.reg_values.size)
 672            for k, array_elem in enumerate(self.stable_frac_out[i, :, j]):
 673                stable_frac[k] = array_elem
 674            np.savetxt(
 675                get_filename(run_opts.run_folder_name, 'stable_frac', res_itr, train_seed, noise_val, reg_train_time),
 676                stable_frac, delimiter=',')
 677
 678            mean_rms = np.zeros((run_opts.reg_values.size, *self.mean_rms_out[i, 0, j].shape))
 679            for k, array_elem in enumerate(self.mean_rms_out[i, :, j]):
 680                mean_rms[k] = array_elem
 681            np.savetxt(
 682                get_filename(run_opts.run_folder_name, 'mean_rms', res_itr, train_seed, noise_val, reg_train_time),
 683                mean_rms, delimiter=',')
 684
 685            max_rms = np.zeros((run_opts.reg_values.size, *self.max_rms_out[i, 0, j].shape))
 686            for k, array_elem in enumerate(self.max_rms_out[i, :, j]):
 687                max_rms[k] = array_elem
 688            np.savetxt(
 689                get_filename(run_opts.run_folder_name, 'max_rms', res_itr, train_seed, noise_val, reg_train_time),
 690                max_rms, delimiter=',')
 691
 692            variances = np.zeros((run_opts.reg_values.size, *self.variances_out[i, 0, j].shape))
 693            for k, array_elem in enumerate(self.variances_out[i, :, j]):
 694                variances[k] = array_elem
 695            np.savetxt(
 696                get_filename(run_opts.run_folder_name, 'variance', res_itr, train_seed, noise_val, reg_train_time),
 697                variances, delimiter=',')
 698
 699            valid_time = np.zeros((run_opts.reg_values.size, *self.valid_time_out[i, 0, j].shape))
 700            for k, array_elem in enumerate(self.valid_time_out[i, :, j]):
 701                valid_time[k] = array_elem
 702            for k in range(run_opts.num_tests):
 703                np.savetxt(
 704                    get_filename(run_opts.run_folder_name, 'valid_time', res_itr, train_seed, noise_val, reg_train_time,
 705                                 test_idx=test_idxs[k]), valid_time[:, k], delimiter=',')
 706
 707            train_mean_rms = np.zeros((run_opts.reg_values.size, *self.train_mean_rms_out[i, 0, j].shape))
 708            for k, array_elem in enumerate(self.train_mean_rms_out[i, :, j]):
 709                train_mean_rms[k] = array_elem
 710            np.savetxt(get_filename(run_opts.run_folder_name, 'train_mean_rms', res_itr, train_seed, noise_val,
 711                                    reg_train_time),
 712                       train_mean_rms, delimiter=',')
 713
 714            train_max_rms = np.zeros((run_opts.reg_values.size, *self.train_max_rms_out[i, 0, j].shape))
 715            for k, array_elem in enumerate(self.train_max_rms_out[i, :, j]):
 716                train_max_rms[k] = array_elem
 717            np.savetxt(
 718                get_filename(run_opts.run_folder_name, 'train_max_rms', res_itr, train_seed, noise_val, reg_train_time),
 719                train_max_rms, delimiter=',')
 720
 721            if run_opts.pmap:
 722                pmap_max_wass_dist = np.zeros((run_opts.reg_values.size, *self.pmap_max_wass_dist_out[i, 0, j].shape))
 723                for k, array_elem in enumerate(self.pmap_max_wass_dist_out[i, :, j]):
 724                    pmap_max_wass_dist[k] = array_elem
 725                np.savetxt(get_filename(run_opts.run_folder_name, 'pmap_max_wass_dist', res_itr, train_seed, noise_val,
 726                                        reg_train_time),
 727                           pmap_max_wass_dist, delimiter=',')
 728
 729                pmap_max = np.zeros((run_opts.reg_values.size, run_opts.num_tests), dtype=object)
 730                for k, l in product(np.arange(run_opts.reg_values.size), np.arange(run_opts.num_tests)):
 731                    pmap_max[k, l] = self.pmap_max_out[i, k, j][l]
 732                    pmap_len = [len(arr) for arr in pmap_max[k, l]]
 733                    max_pmap_len = max(pmap_len)
 734                    pmap_max_save = np.zeros((len(pmap_max[k, l]), max_pmap_len))
 735                    for m in range(len(pmap_max[k, l])):
 736                        pmap_max_save[m, :pmap_len[m]] = pmap_max[k, l][m]
 737                    np.savetxt(get_filename(run_opts.run_folder_name, 'pmap_max', res_itr, train_seed, noise_val,
 738                                            reg_train_time,
 739                                            test_idx=test_idxs[l], reg=run_opts.reg_values[k]),
 740                               pmap_max_save, delimiter=',')
 741            if run_opts.save_time_rms:
 742                rms = np.zeros((run_opts.reg_values.size, *self.rms_out[i, 0, j].shape))
 743                for k, array_elem in enumerate(self.rms_out[i, :, j]):
 744                    rms[k] = array_elem
 745                for k in range(run_opts.num_tests):
 746                    np.savetxt(
 747                        get_filename(run_opts.run_folder_name, 'rms', res_itr, train_seed, noise_val, reg_train_time,
 748                                     test_idx=test_idxs[k]),
 749                        rms[:, k], delimiter=',')
 750            if run_opts.save_eigenvals:
 751                eigenvals = self.grad_eigenvals_out[i, j]
 752                np.savetxt(get_filename(run_opts.run_folder_name, 'gradreg_eigenvals', res_itr, train_seed, noise_val,
 753                                        reg_train_time),
 754                           eigenvals, delimiter=',')
 755            if run_opts.savepred:
 756                pred = np.zeros((run_opts.reg_values.size, *self.pred_out[i, 0, j].shape))
 757                for k, array_elem in enumerate(self.pred_out[i, :, j]):
 758                    pred[k] = array_elem
 759                for l, (k, reg) in product(range(run_opts.num_tests), enumerate(run_opts.reg_values)):
 760                    np.savetxt(
 761                        get_filename(run_opts.run_folder_name, 'pred', res_itr, train_seed, noise_val, reg_train_time,
 762                                     test_idx=test_idxs[l], reg=reg),
 763                        pred[k, l], delimiter=',')
 764
 765
 766class NumericalModel:
 767    """Class for generating training or testing data using one of the test numerical models."""
 768    def __init__(self, tau=0.1, int_step=1, T=300, ttsplit=5000, u0=0, system='lorenz',
 769                 params=np.array([[], []], dtype=np.complex128)):
 770        """Creates the NumericalModel object.
 771        Args:
 772            self: NumericalModel object.
 773            tau: Time step between measurements of the system dynamics.
 774            int_step: the number of numerical integration steps between each measurement.
 775            T: Total number of measurements.
 776            ttsplit: Number of measurements to be used in the training data set.
 777            u0: Initial condition for integration.
 778            system: Name of system to generate data from. Options: 'lorenz', 'KS','KS_d2175'
 779            params: Internal parameters for model integration. Currently only used by KS options.
 780        Returns:
 781            Complete NumericalModel object with precomputed internal parameters."""
 782
 783        if system == 'lorenz':
 784            u_arr = np.ascontiguousarray(lorenzrungekutta(
 785                u0[0], u0[1], u0[2], T, tau))
 786            self.input_size = 3
 787
 788            u_arr[0] = (u_arr[0] - 0) / 7.929788629895004
 789            u_arr[1] = (u_arr[1] - 0) / 8.9932616136662
 790            u_arr[2] = (u_arr[2] - 23.596294463016896) / 8.575917849311919
 791            self.params = params
 792
 793        elif system == 'KS':
 794            u_arr, self.params = kursiv_predict(u0, tau=tau, T=T, params=params)
 795            self.input_size = u_arr.shape[0]
 796            u_arr = np.ascontiguousarray(u_arr) / (1.1876770355823614)
 797        elif system == 'KS_d2175':
 798            u_arr, self.params = kursiv_predict(u0, tau=tau, T=T, d=21.75, params=params)
 799            self.input_size = u_arr.shape[0]
 800            u_arr = np.ascontiguousarray(u_arr) / (1.2146066380280796)
 801        else:
 802            raise ValueError
 803
 804        self.train_length = ttsplit
 805        self.u_arr_train = u_arr[:, :ttsplit + 1]
 806
 807        # u[ttsplit], the (ttsplit + 1)st element, is the last in u_arr_train and the first in u_arr_test
 808        self.u_arr_test = u_arr[:, ttsplit:]
 809
 810
 811class ResPreds:
 812    """Class for loading and storing prediction time series data generated from reservoir computer tests."""
 813    def __init__(self, run_opts):
 814        """Loads the prediction data from .csv files.
 815        Args:
 816            run_opts: RunOpts object containing the run parameters."""
 817        self.data_filename, self.pred_folder = run_opts.run_file_name, run_opts.run_folder_name
 818        self.noise_vals = run_opts.noise_values_array
 819        self.reg_train_vals = run_opts.reg_train_times
 820        self.reg_vals = run_opts.reg_values
 821        print('Starding data read...')
 822        # print(self.pred_folder)
 823        self.preds = np.zeros((run_opts.res_per_test, run_opts.num_trains, run_opts.num_tests, self.noise_vals.size,
 824                               self.reg_train_vals.size, self.reg_vals.size), dtype=object)
 825        total_vals = self.preds.size
 826        with tqdm(total=total_vals) as pbar:
 827            for (i, res), (j, train), (k, test), (l, noise), (m, reg_train), (n, reg) in product(
 828                    enumerate(np.arange(run_opts.res_start, run_opts.res_start + run_opts.res_per_test)),
 829                    enumerate(np.arange(run_opts.train_start, run_opts.train_start + run_opts.num_trains)),
 830                    enumerate(np.arange(run_opts.test_start, run_opts.test_start + run_opts.num_tests)),
 831                    enumerate(self.noise_vals),
 832                    enumerate(self.reg_train_vals), enumerate(self.reg_vals)):
 833                filename = get_filename(self.pred_folder, 'pred', res, train, noise, reg_train, reg = reg, test_idx = test)
 834                if os.name == 'nt' and len(filename) >= 260:
 835                    filename = get_windows_path(filename)
 836                self.preds[i, j, k, l, m, n] = np.loadtxt(filename, delimiter=',')
 837                pbar.update(1)
 838
 839
 840class ResPmap:
 841    """Class for loading and storing Poincare maximum map data from reservoir predictions
 842    generated from reservoir computer tests."""
 843    def __init__(self, run_opts):
 844        """Loads the Poincare maxmimum map data from .csv files.
 845        Args:
 846            run_opts: RunOpts object containing the run parameters."""
 847        self.data_filename, self.pred_folder = run_opts.run_file_name, run_opts.run_folder_name
 848        self.noise_vals = run_opts.noise_values_array
 849        self.reg_train_vals = run_opts.reg_train_times
 850        self.reg_vals = run_opts.reg_values
 851        print('Starding data read...')
 852        # print(self.pred_folder)
 853        self.pmap_max = np.zeros((run_opts.res_per_test, run_opts.num_trains, run_opts.num_tests, self.noise_vals.size,
 854                                  self.reg_train_vals.size, self.reg_vals.size), dtype=object)
 855        total_vals = self.pmap_max.size
 856        with tqdm(total=total_vals) as pbar:
 857            for (i, res), (j, train), (k, test), (l, noise), (m, reg_train), (n, reg) in product(
 858                    enumerate(np.arange(run_opts.res_start, run_opts.res_start + run_opts.res_per_test)),
 859                    enumerate(np.arange(run_opts.train_start, run_opts.train_start + run_opts.num_trains)),
 860                    enumerate(np.arange(run_opts.test_start, run_opts.test_start + run_opts.num_tests)),
 861                    enumerate(self.noise_vals),
 862                    enumerate(self.reg_train_vals), enumerate(self.reg_vals)):
 863                filename = get_filename(self.pred_folder, 'pmap_max', res, train, noise, reg_train, reg = reg, test_idx = test)
 864                if os.name == 'nt' and len(filename) >= 260:
 865                    filename = get_windows_path(filename)
 866                pmap_in = np.loadtxt(filename, delimiter=',')
 867                pmap_max = [pmap_in[o, pmap_in[o] != 0.] for o in range(pmap_in.shape[0])]
 868                self.pmap_max[i, j, k, l, m, n] = pmap_max
 869                pbar.update(1)
 870
 871
 872class ResData:
 873    """Class for loading and storing prediction analysis data generated from reservoir computer tests."""
 874    def __init__(self, run_opts):
 875        """Loads the prediction analysis data from a compressed pandas data file.
 876            Args:
 877                run_opts: RunOpts object containing the run parameters."""
 878        self.data_filename, self.pred_folder = run_opts.run_file_name, run_opts.run_folder_name
 879        print('Starding data read...')
 880        tic = time.perf_counter()
 881        # print(self.data_filename)
 882        if os.name == 'nt' and len(self.data_filename) >= 260:
 883            self.data_filename = get_windows_path(self.data_filename)
 884        self.data = pd.read_csv(self.data_filename, index_col=0)
 885        toc = time.perf_counter()
 886        print('Data reading finished in %0.2f sec.' % (toc - tic))
 887        self.res = pd.unique(self.data['res'])
 888        self.train = pd.unique(self.data['train'])
 889        self.test = pd.unique(self.data['test'])
 890        self.noise = pd.unique(self.data['noise'])
 891        self.reg = pd.unique(self.data['reg'])
 892        self.reg_train = pd.unique(self.data['reg_train'])
 893        self.nan = pd.isna(self.data['variance'])
 894
 895    def shape(self):
 896        """Returns the shape of the pandas data frame"""
 897        return self.data.shape
 898
 899    def size(self):
 900        """Returns the size of the pandas data frame"""
 901        return self.data.size
 902
 903    def data_slice(self, res=np.array([]), train=np.array([]), test=np.array([]),
 904                   reg_train=np.array([]), noise=np.array([]), reg=np.array([]), median_flag=False,
 905                   reduce_axes=[], metric='', gross_frac_metric='valid_time', gross_err_bnd=1e2,
 906                   reduce_fun=pd.DataFrame.median):
 907        """Slices and/or finds the best results using a metric computed by the reduce_fun.
 908        Args:
 909            self: ResPreds object
 910            res: Indices for reservoir results to be returned/optimized over. If left as an empty array, uses all indices.
 911            train: Indices for training data set results to be returned/optimized over. If left as an empty array, uses all indices.
 912            test: Indices for testing data set results to be returned/optimized over. If left as an empty array, uses all indices.
 913            reg_train: Number of training data points for regularization to be returned/optimized over. If left as an empty array, uses all values.
 914            noise: Noise/Jacobian/LMNT regularization parameter value results to be returned/optimized over. If left as an empty array, uses all values.
 915            reg: Tikhonov regularization paramter value results to be returned/optimized over. If left as an empty array, uses all values.
 916            median_flag: Boolean indicating whether the data should be optimized.
 917            reduce_axes: List containing the axes to be optimized over. Elements can be 'res', 'train', 'test', 'reg_train', 'noise', or 'reg'.
 918            metric: Metric to be used to compute which parameters give the best result. Options are:
 919                'gross_frac': Lowest fraction of gross error
 920                'mean_rms': Lowest mean map error.
 921                'max_rms: Lowest maximum map error.
 922                'valid_time': Highest valid prediction time.'
 923            gross_frac_metrix: If using 'gross_frac' as a metric, this is the secondary metric that will be used if multiple parameters give equally good prediction results.
 924            gross_err_bnd: The cutoff in the mean map error above which predictions are considered to have gross error.
 925            reduce_fun: Function for computing the overall performance for a given set of parameters over many tests.
 926        Returns:
 927            Pandas DataFrame containing the sliced and optimized prediction results.
 928        Raises:
 929            ValueError: If any of the inputs are not recognized/incompatible."""
 930        input_list = [res, train, test, reg_train, noise, reg]
 931        name_list = np.array(['res', 'train', 'test', 'reg_train', 'noise', 'reg'])
 932        data_names = [name for name in self.data.columns if name not in name_list]
 933        base_list = [self.res, self.train, self.test, self.reg_train, self.noise, self.reg]
 934        slice_vals = np.zeros(len(input_list), dtype=object)
 935        if median_flag:
 936            if not isinstance(reduce_axes, list):
 937                print('reduce_axes must be a list.')
 938                return ValueError
 939            elif len(reduce_axes) == 0:
 940                print('median_flag is True, but no axes to compute the median over are specified.')
 941                raise ValueError
 942            elif not all(axis in ['res', 'train', 'test', 'noise', 'reg','reg_train'] for axis in reduce_axes) or \
 943                    len(reduce_axes) != len(set(reduce_axes)):
 944                print('reduce_axis max only contain res, train, test, noise, or reg.')
 945                raise ValueError
 946            elif metric not in ['', 'mean_rms', 'max_rms', 'valid_time', 'pmap_max_wass_dist', 'gross_frac']:
 947                print('Metric not recognized.')
 948                raise ValueError
 949            elif len(metric) == 0 and any(axis in ['noise', 'reg'] for axis in reduce_axes):
 950                print('Cannot reduce axes with unspecified metric.')
 951                raise ValueError
 952
 953        for i, item in enumerate(input_list):
 954            if isinstance(item, np.ndarray):
 955                if item.size == 0:
 956                    slice_vals[i] = base_list[i]
 957                else:
 958                    slice_vals[i] = item
 959            elif isinstance(item, list):
 960                if len(item) == 0:
 961                    slice_vals[i] = self.item
 962                else:
 963                    slice_vals[i] = np.array(item)
 964            else:
 965                slice_vals[i] = np.array([item])
 966
 967        sliced_data = self.data
 968        for name, slice_val in zip(name_list, slice_vals):
 969            sliced_data = sliced_data[sliced_data[name].isin(slice_val)]
 970
 971        if not median_flag:
 972            return sliced_data
 973        elif median_flag:
 974            median_slice_data = pd.DataFrame()
 975            remaining_vars = [var not in reduce_axes for var in name_list[:3]]
 976            remaining_vars.extend([True, True, True])
 977            if np.all(remaining_vars):
 978                median_slice_data = sliced_data
 979                nans = pd.isna(median_slice_data['mean_rms'])
 980                near_nans = median_slice_data['mean_rms'] > gross_err_bnd
 981                median_slice_data['gross_count'] = np.zeros(median_slice_data.shape[0])
 982                median_slice_data['gross_frac'] = np.zeros(median_slice_data.shape[0])
 983                median_slice_data.loc[nans | near_nans, 'gross_count'] = 1.0
 984                median_slice_data.loc[nans | near_nans, 'gross_frac'] = 1.0
 985            else:
 986                total_vars = 1
 987                for slice_val in slice_vals[remaining_vars]:
 988                    total_vars *= slice_val.size
 989                for vars_set in product(*slice_vals[remaining_vars]):
 990                    row_dict = {}
 991                    reduced_sliced_data = sliced_data
 992                    for var, name in zip(vars_set, name_list[remaining_vars]):
 993                        row_dict[name] = var
 994                        reduced_sliced_data = reduced_sliced_data[reduced_sliced_data[name] == var]
 995                    if reduced_sliced_data.size == 0:
 996                        for name in data_names:
 997                            if 'variance' in name:
 998                                row_dict[name] = 0.
 999                            elif 'valid_time' not in name:
1000                                row_dict[name] = 1e10
1001                        row_dict['valid_time'] = 0.
1002                        row_dict['gross_count'] = 0.
1003                        row_dict['gross_frac'] = 1.0
1004                        row_dict['data_present'] = False
1005                    else:
1006                        nans = pd.isna(reduced_sliced_data['mean_rms'])
1007                        near_nans = reduced_sliced_data['mean_rms'] > gross_err_bnd
1008                        nan_count = reduced_sliced_data[nans | near_nans].shape[0]
1009                        nan_frac = nan_count / reduced_sliced_data.shape[0]
1010                        valid_times = np.array([])
1011                        for name in data_names:
1012                            if 'variance' in name:
1013                                row_dict[name] = reduce_fun(reduced_sliced_data.loc[~nans, name])
1014                            elif 'valid_time' not in name:
1015                                reduced_sliced_data.loc[nans, name] = 1e10
1016                                row_dict[name] = reduce_fun(reduced_sliced_data[name])
1017                            else:
1018                                reduced_sliced_data.loc[pd.isna(reduced_sliced_data[name]), name] = 0.0
1019                                valid_times = np.append(valid_times, reduced_sliced_data[name].to_numpy())
1020                        row_dict['valid_time'] = reduce_fun(pd.DataFrame(valid_times)).to_numpy()[0]
1021                        row_dict['gross_count'] = nan_count
1022                        row_dict['gross_frac'] = nan_frac
1023                        row_dict['data_present'] = True
1024                    for key in row_dict.keys():
1025                        if not isinstance(row_dict[key], list) and not isinstance(row_dict[key], np.ndarray):
1026                            row_dict[key] = [row_dict[key]]
1027                    median_slice_data = pd.concat([median_slice_data, pd.DataFrame(row_dict)], ignore_index=True,
1028                                                  verify_integrity=True)
1029            if len(metric) == 0:
1030                return median_slice_data
1031            else:
1032                best_median_slice_data = pd.DataFrame()
1033                remaining_vars = [var not in reduce_axes for var in name_list]
1034                total_vars = 1
1035                for slice_val in slice_vals[remaining_vars]:
1036                    total_vars *= slice_val.size
1037                for vars_set in product(*slice_vals[remaining_vars]):
1038                    row_dict = {}
1039                    best_reduced_slice_data = median_slice_data
1040                    for var, name in zip(vars_set, name_list[remaining_vars]):
1041                        row_dict[name] = var
1042                        best_reduced_slice_data = best_reduced_slice_data[best_reduced_slice_data[name] == var]
1043                    if metric == 'valid_time':
1044                        best_reduced_slice_data = best_reduced_slice_data[
1045                            best_reduced_slice_data[metric] == best_reduced_slice_data[metric].max()]
1046                    elif metric == 'gross_frac':
1047                        best_reduced_slice_data = best_reduced_slice_data[
1048                            best_reduced_slice_data[metric] == best_reduced_slice_data[metric].min()]
1049                        if len(best_reduced_slice_data.shape) != 1:
1050                            if gross_frac_metric == 'valid_time':
1051                                best_reduced_slice_data = best_reduced_slice_data[
1052                                    best_reduced_slice_data[gross_frac_metric] == best_reduced_slice_data[
1053                                        gross_frac_metric].max()]
1054                            else:
1055                                best_reduced_slice_data = best_reduced_slice_data[
1056                                    best_reduced_slice_data[gross_frac_metric] == best_reduced_slice_data[
1057                                        gross_frac_metric].min()]
1058                    else:
1059                        best_reduced_slice_data = best_reduced_slice_data[
1060                            best_reduced_slice_data[metric] == best_reduced_slice_data[metric].min()]
1061                    if len(best_reduced_slice_data.shape) != 1:
1062                        best_reduced_slice_data = best_reduced_slice_data[best_reduced_slice_data['noise'] ==
1063                                                                          best_reduced_slice_data['noise'].min()]
1064                    if len(best_reduced_slice_data.shape) != 1:
1065                        best_reduced_slice_data = best_reduced_slice_data[best_reduced_slice_data['reg'] ==
1066                                                                          best_reduced_slice_data['reg'].min()]
1067                    for key in best_reduced_slice_data.keys():
1068                        if not isinstance(best_reduced_slice_data[key], list) and not isinstance(
1069                                best_reduced_slice_data[key], np.ndarray):
1070                            best_reduced_slice_data[key] = [best_reduced_slice_data[key]]
1071                    best_median_slice_data = pd.concat([best_median_slice_data, best_reduced_slice_data],
1072                                                       ignore_index=True, verify_integrity=True)
1073                return best_median_slice_data
class RunOpts:
 19class RunOpts:
 20    """This class contains the various parameters and options that will be used during the time series data generation,
 21    reservoir generation, training, testing, and output."""
 22    def __init__(self, argv=None,
 23                 runflag=True,
 24                 train_time=20000,
 25                 test_time=None,
 26                 sync_time=2000,
 27                 discard_time=500,
 28                 res_size=500,
 29                 res_per_test=1,
 30                 noise_realizations=1,
 31                 num_tests=1,
 32                 num_trains=1,
 33                 traintype='normal',
 34                 noisetype='gaussian',
 35                 system='KS',
 36                 savepred=False,
 37                 save_time_rms=False,
 38                 squarenodes=True,
 39                 rho=0.6,
 40                 sigma=0.1,
 41                 theta=0.1,
 42                 leakage=1.0,
 43                 bias_type='new_random',
 44                 win_type='full_0centered',
 45                 debug_mode=False,
 46                 pmap=False,
 47                 machine='deepthought2',
 48                 ifray=False,
 49                 tau=None,
 50                 num_cpus=1,
 51                 metric='mean_rms',
 52                 return_all=True,
 53                 save_eigenvals=False,
 54                 max_valid_time=2000,
 55                 noise_values_array=np.logspace(-4, 3, num=3, base=10),
 56                 reg_values=np.append(0., np.logspace(-11, -9, 5)),
 57                 res_start=0,
 58                 train_start=0,
 59                 test_start=0,
 60                 reg_train_times=None,
 61                 root_folder=None,
 62                 prior='zero',
 63                 save_truth=False):
 64        """Initializes the RunOpts class.
 65
 66        Raises:
 67            ValueError: If an input instance variable has an unaccepted value.
 68            TypeError: If an input instance variable has an incorrect type."""
 69        self.argv = argv
 70        """Command line input for generating a RunOpts object. If left as None, then the object will be generated using
 71            the other inputs or defaults. If multiple instances of the same variable are given, the class will default
 72            to the command line input. See RunOpts.get_run_opts for more information. Default: None"""
 73        self.system = system
 74        """String denoting which dynamical system we are obtaining time series data from. Options: 'lorenz' for the 
 75        Lorenz 63 equations, 'KS' for the Kuramoto-Sivashinsky equation with 64 grid points and a periodicity length of
 76        22, and KS_d2175 for the Kuramoto-Sivashinksy equation with 64 grid points and a periodicity length of 21.75.
 77        Default: 'KS'"""
 78        self.tau = tau
 79        """Time between each data point in the time series data. If system = 'lorenz', this value must be evenly divided
 80        by 0.01 (the integration time step). If left as None, then the class will set tau to the default value for the
 81        particular dynamical system. Default: None"""
 82        self.train_time = train_time
 83        """Number of training data points. Default: 20000"""
 84        self.test_time = test_time
 85        """Number of testing data points. If left as None, then the class will default to 4000 if system = 'lorenz', or 
 86        16000 if system = 'KS' or 'KS_d2175'. Default: None"""
 87        self.sync_time = sync_time
 88        """Number of data points used to synchronize the reservoir to each test data set. Default: 2000"""
 89        self.discard_time = discard_time
 90        """Number of data points used to synchronize the reservoir to each training data set. Default: 500"""
 91        self.res_size = res_size
 92        """Number of nodes in the reservoir. Default: 500"""
 93        self.rho = rho
 94        """Reservoir spectral radius. Default: 0.6"""
 95        self.sigma = sigma
 96        """Reservoir input scaling. Default: 0.1"""
 97        self.theta = theta
 98        """Reservoir input bias scaling. Default: 0.1"""
 99        self.leakage = leakage
100        """Reservoir leaking rate. Default: 1.0"""
101        self.squarenodes = squarenodes
102        """Boolean denoting whether or not the squared node states are including in the reservoir feature vector.
103        Default: True"""
104        self.bias_type = bias_type
105        """Type of reservoir input bias to be used. See the Reservoir class for available options. 
106        Default: new_random"""
107        self.win_type = win_type
108        """Type of input coupling matrix to be used. See the Reservoir class for available options. 
109        Default: full_0centered"""
110        self.traintype = traintype
111        """Type of training to be used to determine the reservoir output coupling matrix. There are a number of options
112        available, but those used in the paper are:
113
114        'normal' - Standard reservoir training, potentially with input noise added.
115
116        'gradientk%d' % (Number of noise steps) - Reservoir training with no noise and LMNT regularization for a number of 
117        noise steps > 1, or Jacobian regularization for a number of noise steps = 1.
118
119        'regzerok%d' % (Number of noise steps) - Reservoir training with no noise and LMNT/Jacobian regularization computed
120        using a zero-input and zero reservoir state.
121        
122        Default: 'normal'
123        """
124        self.noisetype = noisetype
125        """Type of noise to be added to the reservoir input during training. Options are 'none' and 'gaussian'.
126        Default: 'none'"""
127        self.noise_values_array = noise_values_array
128        """Numpy array containing the variance of the added input noise (if noisetype = 'gaussian') or the LMNT/Jacobian
129        regularization parameter value (if traintype = 'gradientk%d' or 'regzerok%d'). Each value contained in the array
130        will be tested separately using each of the reservoirs, training, and testing data sets.
131        Default: np.logspace(-4, 3, num=3, base=10)"""
132        self.reg_train_times = reg_train_times
133        """Numpy array containing the number of training data points to be used to train the LMNT or Jacobian 
134        regularization. If left as None, then the class will default to an array containing only
135        the total number of training data points for standard LMNT/Jacobian or the number of noise steps 
136        if using zero-input LMNT. Default: None"""
137        self.noise_realizations = noise_realizations
138        """Number of input noise realizations used to train the reservoir (if training with noise). If not training with
139        noise, set to 1. Default: 1"""
140        self.reg_values = reg_values
141        """Numpy array containing the Tikhonov regularization parameter values. Each value contained in the array
142        will be tested separately using each of the reservoirs, training, and testing data sets.
143        Default: np.append(0., np.logspace(-11, -9, 5))"""
144        self.prior = prior
145        """Prior to be used when computing the output coupling matrix using Tikhonov regularization. Options are:
146
147        'zero' - Standard Tikhonov regularization with a zero prior.
148
149        'input_pass' - Tikhonov regularization with a persistence prior (i.e., set the input pass-through weights to 1).
150        
151        Default: 'zero'"""
152        self.max_valid_time = max_valid_time
153        """Maximum valid time for each valid time test during the testing period. This should be set so that
154        test_time / max_valid_time is a whole number greater than 0. Default: 2000"""
155        self.res_per_test = res_per_test
156        """Number of random reservoir realizations to test. Default: 1"""
157        self.num_trains = num_trains
158        """Number of independently generated training data sets to test with. Default: 1"""
159        self.num_tests = num_tests
160        """Number of independently generated testing data sets to test with. Default: 1"""
161        self.res_start = res_start
162        """Starting iterate for generating the random seeds that are used to generate the reservoir. Default: 0"""
163        self.train_start = train_start
164        """Starting iterate for generating the random seeds that are used to generate the training data sets.
165        Default: 0"""
166        self.test_start = test_start
167        """Starting iterate for generating the random seeds that are used to generate the testing data sets.
168        Default: 0"""
169        self.root_folder = root_folder
170        """Location where output data will be stored in the Data folder. If None, then defaults to the current working 
171        directory. Default: None"""
172        self.return_all = return_all
173        """Boolean for determining of all results should be returned, or only the results with the obtained using the
174        "best" Tikhonov regularization parameter value based on the selected metric. Default: True"""
175        self.metric = metric
176        """Metric for determining the "best" results. Not used if return_all = True. Options include 'mean_rms', 'max_rms',
177        and 'stable_frac'. Caution: Some options may be deprecated. Default: 'mss-var'"""
178        self.savepred = savepred
179        """Boolean for determining if reservoir prediction time series should be saved. Default: False"""
180        self.save_time_rms = save_time_rms
181        """Boolean for determining if reservoir prediction RMS error should be saved. Default: False"""
182        self.pmap = pmap
183        """Boolean for determining if reservoir prediction Poincare maximum map should be saved. Default: False"""
184        self.save_eigenvals = save_eigenvals
185        """Boolean for determining if the eigenvalues of the LMNT/Jacobian regularization matrices should be saved.
186        Default: False"""
187        self.save_truth = save_truth
188        """Boolean for determining if the true testing data should be saved. Default: False"""
189        self.ifray = ifray
190        """Boolean for determining if ray should be used to compute results for multiple reservoirs and training 
191        data sets. Default: False"""
192        self.num_cpus = num_cpus
193        """If using ray for paralellization, this sets the number of cpus to be used. Default: 1"""
194        self.machine = machine
195        """Machine which results are computed on. Leave as personal unless you are connecting to a ray cluster 
196        elsewhere. Default: 'personal'"""
197        self.runflag = runflag
198        """True indicates that we are about to compute results, and the appropriate directories should be created.
199        Otherwise, we do not create additional directories. Default: True"""
200        self.debug_mode = debug_mode
201        """Boolean for determining if errors during reservoir training which could arise from non-convergence of the 
202        eigenvalue solver should be suppressed. If left as False, will suppress errors im much of the core code,
203        so this should be set to True if making changes. Default: False"""
204        if not isinstance(argv, type(None)):
205            self.get_run_opts(argv)
206        if isinstance(self.tau, type(None)):
207            if self.system == 'lorenz':
208                self.tau = 0.1
209            elif self.system in ['KS', 'KS_d2175']:
210                self.tau = 0.25
211        if isinstance(self.test_time, type(None)):
212            if self.system == 'lorenz':
213                self.test_time = 4000
214            elif 'KS' in self.system:
215                self.test_time = 16000
216        if not isinstance(self.reg_train_times, np.ndarray):
217            if isinstance(self.reg_train_times, type(None)):
218                self.reg_train_times = np.array([self.train_time])
219            elif isinstance(self.reg_train_times, int):
220                self.reg_train_times = np.array([self.reg_train_times])
221            else:
222                raise TypeError()
223        if isinstance(self.root_folder, type(None)):
224            self.root_folder = os.getcwd()
225        if isinstance(self.reg_train_times, np.ndarray) or isinstance(self.reg_train_times, list):
226            if (self.reg_train_times[0] != self.train_time or len(self.reg_train_times) != 1) and (
227                    self.traintype in ['normal', 'normalres1', 'normalres2', 'rmean', 'rmeanres1',
228                                       'rmeanres2', 'rplusq', 'rplusqres1',
229                                       'rplusqres2'] or 'confined' in self.traintype):
230                print(('Traintypes "normal", "rmean", and "rplusq" are not '
231                       'compatible with fractional regularization training.'))
232                raise ValueError
233        if self.prior not in ['zero', 'input_pass']:
234            print('Prior type not recognized.')
235            raise ValueError
236        self.run_file_name = ''
237        self.run_folder_name = ''
238        self.get_file_name()
239
240    def get_file_name(self):
241        """Creates the folder and final data file name for the tests about to be run.
242        Args:
243            self: RunOpts object.
244        Returns:
245            RunOpts object with initialized folder and file name variables. Also creates the aforementioned folder
246            if not already created.
247        """
248        if self.prior == 'zero':
249            prior_str = ''
250        else:
251            prior_str = '_prior_%s' % self.prior
252        if self.savepred:
253            predflag = '_wpred'
254        else:
255            predflag = ''
256        if self.save_time_rms:
257            timeflag = '_savetime'
258        else:
259            timeflag = ''
260        if self.squarenodes:
261            squarenodes_flag = '_squarenodes'
262        else:
263            squarenodes_flag = ''
264        if self.save_eigenvals:
265            eigenval_flag = '_wmoregradeigs'
266        else:
267            eigenval_flag = ''
268        if self.pmap:
269            pmap_flag = '_wpmap0'
270        else:
271            pmap_flag = ''
272        data_folder_base = os.path.join(self.root_folder, 'Data')
273        if not os.path.isdir(data_folder_base):
274            os.mkdir(data_folder_base)
275
276        if not self.return_all:
277            data_folder = os.path.join(data_folder_base, '%s_noisetest_noisetype_%s_traintype_%s' % (
278                self.system, self.noisetype, self.traintype))
279            run_name = ('%s%s%s%s%s%s_rho%0.1f_sigma%1.1e_leakage%0.3f_win_%s_bias_%s_tau%0.2f'
280                        '_%dnodes_%dtrain_%dreals_noisetype_%s_traintype_%s%s_metric_%s') \
281                       % (self.system, predflag, timeflag, eigenval_flag, pmap_flag, squarenodes_flag, self.rho,
282                          self.sigma, self.leakage, self.win_type, self.bias_type, self.tau, self.res_size,
283                          self.train_time, self.noise_realizations, self.noisetype, self.traintype, prior_str,
284                          self.metric)
285        else:
286            data_folder = os.path.join(data_folder_base, '%s_noisetest_noisetype_%s_traintype_%s' % (
287                self.system, self.noisetype, self.traintype))
288            run_name = ('%s%s%s%s%s%s_rho%0.1f_sigma%1.1e_leakage%0.3f_win_%s_bias_%s_tau%0.2f'
289                        '_%dnodes_%dtrain_%dreals_noisetype_%s_traintype_%s%s') % (
290                           self.system, predflag, timeflag, eigenval_flag, pmap_flag, squarenodes_flag, self.rho,
291                           self.sigma,
292                           self.leakage, self.win_type, self.bias_type, self.tau, self.res_size, self.train_time,
293                           self.noise_realizations, self.noisetype, self.traintype, prior_str)
294
295        if self.runflag:
296            if not os.path.isdir(data_folder):
297                os.mkdir(data_folder)
298            if not os.path.isdir(os.path.join(data_folder, run_name + '_folder')):
299                os.mkdir(os.path.join(data_folder, run_name + '_folder'))
300        self.run_file_name = os.path.join(data_folder, run_name + '.bz2')
301        self.run_folder_name = os.path.join(data_folder, run_name + '_folder')
302
303    def get_run_opts(self):
304        """Processes the command line input into instance variables.
305        Args:
306            self: RunOpts object.
307        Returns:
308            RunOpts object with instance variables set from command line input.
309        Raises:
310            GetoptError: Raises an error of command line arguments no recognized."""
311        try:
312            opts, args = getopt.getopt(self.argv, "T:N:r:",
313                                       ['testtime=', 'noisetype=', 'traintype=', 'system=', 'res=',
314                                        'tests=', 'trains=', 'savepred=', 'tau=', 'rho=',
315                                        'sigma=', 'theta=','leakage=', 'bias_type=', 'debug=', 'win_type=',
316                                        'machine=', 'num_cpus=', 'pmap=', 'parallel=', 'metric=', 'returnall=',
317                                        'savetime=', 'saveeigenvals=', 'noisevals=', 'regvals=', 'maxvt=',
318                                        'resstart=', 'trainstart=', 'teststart=',
319                                        'squarenodes=', 'regtraintimes=', 'discardlen=',
320                                        'prior=', 'synctime=', 'datarootdir=', 'savetruth='])
321        except getopt.GetoptError:
322            print('Error: Some options not recognized')
323            sys.exit(2)
324        for opt, arg in opts:
325            if opt == '-T':
326                self.train_time = int(arg)
327                print('Training iterations: %d' % self.train_time)
328            elif opt == '-N':
329                self.res_size = int(arg)
330                print('Reservoir nodes: %d' % self.res_size)
331            elif opt == '-r':
332                self.noise_realizations = int(arg)
333                print('Noise Realizations: %d' % self.noise_realizations)
334            elif opt == '--savetruth':
335                if arg == 'True':
336                    self.save_truth = True
337                elif arg == 'False':
338                    self.save_truth = False
339                else:
340                    raise ValueError
341            elif opt == '--datarootdir':
342                self.root_folder = str(arg)
343                print('Root directory for data: %s' % self.root_folder)
344            elif opt == '--synctime':
345                self.sync_time = int(arg)
346                print('Sync time: %d' % self.sync_time)
347            elif opt == '--testtime':
348                self.test_time = int(arg)
349                if self.test_time == 0:
350                    print('Testing duration: default')
351                else:
352                    print('Testing duration: %d' % self.test_time)
353            elif opt == '--resstart':
354                self.res_start = int(arg)
355                print('Reservoir ensemble start: %d' % self.res_start)
356            elif opt == '--trainstart':
357                self.train_start = int(arg)
358                print('Train ensemble start: %d' % self.train_start)
359            elif opt == '--teststart':
360                self.test_start = int(arg)
361                print('Test ensemble start: %d' % self.test_start)
362            elif opt == '--saveeigenvals':
363                if arg == 'True':
364                    self.save_eigenvals = True
365                elif arg == 'False':
366                    self.save_eigenvals = False
367                else:
368                    raise ValueError
369                print('Save grad reg eigenvalues: %s' % arg)
370            elif opt == '--prior':
371                self.prior = str(arg)
372                print('Prior type: %s' % self.prior)
373            elif opt == '--discardlen':
374                self.discard_time = int(arg)
375                print('Discard iterations: %d' % self.discard_time)
376            elif opt == '--squarenodes':
377                if arg == 'True':
378                    self.squarenodes = True
379                elif arg == 'False':
380                    self.squarenodes = False
381                else:
382                    raise ValueError
383                print('Square reservoir nodes: %s' % arg)
384            elif opt == '--maxvt':
385                self.max_valid_time = int(arg)
386                print('Maximum valid time: %d' % self.max_valid_time)
387            elif opt == '--noisevals':
388                self.noise_values_array = np.array([float(noise) for noise in arg.split(',')])
389                noise_str = '[ '
390                for noise in self.noise_values_array:
391                    noise_str += '%0.3e, ' % noise
392                noise_str = noise_str[:-2] + ' ]'
393                print('Noise values: %s' % noise_str)
394            elif opt == '--regvals':
395                self.reg_values = np.array([float(reg) for reg in arg.split(',')])
396                reg_str = '[ '
397                for reg in self.reg_values:
398                    reg_str += '%0.3e, ' % reg
399                reg_str = reg_str[:-2] + ' ]'
400                print('Regularization values: %s' % reg_str)
401            elif opt == '--regtraintimes':
402                if arg != 'None':
403                    self.reg_train_times = np.array([int(reg_train) for reg_train in arg.split(',')])
404                    reg_train_str = '[ '
405                    for reg_train in self.reg_train_times:
406                        reg_train_str += '%0.3e, ' % reg_train
407                    reg_train_str = reg_train_str[:-2] + ' ]'
408                    print('Regularization training times: %s' % reg_train_str)
409            elif opt == '--savetime':
410                if str(arg) == 'True':
411                    self.save_time_rms = True
412                elif str(arg) == 'False':
413                    self.save_time_rms = False
414                else:
415                    raise ValueError
416            elif opt == '--metric':
417                self.metric = str(arg)
418                if self.metric not in ['pmap_max_wass_dist', 'mean_rms', 'max_rms', 'mss_var']:
419                    raise ValueError
420                print('Stability metric: %s' % self.metric)
421            elif opt == '--returnall':
422                if arg == 'True':
423                    self.return_all = True
424                elif arg == 'False':
425                    self.return_all = False
426                else:
427                    raise ValueError
428            elif opt == '--parallel':
429                parallel_in = str(arg)
430                if parallel_in == 'True':
431                    self.ifray = True
432                elif parallel_in == 'False':
433                    self.ifray = False
434                else:
435                    raise ValueError
436            elif opt == '--pmap':
437                pmap_in = str(arg)
438                if pmap_in == 'True':
439                    self.pmap = True
440                elif pmap_in == 'False':
441                    self.pmap = False
442                else:
443                    raise ValueError
444            elif opt == '--machine':
445                self.machine = str(arg)
446                if self.machine not in ['deepthought2', 'personal']:
447                    raise ValueError
448                print('Machine: %s' % self.machine)
449            elif opt == '--num_cpus':
450                self.num_cpus = int(arg)
451                print('Number of CPUS: %d' % self.num_cpus)
452            elif opt == '--rho':
453                self.rho = float(arg)
454                print('Rho: %f' % self.rho)
455            elif opt == '--sigma':
456                self.sigma = float(arg)
457                print('Sigma: %f' % self.sigma)
458            elif opt == '--theta':
459                self.theta = float(arg)
460                print('Theta: %f' % self.theta)
461            elif opt == '--leakage':
462                self.leakage = float(arg)
463                print('Leakage: %f' % self.leakage)
464            elif opt == '--tau':
465                self.tau = float(arg)
466                print('Reservoir timestep: %f' % self.tau)
467            elif opt == '--win_type':
468                self.win_type = str(arg)
469                print('Win Type: %s' % self.win_type)
470            elif opt == '--bias_type':
471                self.bias_type = str(arg)
472                print('Bias Type: %s' % self.bias_type)
473            elif opt == '--res':
474                self.res_per_test = int(arg)
475                print('Number of reservoirs: %d' % self.res_per_test)
476            elif opt == '--tests':
477                self.num_tests = int(arg)
478                print('Number of tests: %d' % self.num_tests)
479            elif opt == '--trains':
480                self.num_trains = int(arg)
481                print('Number of training data sequences: %d' % self.num_trains)
482            elif opt == '--savepred':
483                if arg == 'True':
484                    self.savepred = True
485                elif arg == 'False':
486                    self.savepred = False
487                print('Saving predictions: %s' % arg)
488            elif opt == '--noisetype':
489                self.noisetype = str(arg)
490                print('Noise type: %s' % self.noisetype)
491            elif opt == '--traintype':
492                self.traintype = str(arg)
493                print('Training type: %s' % self.traintype)
494            elif opt == '--system':
495                self.system = str(arg)
496                print('System: %s' % self.system)
497            elif opt == '--debug':
498                if arg == 'True':
499                    self.debug_mode = True
500                elif arg == 'False':
501                    self.debug_mode = False
502                print('Debug Mode: %s' % arg)

This class contains the various parameters and options that will be used during the time series data generation, reservoir generation, training, testing, and output.

RunOpts( argv=None, runflag=True, train_time=20000, test_time=None, sync_time=2000, discard_time=500, res_size=500, res_per_test=1, noise_realizations=1, num_tests=1, num_trains=1, traintype='normal', noisetype='gaussian', system='KS', savepred=False, save_time_rms=False, squarenodes=True, rho=0.6, sigma=0.1, theta=0.1, leakage=1.0, bias_type='new_random', win_type='full_0centered', debug_mode=False, pmap=False, machine='deepthought2', ifray=False, tau=None, num_cpus=1, metric='mean_rms', return_all=True, save_eigenvals=False, max_valid_time=2000, noise_values_array=array([1.00000000e-04, 3.16227766e-01, 1.00000000e+03]), reg_values=array([0.00000000e+00, 1.00000000e-11, 3.16227766e-11, 1.00000000e-10, 3.16227766e-10, 1.00000000e-09]), res_start=0, train_start=0, test_start=0, reg_train_times=None, root_folder=None, prior='zero', save_truth=False)
 22    def __init__(self, argv=None,
 23                 runflag=True,
 24                 train_time=20000,
 25                 test_time=None,
 26                 sync_time=2000,
 27                 discard_time=500,
 28                 res_size=500,
 29                 res_per_test=1,
 30                 noise_realizations=1,
 31                 num_tests=1,
 32                 num_trains=1,
 33                 traintype='normal',
 34                 noisetype='gaussian',
 35                 system='KS',
 36                 savepred=False,
 37                 save_time_rms=False,
 38                 squarenodes=True,
 39                 rho=0.6,
 40                 sigma=0.1,
 41                 theta=0.1,
 42                 leakage=1.0,
 43                 bias_type='new_random',
 44                 win_type='full_0centered',
 45                 debug_mode=False,
 46                 pmap=False,
 47                 machine='deepthought2',
 48                 ifray=False,
 49                 tau=None,
 50                 num_cpus=1,
 51                 metric='mean_rms',
 52                 return_all=True,
 53                 save_eigenvals=False,
 54                 max_valid_time=2000,
 55                 noise_values_array=np.logspace(-4, 3, num=3, base=10),
 56                 reg_values=np.append(0., np.logspace(-11, -9, 5)),
 57                 res_start=0,
 58                 train_start=0,
 59                 test_start=0,
 60                 reg_train_times=None,
 61                 root_folder=None,
 62                 prior='zero',
 63                 save_truth=False):
 64        """Initializes the RunOpts class.
 65
 66        Raises:
 67            ValueError: If an input instance variable has an unaccepted value.
 68            TypeError: If an input instance variable has an incorrect type."""
 69        self.argv = argv
 70        """Command line input for generating a RunOpts object. If left as None, then the object will be generated using
 71            the other inputs or defaults. If multiple instances of the same variable are given, the class will default
 72            to the command line input. See RunOpts.get_run_opts for more information. Default: None"""
 73        self.system = system
 74        """String denoting which dynamical system we are obtaining time series data from. Options: 'lorenz' for the 
 75        Lorenz 63 equations, 'KS' for the Kuramoto-Sivashinsky equation with 64 grid points and a periodicity length of
 76        22, and KS_d2175 for the Kuramoto-Sivashinksy equation with 64 grid points and a periodicity length of 21.75.
 77        Default: 'KS'"""
 78        self.tau = tau
 79        """Time between each data point in the time series data. If system = 'lorenz', this value must be evenly divided
 80        by 0.01 (the integration time step). If left as None, then the class will set tau to the default value for the
 81        particular dynamical system. Default: None"""
 82        self.train_time = train_time
 83        """Number of training data points. Default: 20000"""
 84        self.test_time = test_time
 85        """Number of testing data points. If left as None, then the class will default to 4000 if system = 'lorenz', or 
 86        16000 if system = 'KS' or 'KS_d2175'. Default: None"""
 87        self.sync_time = sync_time
 88        """Number of data points used to synchronize the reservoir to each test data set. Default: 2000"""
 89        self.discard_time = discard_time
 90        """Number of data points used to synchronize the reservoir to each training data set. Default: 500"""
 91        self.res_size = res_size
 92        """Number of nodes in the reservoir. Default: 500"""
 93        self.rho = rho
 94        """Reservoir spectral radius. Default: 0.6"""
 95        self.sigma = sigma
 96        """Reservoir input scaling. Default: 0.1"""
 97        self.theta = theta
 98        """Reservoir input bias scaling. Default: 0.1"""
 99        self.leakage = leakage
100        """Reservoir leaking rate. Default: 1.0"""
101        self.squarenodes = squarenodes
102        """Boolean denoting whether or not the squared node states are including in the reservoir feature vector.
103        Default: True"""
104        self.bias_type = bias_type
105        """Type of reservoir input bias to be used. See the Reservoir class for available options. 
106        Default: new_random"""
107        self.win_type = win_type
108        """Type of input coupling matrix to be used. See the Reservoir class for available options. 
109        Default: full_0centered"""
110        self.traintype = traintype
111        """Type of training to be used to determine the reservoir output coupling matrix. There are a number of options
112        available, but those used in the paper are:
113
114        'normal' - Standard reservoir training, potentially with input noise added.
115
116        'gradientk%d' % (Number of noise steps) - Reservoir training with no noise and LMNT regularization for a number of 
117        noise steps > 1, or Jacobian regularization for a number of noise steps = 1.
118
119        'regzerok%d' % (Number of noise steps) - Reservoir training with no noise and LMNT/Jacobian regularization computed
120        using a zero-input and zero reservoir state.
121        
122        Default: 'normal'
123        """
124        self.noisetype = noisetype
125        """Type of noise to be added to the reservoir input during training. Options are 'none' and 'gaussian'.
126        Default: 'none'"""
127        self.noise_values_array = noise_values_array
128        """Numpy array containing the variance of the added input noise (if noisetype = 'gaussian') or the LMNT/Jacobian
129        regularization parameter value (if traintype = 'gradientk%d' or 'regzerok%d'). Each value contained in the array
130        will be tested separately using each of the reservoirs, training, and testing data sets.
131        Default: np.logspace(-4, 3, num=3, base=10)"""
132        self.reg_train_times = reg_train_times
133        """Numpy array containing the number of training data points to be used to train the LMNT or Jacobian 
134        regularization. If left as None, then the class will default to an array containing only
135        the total number of training data points for standard LMNT/Jacobian or the number of noise steps 
136        if using zero-input LMNT. Default: None"""
137        self.noise_realizations = noise_realizations
138        """Number of input noise realizations used to train the reservoir (if training with noise). If not training with
139        noise, set to 1. Default: 1"""
140        self.reg_values = reg_values
141        """Numpy array containing the Tikhonov regularization parameter values. Each value contained in the array
142        will be tested separately using each of the reservoirs, training, and testing data sets.
143        Default: np.append(0., np.logspace(-11, -9, 5))"""
144        self.prior = prior
145        """Prior to be used when computing the output coupling matrix using Tikhonov regularization. Options are:
146
147        'zero' - Standard Tikhonov regularization with a zero prior.
148
149        'input_pass' - Tikhonov regularization with a persistence prior (i.e., set the input pass-through weights to 1).
150        
151        Default: 'zero'"""
152        self.max_valid_time = max_valid_time
153        """Maximum valid time for each valid time test during the testing period. This should be set so that
154        test_time / max_valid_time is a whole number greater than 0. Default: 2000"""
155        self.res_per_test = res_per_test
156        """Number of random reservoir realizations to test. Default: 1"""
157        self.num_trains = num_trains
158        """Number of independently generated training data sets to test with. Default: 1"""
159        self.num_tests = num_tests
160        """Number of independently generated testing data sets to test with. Default: 1"""
161        self.res_start = res_start
162        """Starting iterate for generating the random seeds that are used to generate the reservoir. Default: 0"""
163        self.train_start = train_start
164        """Starting iterate for generating the random seeds that are used to generate the training data sets.
165        Default: 0"""
166        self.test_start = test_start
167        """Starting iterate for generating the random seeds that are used to generate the testing data sets.
168        Default: 0"""
169        self.root_folder = root_folder
170        """Location where output data will be stored in the Data folder. If None, then defaults to the current working 
171        directory. Default: None"""
172        self.return_all = return_all
173        """Boolean for determining of all results should be returned, or only the results with the obtained using the
174        "best" Tikhonov regularization parameter value based on the selected metric. Default: True"""
175        self.metric = metric
176        """Metric for determining the "best" results. Not used if return_all = True. Options include 'mean_rms', 'max_rms',
177        and 'stable_frac'. Caution: Some options may be deprecated. Default: 'mss-var'"""
178        self.savepred = savepred
179        """Boolean for determining if reservoir prediction time series should be saved. Default: False"""
180        self.save_time_rms = save_time_rms
181        """Boolean for determining if reservoir prediction RMS error should be saved. Default: False"""
182        self.pmap = pmap
183        """Boolean for determining if reservoir prediction Poincare maximum map should be saved. Default: False"""
184        self.save_eigenvals = save_eigenvals
185        """Boolean for determining if the eigenvalues of the LMNT/Jacobian regularization matrices should be saved.
186        Default: False"""
187        self.save_truth = save_truth
188        """Boolean for determining if the true testing data should be saved. Default: False"""
189        self.ifray = ifray
190        """Boolean for determining if ray should be used to compute results for multiple reservoirs and training 
191        data sets. Default: False"""
192        self.num_cpus = num_cpus
193        """If using ray for paralellization, this sets the number of cpus to be used. Default: 1"""
194        self.machine = machine
195        """Machine which results are computed on. Leave as personal unless you are connecting to a ray cluster 
196        elsewhere. Default: 'personal'"""
197        self.runflag = runflag
198        """True indicates that we are about to compute results, and the appropriate directories should be created.
199        Otherwise, we do not create additional directories. Default: True"""
200        self.debug_mode = debug_mode
201        """Boolean for determining if errors during reservoir training which could arise from non-convergence of the 
202        eigenvalue solver should be suppressed. If left as False, will suppress errors im much of the core code,
203        so this should be set to True if making changes. Default: False"""
204        if not isinstance(argv, type(None)):
205            self.get_run_opts(argv)
206        if isinstance(self.tau, type(None)):
207            if self.system == 'lorenz':
208                self.tau = 0.1
209            elif self.system in ['KS', 'KS_d2175']:
210                self.tau = 0.25
211        if isinstance(self.test_time, type(None)):
212            if self.system == 'lorenz':
213                self.test_time = 4000
214            elif 'KS' in self.system:
215                self.test_time = 16000
216        if not isinstance(self.reg_train_times, np.ndarray):
217            if isinstance(self.reg_train_times, type(None)):
218                self.reg_train_times = np.array([self.train_time])
219            elif isinstance(self.reg_train_times, int):
220                self.reg_train_times = np.array([self.reg_train_times])
221            else:
222                raise TypeError()
223        if isinstance(self.root_folder, type(None)):
224            self.root_folder = os.getcwd()
225        if isinstance(self.reg_train_times, np.ndarray) or isinstance(self.reg_train_times, list):
226            if (self.reg_train_times[0] != self.train_time or len(self.reg_train_times) != 1) and (
227                    self.traintype in ['normal', 'normalres1', 'normalres2', 'rmean', 'rmeanres1',
228                                       'rmeanres2', 'rplusq', 'rplusqres1',
229                                       'rplusqres2'] or 'confined' in self.traintype):
230                print(('Traintypes "normal", "rmean", and "rplusq" are not '
231                       'compatible with fractional regularization training.'))
232                raise ValueError
233        if self.prior not in ['zero', 'input_pass']:
234            print('Prior type not recognized.')
235            raise ValueError
236        self.run_file_name = ''
237        self.run_folder_name = ''
238        self.get_file_name()

Initializes the RunOpts class.

Raises
  • ValueError: If an input instance variable has an unaccepted value.
  • TypeError: If an input instance variable has an incorrect type.
argv

Command line input for generating a RunOpts object. If left as None, then the object will be generated using the other inputs or defaults. If multiple instances of the same variable are given, the class will default to the command line input. See RunOpts.get_run_opts for more information. Default: None

system

String denoting which dynamical system we are obtaining time series data from. Options: 'lorenz' for the Lorenz 63 equations, 'KS' for the Kuramoto-Sivashinsky equation with 64 grid points and a periodicity length of 22, and KS_d2175 for the Kuramoto-Sivashinksy equation with 64 grid points and a periodicity length of 21.75. Default: 'KS'

tau

Time between each data point in the time series data. If system = 'lorenz', this value must be evenly divided by 0.01 (the integration time step). If left as None, then the class will set tau to the default value for the particular dynamical system. Default: None

train_time

Number of training data points. Default: 20000

test_time

Number of testing data points. If left as None, then the class will default to 4000 if system = 'lorenz', or 16000 if system = 'KS' or 'KS_d2175'. Default: None

sync_time

Number of data points used to synchronize the reservoir to each test data set. Default: 2000

discard_time

Number of data points used to synchronize the reservoir to each training data set. Default: 500

res_size

Number of nodes in the reservoir. Default: 500

rho

Reservoir spectral radius. Default: 0.6

sigma

Reservoir input scaling. Default: 0.1

theta

Reservoir input bias scaling. Default: 0.1

leakage

Reservoir leaking rate. Default: 1.0

squarenodes

Boolean denoting whether or not the squared node states are including in the reservoir feature vector. Default: True

bias_type

Type of reservoir input bias to be used. See the Reservoir class for available options. Default: new_random

win_type

Type of input coupling matrix to be used. See the Reservoir class for available options. Default: full_0centered

traintype

Type of training to be used to determine the reservoir output coupling matrix. There are a number of options available, but those used in the paper are:

'normal' - Standard reservoir training, potentially with input noise added.

'gradientk%d' % (Number of noise steps) - Reservoir training with no noise and LMNT regularization for a number of noise steps > 1, or Jacobian regularization for a number of noise steps = 1.

'regzerok%d' % (Number of noise steps) - Reservoir training with no noise and LMNT/Jacobian regularization computed using a zero-input and zero reservoir state.

Default: 'normal'

noisetype

Type of noise to be added to the reservoir input during training. Options are 'none' and 'gaussian'. Default: 'none'

noise_values_array

Numpy array containing the variance of the added input noise (if noisetype = 'gaussian') or the LMNT/Jacobian regularization parameter value (if traintype = 'gradientk%d' or 'regzerok%d'). Each value contained in the array will be tested separately using each of the reservoirs, training, and testing data sets. Default: np.logspace(-4, 3, num=3, base=10)

reg_train_times

Numpy array containing the number of training data points to be used to train the LMNT or Jacobian regularization. If left as None, then the class will default to an array containing only the total number of training data points for standard LMNT/Jacobian or the number of noise steps if using zero-input LMNT. Default: None

noise_realizations

Number of input noise realizations used to train the reservoir (if training with noise). If not training with noise, set to 1. Default: 1

reg_values

Numpy array containing the Tikhonov regularization parameter values. Each value contained in the array will be tested separately using each of the reservoirs, training, and testing data sets. Default: np.append(0., np.logspace(-11, -9, 5))

prior

Prior to be used when computing the output coupling matrix using Tikhonov regularization. Options are:

'zero' - Standard Tikhonov regularization with a zero prior.

'input_pass' - Tikhonov regularization with a persistence prior (i.e., set the input pass-through weights to 1).

Default: 'zero'

max_valid_time

Maximum valid time for each valid time test during the testing period. This should be set so that test_time / max_valid_time is a whole number greater than 0. Default: 2000

res_per_test

Number of random reservoir realizations to test. Default: 1

num_trains

Number of independently generated training data sets to test with. Default: 1

num_tests

Number of independently generated testing data sets to test with. Default: 1

res_start

Starting iterate for generating the random seeds that are used to generate the reservoir. Default: 0

train_start

Starting iterate for generating the random seeds that are used to generate the training data sets. Default: 0

test_start

Starting iterate for generating the random seeds that are used to generate the testing data sets. Default: 0

root_folder

Location where output data will be stored in the Data folder. If None, then defaults to the current working directory. Default: None

return_all

Boolean for determining of all results should be returned, or only the results with the obtained using the "best" Tikhonov regularization parameter value based on the selected metric. Default: True

metric

Metric for determining the "best" results. Not used if return_all = True. Options include 'mean_rms', 'max_rms', and 'stable_frac'. Caution: Some options may be deprecated. Default: 'mss-var'

savepred

Boolean for determining if reservoir prediction time series should be saved. Default: False

save_time_rms

Boolean for determining if reservoir prediction RMS error should be saved. Default: False

pmap

Boolean for determining if reservoir prediction Poincare maximum map should be saved. Default: False

save_eigenvals

Boolean for determining if the eigenvalues of the LMNT/Jacobian regularization matrices should be saved. Default: False

save_truth

Boolean for determining if the true testing data should be saved. Default: False

ifray

Boolean for determining if ray should be used to compute results for multiple reservoirs and training data sets. Default: False

num_cpus

If using ray for paralellization, this sets the number of cpus to be used. Default: 1

machine

Machine which results are computed on. Leave as personal unless you are connecting to a ray cluster elsewhere. Default: 'personal'

runflag

True indicates that we are about to compute results, and the appropriate directories should be created. Otherwise, we do not create additional directories. Default: True

debug_mode

Boolean for determining if errors during reservoir training which could arise from non-convergence of the eigenvalue solver should be suppressed. If left as False, will suppress errors im much of the core code, so this should be set to True if making changes. Default: False

def get_file_name(self):
240    def get_file_name(self):
241        """Creates the folder and final data file name for the tests about to be run.
242        Args:
243            self: RunOpts object.
244        Returns:
245            RunOpts object with initialized folder and file name variables. Also creates the aforementioned folder
246            if not already created.
247        """
248        if self.prior == 'zero':
249            prior_str = ''
250        else:
251            prior_str = '_prior_%s' % self.prior
252        if self.savepred:
253            predflag = '_wpred'
254        else:
255            predflag = ''
256        if self.save_time_rms:
257            timeflag = '_savetime'
258        else:
259            timeflag = ''
260        if self.squarenodes:
261            squarenodes_flag = '_squarenodes'
262        else:
263            squarenodes_flag = ''
264        if self.save_eigenvals:
265            eigenval_flag = '_wmoregradeigs'
266        else:
267            eigenval_flag = ''
268        if self.pmap:
269            pmap_flag = '_wpmap0'
270        else:
271            pmap_flag = ''
272        data_folder_base = os.path.join(self.root_folder, 'Data')
273        if not os.path.isdir(data_folder_base):
274            os.mkdir(data_folder_base)
275
276        if not self.return_all:
277            data_folder = os.path.join(data_folder_base, '%s_noisetest_noisetype_%s_traintype_%s' % (
278                self.system, self.noisetype, self.traintype))
279            run_name = ('%s%s%s%s%s%s_rho%0.1f_sigma%1.1e_leakage%0.3f_win_%s_bias_%s_tau%0.2f'
280                        '_%dnodes_%dtrain_%dreals_noisetype_%s_traintype_%s%s_metric_%s') \
281                       % (self.system, predflag, timeflag, eigenval_flag, pmap_flag, squarenodes_flag, self.rho,
282                          self.sigma, self.leakage, self.win_type, self.bias_type, self.tau, self.res_size,
283                          self.train_time, self.noise_realizations, self.noisetype, self.traintype, prior_str,
284                          self.metric)
285        else:
286            data_folder = os.path.join(data_folder_base, '%s_noisetest_noisetype_%s_traintype_%s' % (
287                self.system, self.noisetype, self.traintype))
288            run_name = ('%s%s%s%s%s%s_rho%0.1f_sigma%1.1e_leakage%0.3f_win_%s_bias_%s_tau%0.2f'
289                        '_%dnodes_%dtrain_%dreals_noisetype_%s_traintype_%s%s') % (
290                           self.system, predflag, timeflag, eigenval_flag, pmap_flag, squarenodes_flag, self.rho,
291                           self.sigma,
292                           self.leakage, self.win_type, self.bias_type, self.tau, self.res_size, self.train_time,
293                           self.noise_realizations, self.noisetype, self.traintype, prior_str)
294
295        if self.runflag:
296            if not os.path.isdir(data_folder):
297                os.mkdir(data_folder)
298            if not os.path.isdir(os.path.join(data_folder, run_name + '_folder')):
299                os.mkdir(os.path.join(data_folder, run_name + '_folder'))
300        self.run_file_name = os.path.join(data_folder, run_name + '.bz2')
301        self.run_folder_name = os.path.join(data_folder, run_name + '_folder')

Creates the folder and final data file name for the tests about to be run.

Args
  • self: RunOpts object.
Returns

RunOpts object with initialized folder and file name variables. Also creates the aforementioned folder if not already created.

def get_run_opts(self):
303    def get_run_opts(self):
304        """Processes the command line input into instance variables.
305        Args:
306            self: RunOpts object.
307        Returns:
308            RunOpts object with instance variables set from command line input.
309        Raises:
310            GetoptError: Raises an error of command line arguments no recognized."""
311        try:
312            opts, args = getopt.getopt(self.argv, "T:N:r:",
313                                       ['testtime=', 'noisetype=', 'traintype=', 'system=', 'res=',
314                                        'tests=', 'trains=', 'savepred=', 'tau=', 'rho=',
315                                        'sigma=', 'theta=','leakage=', 'bias_type=', 'debug=', 'win_type=',
316                                        'machine=', 'num_cpus=', 'pmap=', 'parallel=', 'metric=', 'returnall=',
317                                        'savetime=', 'saveeigenvals=', 'noisevals=', 'regvals=', 'maxvt=',
318                                        'resstart=', 'trainstart=', 'teststart=',
319                                        'squarenodes=', 'regtraintimes=', 'discardlen=',
320                                        'prior=', 'synctime=', 'datarootdir=', 'savetruth='])
321        except getopt.GetoptError:
322            print('Error: Some options not recognized')
323            sys.exit(2)
324        for opt, arg in opts:
325            if opt == '-T':
326                self.train_time = int(arg)
327                print('Training iterations: %d' % self.train_time)
328            elif opt == '-N':
329                self.res_size = int(arg)
330                print('Reservoir nodes: %d' % self.res_size)
331            elif opt == '-r':
332                self.noise_realizations = int(arg)
333                print('Noise Realizations: %d' % self.noise_realizations)
334            elif opt == '--savetruth':
335                if arg == 'True':
336                    self.save_truth = True
337                elif arg == 'False':
338                    self.save_truth = False
339                else:
340                    raise ValueError
341            elif opt == '--datarootdir':
342                self.root_folder = str(arg)
343                print('Root directory for data: %s' % self.root_folder)
344            elif opt == '--synctime':
345                self.sync_time = int(arg)
346                print('Sync time: %d' % self.sync_time)
347            elif opt == '--testtime':
348                self.test_time = int(arg)
349                if self.test_time == 0:
350                    print('Testing duration: default')
351                else:
352                    print('Testing duration: %d' % self.test_time)
353            elif opt == '--resstart':
354                self.res_start = int(arg)
355                print('Reservoir ensemble start: %d' % self.res_start)
356            elif opt == '--trainstart':
357                self.train_start = int(arg)
358                print('Train ensemble start: %d' % self.train_start)
359            elif opt == '--teststart':
360                self.test_start = int(arg)
361                print('Test ensemble start: %d' % self.test_start)
362            elif opt == '--saveeigenvals':
363                if arg == 'True':
364                    self.save_eigenvals = True
365                elif arg == 'False':
366                    self.save_eigenvals = False
367                else:
368                    raise ValueError
369                print('Save grad reg eigenvalues: %s' % arg)
370            elif opt == '--prior':
371                self.prior = str(arg)
372                print('Prior type: %s' % self.prior)
373            elif opt == '--discardlen':
374                self.discard_time = int(arg)
375                print('Discard iterations: %d' % self.discard_time)
376            elif opt == '--squarenodes':
377                if arg == 'True':
378                    self.squarenodes = True
379                elif arg == 'False':
380                    self.squarenodes = False
381                else:
382                    raise ValueError
383                print('Square reservoir nodes: %s' % arg)
384            elif opt == '--maxvt':
385                self.max_valid_time = int(arg)
386                print('Maximum valid time: %d' % self.max_valid_time)
387            elif opt == '--noisevals':
388                self.noise_values_array = np.array([float(noise) for noise in arg.split(',')])
389                noise_str = '[ '
390                for noise in self.noise_values_array:
391                    noise_str += '%0.3e, ' % noise
392                noise_str = noise_str[:-2] + ' ]'
393                print('Noise values: %s' % noise_str)
394            elif opt == '--regvals':
395                self.reg_values = np.array([float(reg) for reg in arg.split(',')])
396                reg_str = '[ '
397                for reg in self.reg_values:
398                    reg_str += '%0.3e, ' % reg
399                reg_str = reg_str[:-2] + ' ]'
400                print('Regularization values: %s' % reg_str)
401            elif opt == '--regtraintimes':
402                if arg != 'None':
403                    self.reg_train_times = np.array([int(reg_train) for reg_train in arg.split(',')])
404                    reg_train_str = '[ '
405                    for reg_train in self.reg_train_times:
406                        reg_train_str += '%0.3e, ' % reg_train
407                    reg_train_str = reg_train_str[:-2] + ' ]'
408                    print('Regularization training times: %s' % reg_train_str)
409            elif opt == '--savetime':
410                if str(arg) == 'True':
411                    self.save_time_rms = True
412                elif str(arg) == 'False':
413                    self.save_time_rms = False
414                else:
415                    raise ValueError
416            elif opt == '--metric':
417                self.metric = str(arg)
418                if self.metric not in ['pmap_max_wass_dist', 'mean_rms', 'max_rms', 'mss_var']:
419                    raise ValueError
420                print('Stability metric: %s' % self.metric)
421            elif opt == '--returnall':
422                if arg == 'True':
423                    self.return_all = True
424                elif arg == 'False':
425                    self.return_all = False
426                else:
427                    raise ValueError
428            elif opt == '--parallel':
429                parallel_in = str(arg)
430                if parallel_in == 'True':
431                    self.ifray = True
432                elif parallel_in == 'False':
433                    self.ifray = False
434                else:
435                    raise ValueError
436            elif opt == '--pmap':
437                pmap_in = str(arg)
438                if pmap_in == 'True':
439                    self.pmap = True
440                elif pmap_in == 'False':
441                    self.pmap = False
442                else:
443                    raise ValueError
444            elif opt == '--machine':
445                self.machine = str(arg)
446                if self.machine not in ['deepthought2', 'personal']:
447                    raise ValueError
448                print('Machine: %s' % self.machine)
449            elif opt == '--num_cpus':
450                self.num_cpus = int(arg)
451                print('Number of CPUS: %d' % self.num_cpus)
452            elif opt == '--rho':
453                self.rho = float(arg)
454                print('Rho: %f' % self.rho)
455            elif opt == '--sigma':
456                self.sigma = float(arg)
457                print('Sigma: %f' % self.sigma)
458            elif opt == '--theta':
459                self.theta = float(arg)
460                print('Theta: %f' % self.theta)
461            elif opt == '--leakage':
462                self.leakage = float(arg)
463                print('Leakage: %f' % self.leakage)
464            elif opt == '--tau':
465                self.tau = float(arg)
466                print('Reservoir timestep: %f' % self.tau)
467            elif opt == '--win_type':
468                self.win_type = str(arg)
469                print('Win Type: %s' % self.win_type)
470            elif opt == '--bias_type':
471                self.bias_type = str(arg)
472                print('Bias Type: %s' % self.bias_type)
473            elif opt == '--res':
474                self.res_per_test = int(arg)
475                print('Number of reservoirs: %d' % self.res_per_test)
476            elif opt == '--tests':
477                self.num_tests = int(arg)
478                print('Number of tests: %d' % self.num_tests)
479            elif opt == '--trains':
480                self.num_trains = int(arg)
481                print('Number of training data sequences: %d' % self.num_trains)
482            elif opt == '--savepred':
483                if arg == 'True':
484                    self.savepred = True
485                elif arg == 'False':
486                    self.savepred = False
487                print('Saving predictions: %s' % arg)
488            elif opt == '--noisetype':
489                self.noisetype = str(arg)
490                print('Noise type: %s' % self.noisetype)
491            elif opt == '--traintype':
492                self.traintype = str(arg)
493                print('Training type: %s' % self.traintype)
494            elif opt == '--system':
495                self.system = str(arg)
496                print('System: %s' % self.system)
497            elif opt == '--debug':
498                if arg == 'True':
499                    self.debug_mode = True
500                elif arg == 'False':
501                    self.debug_mode = False
502                print('Debug Mode: %s' % arg)

Processes the command line input into instance variables.

Args
  • self: RunOpts object.
Returns

RunOpts object with instance variables set from command line input.

Raises
  • GetoptError: Raises an error of command line arguments no recognized.
class Reservoir:
505class Reservoir:
506    """Class for initializing and storing the reservoir matrices and internal states."""
507    def __init__(self, run_opts, res_gen, res_itr, input_size, avg_degree=3):
508        """Initializes the Reservoir object.
509        Args:
510            run_opts: RunOpts object containing parameters used to generate the reservoir.
511            res_gen: A numpy.random.Generator object used to generate the random matrices in the Reservoir.
512            res_itr: Reservoir iteration tag.
513            input_size: Number of elements in reservoir input.
514            avg_degree: Average in-degree of each reservoir node (i.e., the average number of edges that connect into each vertex in the graph). Default: 3
515        Returns:
516            Constructed Reservoir object.
517        Raises:
518            ValueError: If win_type or bias_type is not recognized."""
519        # Define class for storing reservoir layers generated from input parameters and an input random number generator
520        self.rsvr_size = run_opts.res_size
521        self.res_itr = res_itr
522
523        density = avg_degree / self.rsvr_size
524
525        if run_opts.win_type == 'full_0centered':
526            unnormalized_W = 2 * res_gen.random((self.rsvr_size, self.rsvr_size)) - 1
527        else:
528            unnormalized_W = res_gen.random((self.rsvr_size, self.rsvr_size))
529        for i in range(unnormalized_W[:, 0].size):
530            for j in range(unnormalized_W[0].size):
531                if res_gen.random(1) > density:
532                    unnormalized_W[i][j] = 0
533
534        max_eig = eigs(unnormalized_W, k=1,
535                       return_eigenvectors=False, maxiter=10 ** 5, v0=res_gen.random(self.rsvr_size))
536
537        W_sp = csc_matrix(np.ascontiguousarray(
538            run_opts.rho / np.abs(max_eig[0]) * unnormalized_W))
539        self.W_data, self.W_indices, self.W_indptr, self.W_shape = \
540            (W_sp.data, W_sp.indices, W_sp.indptr, np.array(list(W_sp.shape)))
541        # print('Avg. degree of W:')
542        # print(self.W_data.size/rsvr_size)
543
544        # print('Adjacency matrix section:')
545        # print(self.W_data[:4])
546
547        if run_opts.win_type == 'dense':
548            if run_opts.bias_type != 'new_random':
549                raise ValueError
550            Win = (res_gen.random(self.rsvr_size * (input_size + 1)).reshape(self.rsvr_size,
551                                                                             input_size + 1) * 2 - 1) * run_opts.sigma
552        else:
553            if 'full' in run_opts.win_type:
554                input_vars = np.arange(input_size)
555            elif run_opts.win_type == 'x':
556                input_vars = np.array([0])
557            else:
558                print('Win_type not recognized.')
559                raise ValueError()
560            if run_opts.bias_type == 'old':
561                const_frac = 0.15
562                const_conn = int(self.rsvr_size * const_frac)
563                Win = np.zeros((self.rsvr_size, input_size + 1))
564                Win[:const_conn, 0] = (res_gen.random(
565                    Win[:const_conn, 0].size) * 2 - 1) * run_opts.theta
566                q = int((self.rsvr_size - const_conn) // input_vars.size)
567                for i, var in enumerate(input_vars):
568                    Win[const_conn + q * i:const_conn + q *
569                                           (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma
570            elif run_opts.bias_type == 'new_random':
571                Win = np.zeros((self.rsvr_size, input_size + 1))
572                Win[:, 0] = (res_gen.random(self.rsvr_size) * 2 - 1) * run_opts.theta
573                q = int(self.rsvr_size // input_vars.size)
574                for i, var in enumerate(input_vars):
575                    Win[q * i:q * (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma
576                leftover_nodes = self.rsvr_size - q * input_vars.size
577                for i in range(leftover_nodes):
578                    Win[self.rsvr_size - leftover_nodes + i, input_vars[res_gen.integers(input_vars.size)]] = \
579                        (res_gen.random() * 2 - 1) * run_opts.sigma
580            elif run_opts.bias_type == 'new_new_random':
581                Win = np.zeros((self.rsvr_size, input_size + 1))
582                Win[:, 0] = (res_gen.random(self.rsvr_size) * 2 - 1) * run_opts.theta
583                q = int(self.rsvr_size // input_vars.size)
584                for i, var in enumerate(input_vars):
585                    Win[q * i:q * (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma
586                leftover_nodes = self.rsvr_size - q * input_vars.size
587                var = input_vars[res_gen.choice(
588                    input_vars.size, size=leftover_nodes, replace=False)]
589                Win[self.rsvr_size - leftover_nodes:, var +
590                                                      1] = (res_gen.random(leftover_nodes) * 2 - 1) * run_opts.sigma
591            elif run_opts.bias_type == 'new_const':
592                Win = np.zeros((self.rsvr_size, input_size + 1))
593                Win[:, 0] = run_opts.theta
594                q = int(self.rsvr_size // input_vars.size)
595                for i, var in enumerate(input_vars):
596                    Win[q * i:q * (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma
597                leftover_nodes = self.rsvr_size - q * input_vars.size
598                var = input_vars[res_gen.integers(
599                    input_vars.size, size=leftover_nodes)]
600                Win[self.rsvr_size - leftover_nodes:, var +
601                                                      1] = (res_gen.random(leftover_nodes) * 2 - 1) * run_opts.sigma
602            else:
603                print('bias_type not recognized.')
604                raise ValueError()
605
606        Win_sp = csc_matrix(Win)
607        self.Win_data, self.Win_indices, self.Win_indptr, self.Win_shape = \
608            np.ascontiguousarray(Win_sp.data), \
609            np.ascontiguousarray(Win_sp.indices), \
610            np.ascontiguousarray(Win_sp.indptr), \
611            np.array(list(Win_sp.shape))
612
613        self.X = (res_gen.random((self.rsvr_size, run_opts.train_time + run_opts.discard_time + 2)) * 2 - 1)
614        self.Wout = np.array([])
615        self.leakage = run_opts.leakage

Class for initializing and storing the reservoir matrices and internal states.

Reservoir(run_opts, res_gen, res_itr, input_size, avg_degree=3)
507    def __init__(self, run_opts, res_gen, res_itr, input_size, avg_degree=3):
508        """Initializes the Reservoir object.
509        Args:
510            run_opts: RunOpts object containing parameters used to generate the reservoir.
511            res_gen: A numpy.random.Generator object used to generate the random matrices in the Reservoir.
512            res_itr: Reservoir iteration tag.
513            input_size: Number of elements in reservoir input.
514            avg_degree: Average in-degree of each reservoir node (i.e., the average number of edges that connect into each vertex in the graph). Default: 3
515        Returns:
516            Constructed Reservoir object.
517        Raises:
518            ValueError: If win_type or bias_type is not recognized."""
519        # Define class for storing reservoir layers generated from input parameters and an input random number generator
520        self.rsvr_size = run_opts.res_size
521        self.res_itr = res_itr
522
523        density = avg_degree / self.rsvr_size
524
525        if run_opts.win_type == 'full_0centered':
526            unnormalized_W = 2 * res_gen.random((self.rsvr_size, self.rsvr_size)) - 1
527        else:
528            unnormalized_W = res_gen.random((self.rsvr_size, self.rsvr_size))
529        for i in range(unnormalized_W[:, 0].size):
530            for j in range(unnormalized_W[0].size):
531                if res_gen.random(1) > density:
532                    unnormalized_W[i][j] = 0
533
534        max_eig = eigs(unnormalized_W, k=1,
535                       return_eigenvectors=False, maxiter=10 ** 5, v0=res_gen.random(self.rsvr_size))
536
537        W_sp = csc_matrix(np.ascontiguousarray(
538            run_opts.rho / np.abs(max_eig[0]) * unnormalized_W))
539        self.W_data, self.W_indices, self.W_indptr, self.W_shape = \
540            (W_sp.data, W_sp.indices, W_sp.indptr, np.array(list(W_sp.shape)))
541        # print('Avg. degree of W:')
542        # print(self.W_data.size/rsvr_size)
543
544        # print('Adjacency matrix section:')
545        # print(self.W_data[:4])
546
547        if run_opts.win_type == 'dense':
548            if run_opts.bias_type != 'new_random':
549                raise ValueError
550            Win = (res_gen.random(self.rsvr_size * (input_size + 1)).reshape(self.rsvr_size,
551                                                                             input_size + 1) * 2 - 1) * run_opts.sigma
552        else:
553            if 'full' in run_opts.win_type:
554                input_vars = np.arange(input_size)
555            elif run_opts.win_type == 'x':
556                input_vars = np.array([0])
557            else:
558                print('Win_type not recognized.')
559                raise ValueError()
560            if run_opts.bias_type == 'old':
561                const_frac = 0.15
562                const_conn = int(self.rsvr_size * const_frac)
563                Win = np.zeros((self.rsvr_size, input_size + 1))
564                Win[:const_conn, 0] = (res_gen.random(
565                    Win[:const_conn, 0].size) * 2 - 1) * run_opts.theta
566                q = int((self.rsvr_size - const_conn) // input_vars.size)
567                for i, var in enumerate(input_vars):
568                    Win[const_conn + q * i:const_conn + q *
569                                           (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma
570            elif run_opts.bias_type == 'new_random':
571                Win = np.zeros((self.rsvr_size, input_size + 1))
572                Win[:, 0] = (res_gen.random(self.rsvr_size) * 2 - 1) * run_opts.theta
573                q = int(self.rsvr_size // input_vars.size)
574                for i, var in enumerate(input_vars):
575                    Win[q * i:q * (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma
576                leftover_nodes = self.rsvr_size - q * input_vars.size
577                for i in range(leftover_nodes):
578                    Win[self.rsvr_size - leftover_nodes + i, input_vars[res_gen.integers(input_vars.size)]] = \
579                        (res_gen.random() * 2 - 1) * run_opts.sigma
580            elif run_opts.bias_type == 'new_new_random':
581                Win = np.zeros((self.rsvr_size, input_size + 1))
582                Win[:, 0] = (res_gen.random(self.rsvr_size) * 2 - 1) * run_opts.theta
583                q = int(self.rsvr_size // input_vars.size)
584                for i, var in enumerate(input_vars):
585                    Win[q * i:q * (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma
586                leftover_nodes = self.rsvr_size - q * input_vars.size
587                var = input_vars[res_gen.choice(
588                    input_vars.size, size=leftover_nodes, replace=False)]
589                Win[self.rsvr_size - leftover_nodes:, var +
590                                                      1] = (res_gen.random(leftover_nodes) * 2 - 1) * run_opts.sigma
591            elif run_opts.bias_type == 'new_const':
592                Win = np.zeros((self.rsvr_size, input_size + 1))
593                Win[:, 0] = run_opts.theta
594                q = int(self.rsvr_size // input_vars.size)
595                for i, var in enumerate(input_vars):
596                    Win[q * i:q * (i + 1), var + 1] = (res_gen.random(q) * 2 - 1) * run_opts.sigma
597                leftover_nodes = self.rsvr_size - q * input_vars.size
598                var = input_vars[res_gen.integers(
599                    input_vars.size, size=leftover_nodes)]
600                Win[self.rsvr_size - leftover_nodes:, var +
601                                                      1] = (res_gen.random(leftover_nodes) * 2 - 1) * run_opts.sigma
602            else:
603                print('bias_type not recognized.')
604                raise ValueError()
605
606        Win_sp = csc_matrix(Win)
607        self.Win_data, self.Win_indices, self.Win_indptr, self.Win_shape = \
608            np.ascontiguousarray(Win_sp.data), \
609            np.ascontiguousarray(Win_sp.indices), \
610            np.ascontiguousarray(Win_sp.indptr), \
611            np.array(list(Win_sp.shape))
612
613        self.X = (res_gen.random((self.rsvr_size, run_opts.train_time + run_opts.discard_time + 2)) * 2 - 1)
614        self.Wout = np.array([])
615        self.leakage = run_opts.leakage

Initializes the Reservoir object.

Args
  • run_opts: RunOpts object containing parameters used to generate the reservoir.
  • res_gen: A numpy.random.Generator object used to generate the random matrices in the Reservoir.
  • res_itr: Reservoir iteration tag.
  • input_size: Number of elements in reservoir input.
  • avg_degree: Average in-degree of each reservoir node (i.e., the average number of edges that connect into each vertex in the graph). Default: 3
Returns

Constructed Reservoir object.

Raises
  • ValueError: If win_type or bias_type is not recognized.
class ResOutput:
618class ResOutput:
619    """Class for holding the output from a reservoir computer test. Is typically used to save the output from one of
620    the (potentially parallel) runs."""
621    def __init__(self, run_opts, noise_array):
622        """Creates the ResOutput object.
623        Args:
624            self: ResOutput object
625            run_opts: RunOpts object for the test.
626            noise_array: Array of noise/Jacobian/LMNT regularization parameter values used in this test.
627        Returns:
628            self: initialized ResOutput object."""
629        self.stable_frac_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
630                                        dtype=object)
631        self.mean_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
632                                     dtype=object)
633        self.max_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
634                                    dtype=object)
635        self.variances_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
636                                      dtype=object)
637        self.valid_time_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
638                                       dtype=object)
639        self.rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
640                                dtype=object)
641        self.preds_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
642                                  dtype=object)
643        self.wass_dist_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
644                                      dtype=object)
645        self.pmap_max_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
646                                     dtype=object)
647        self.pmap_max_wass_dist_out = np.zeros(
648            (noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), dtype=object)
649        self.stable_frac_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
650                                        dtype=object)
651        self.train_mean_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
652                                           dtype=object)
653        self.train_max_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
654                                          dtype=object)
655        self.grad_eigenvals_out = np.zeros((noise_array.size, run_opts.reg_train_times.size), dtype=object)
656        self.pred_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
657                                 dtype=object)
658
659    def save(self, run_opts, noise_array, res_itr, train_seed, test_idxs):
660        """Saves the data in the ResOutput object to a series of .csv files.
661        Args:
662            self: ResOutput object
663            run_opts: RunOpts object for test.
664            noise_array: Array of noise/Jacobian/LMNT regularization parameter values used in this test.
665            res_itr: Index for the reservoir iteration used.
666            train_seed: Index for the training data iteration used.
667            test_idxs: Indices for the testing data iterations used.
668        Returns:
669            Saves .csv files."""
670        for (i, noise_val), (j, reg_train_time) in product(enumerate(noise_array), enumerate(run_opts.reg_train_times)):
671            print((i, j))
672            stable_frac = np.zeros(run_opts.reg_values.size)
673            for k, array_elem in enumerate(self.stable_frac_out[i, :, j]):
674                stable_frac[k] = array_elem
675            np.savetxt(
676                get_filename(run_opts.run_folder_name, 'stable_frac', res_itr, train_seed, noise_val, reg_train_time),
677                stable_frac, delimiter=',')
678
679            mean_rms = np.zeros((run_opts.reg_values.size, *self.mean_rms_out[i, 0, j].shape))
680            for k, array_elem in enumerate(self.mean_rms_out[i, :, j]):
681                mean_rms[k] = array_elem
682            np.savetxt(
683                get_filename(run_opts.run_folder_name, 'mean_rms', res_itr, train_seed, noise_val, reg_train_time),
684                mean_rms, delimiter=',')
685
686            max_rms = np.zeros((run_opts.reg_values.size, *self.max_rms_out[i, 0, j].shape))
687            for k, array_elem in enumerate(self.max_rms_out[i, :, j]):
688                max_rms[k] = array_elem
689            np.savetxt(
690                get_filename(run_opts.run_folder_name, 'max_rms', res_itr, train_seed, noise_val, reg_train_time),
691                max_rms, delimiter=',')
692
693            variances = np.zeros((run_opts.reg_values.size, *self.variances_out[i, 0, j].shape))
694            for k, array_elem in enumerate(self.variances_out[i, :, j]):
695                variances[k] = array_elem
696            np.savetxt(
697                get_filename(run_opts.run_folder_name, 'variance', res_itr, train_seed, noise_val, reg_train_time),
698                variances, delimiter=',')
699
700            valid_time = np.zeros((run_opts.reg_values.size, *self.valid_time_out[i, 0, j].shape))
701            for k, array_elem in enumerate(self.valid_time_out[i, :, j]):
702                valid_time[k] = array_elem
703            for k in range(run_opts.num_tests):
704                np.savetxt(
705                    get_filename(run_opts.run_folder_name, 'valid_time', res_itr, train_seed, noise_val, reg_train_time,
706                                 test_idx=test_idxs[k]), valid_time[:, k], delimiter=',')
707
708            train_mean_rms = np.zeros((run_opts.reg_values.size, *self.train_mean_rms_out[i, 0, j].shape))
709            for k, array_elem in enumerate(self.train_mean_rms_out[i, :, j]):
710                train_mean_rms[k] = array_elem
711            np.savetxt(get_filename(run_opts.run_folder_name, 'train_mean_rms', res_itr, train_seed, noise_val,
712                                    reg_train_time),
713                       train_mean_rms, delimiter=',')
714
715            train_max_rms = np.zeros((run_opts.reg_values.size, *self.train_max_rms_out[i, 0, j].shape))
716            for k, array_elem in enumerate(self.train_max_rms_out[i, :, j]):
717                train_max_rms[k] = array_elem
718            np.savetxt(
719                get_filename(run_opts.run_folder_name, 'train_max_rms', res_itr, train_seed, noise_val, reg_train_time),
720                train_max_rms, delimiter=',')
721
722            if run_opts.pmap:
723                pmap_max_wass_dist = np.zeros((run_opts.reg_values.size, *self.pmap_max_wass_dist_out[i, 0, j].shape))
724                for k, array_elem in enumerate(self.pmap_max_wass_dist_out[i, :, j]):
725                    pmap_max_wass_dist[k] = array_elem
726                np.savetxt(get_filename(run_opts.run_folder_name, 'pmap_max_wass_dist', res_itr, train_seed, noise_val,
727                                        reg_train_time),
728                           pmap_max_wass_dist, delimiter=',')
729
730                pmap_max = np.zeros((run_opts.reg_values.size, run_opts.num_tests), dtype=object)
731                for k, l in product(np.arange(run_opts.reg_values.size), np.arange(run_opts.num_tests)):
732                    pmap_max[k, l] = self.pmap_max_out[i, k, j][l]
733                    pmap_len = [len(arr) for arr in pmap_max[k, l]]
734                    max_pmap_len = max(pmap_len)
735                    pmap_max_save = np.zeros((len(pmap_max[k, l]), max_pmap_len))
736                    for m in range(len(pmap_max[k, l])):
737                        pmap_max_save[m, :pmap_len[m]] = pmap_max[k, l][m]
738                    np.savetxt(get_filename(run_opts.run_folder_name, 'pmap_max', res_itr, train_seed, noise_val,
739                                            reg_train_time,
740                                            test_idx=test_idxs[l], reg=run_opts.reg_values[k]),
741                               pmap_max_save, delimiter=',')
742            if run_opts.save_time_rms:
743                rms = np.zeros((run_opts.reg_values.size, *self.rms_out[i, 0, j].shape))
744                for k, array_elem in enumerate(self.rms_out[i, :, j]):
745                    rms[k] = array_elem
746                for k in range(run_opts.num_tests):
747                    np.savetxt(
748                        get_filename(run_opts.run_folder_name, 'rms', res_itr, train_seed, noise_val, reg_train_time,
749                                     test_idx=test_idxs[k]),
750                        rms[:, k], delimiter=',')
751            if run_opts.save_eigenvals:
752                eigenvals = self.grad_eigenvals_out[i, j]
753                np.savetxt(get_filename(run_opts.run_folder_name, 'gradreg_eigenvals', res_itr, train_seed, noise_val,
754                                        reg_train_time),
755                           eigenvals, delimiter=',')
756            if run_opts.savepred:
757                pred = np.zeros((run_opts.reg_values.size, *self.pred_out[i, 0, j].shape))
758                for k, array_elem in enumerate(self.pred_out[i, :, j]):
759                    pred[k] = array_elem
760                for l, (k, reg) in product(range(run_opts.num_tests), enumerate(run_opts.reg_values)):
761                    np.savetxt(
762                        get_filename(run_opts.run_folder_name, 'pred', res_itr, train_seed, noise_val, reg_train_time,
763                                     test_idx=test_idxs[l], reg=reg),
764                        pred[k, l], delimiter=',')

Class for holding the output from a reservoir computer test. Is typically used to save the output from one of the (potentially parallel) runs.

ResOutput(run_opts, noise_array)
621    def __init__(self, run_opts, noise_array):
622        """Creates the ResOutput object.
623        Args:
624            self: ResOutput object
625            run_opts: RunOpts object for the test.
626            noise_array: Array of noise/Jacobian/LMNT regularization parameter values used in this test.
627        Returns:
628            self: initialized ResOutput object."""
629        self.stable_frac_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
630                                        dtype=object)
631        self.mean_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
632                                     dtype=object)
633        self.max_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
634                                    dtype=object)
635        self.variances_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
636                                      dtype=object)
637        self.valid_time_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
638                                       dtype=object)
639        self.rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
640                                dtype=object)
641        self.preds_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
642                                  dtype=object)
643        self.wass_dist_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
644                                      dtype=object)
645        self.pmap_max_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
646                                     dtype=object)
647        self.pmap_max_wass_dist_out = np.zeros(
648            (noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size), dtype=object)
649        self.stable_frac_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
650                                        dtype=object)
651        self.train_mean_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
652                                           dtype=object)
653        self.train_max_rms_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
654                                          dtype=object)
655        self.grad_eigenvals_out = np.zeros((noise_array.size, run_opts.reg_train_times.size), dtype=object)
656        self.pred_out = np.zeros((noise_array.size, run_opts.reg_values.size, run_opts.reg_train_times.size),
657                                 dtype=object)

Creates the ResOutput object.

Args
  • self: ResOutput object
  • run_opts: RunOpts object for the test.
  • noise_array: Array of noise/Jacobian/LMNT regularization parameter values used in this test.
Returns

self: initialized ResOutput object.

def save(self, run_opts, noise_array, res_itr, train_seed, test_idxs):
659    def save(self, run_opts, noise_array, res_itr, train_seed, test_idxs):
660        """Saves the data in the ResOutput object to a series of .csv files.
661        Args:
662            self: ResOutput object
663            run_opts: RunOpts object for test.
664            noise_array: Array of noise/Jacobian/LMNT regularization parameter values used in this test.
665            res_itr: Index for the reservoir iteration used.
666            train_seed: Index for the training data iteration used.
667            test_idxs: Indices for the testing data iterations used.
668        Returns:
669            Saves .csv files."""
670        for (i, noise_val), (j, reg_train_time) in product(enumerate(noise_array), enumerate(run_opts.reg_train_times)):
671            print((i, j))
672            stable_frac = np.zeros(run_opts.reg_values.size)
673            for k, array_elem in enumerate(self.stable_frac_out[i, :, j]):
674                stable_frac[k] = array_elem
675            np.savetxt(
676                get_filename(run_opts.run_folder_name, 'stable_frac', res_itr, train_seed, noise_val, reg_train_time),
677                stable_frac, delimiter=',')
678
679            mean_rms = np.zeros((run_opts.reg_values.size, *self.mean_rms_out[i, 0, j].shape))
680            for k, array_elem in enumerate(self.mean_rms_out[i, :, j]):
681                mean_rms[k] = array_elem
682            np.savetxt(
683                get_filename(run_opts.run_folder_name, 'mean_rms', res_itr, train_seed, noise_val, reg_train_time),
684                mean_rms, delimiter=',')
685
686            max_rms = np.zeros((run_opts.reg_values.size, *self.max_rms_out[i, 0, j].shape))
687            for k, array_elem in enumerate(self.max_rms_out[i, :, j]):
688                max_rms[k] = array_elem
689            np.savetxt(
690                get_filename(run_opts.run_folder_name, 'max_rms', res_itr, train_seed, noise_val, reg_train_time),
691                max_rms, delimiter=',')
692
693            variances = np.zeros((run_opts.reg_values.size, *self.variances_out[i, 0, j].shape))
694            for k, array_elem in enumerate(self.variances_out[i, :, j]):
695                variances[k] = array_elem
696            np.savetxt(
697                get_filename(run_opts.run_folder_name, 'variance', res_itr, train_seed, noise_val, reg_train_time),
698                variances, delimiter=',')
699
700            valid_time = np.zeros((run_opts.reg_values.size, *self.valid_time_out[i, 0, j].shape))
701            for k, array_elem in enumerate(self.valid_time_out[i, :, j]):
702                valid_time[k] = array_elem
703            for k in range(run_opts.num_tests):
704                np.savetxt(
705                    get_filename(run_opts.run_folder_name, 'valid_time', res_itr, train_seed, noise_val, reg_train_time,
706                                 test_idx=test_idxs[k]), valid_time[:, k], delimiter=',')
707
708            train_mean_rms = np.zeros((run_opts.reg_values.size, *self.train_mean_rms_out[i, 0, j].shape))
709            for k, array_elem in enumerate(self.train_mean_rms_out[i, :, j]):
710                train_mean_rms[k] = array_elem
711            np.savetxt(get_filename(run_opts.run_folder_name, 'train_mean_rms', res_itr, train_seed, noise_val,
712                                    reg_train_time),
713                       train_mean_rms, delimiter=',')
714
715            train_max_rms = np.zeros((run_opts.reg_values.size, *self.train_max_rms_out[i, 0, j].shape))
716            for k, array_elem in enumerate(self.train_max_rms_out[i, :, j]):
717                train_max_rms[k] = array_elem
718            np.savetxt(
719                get_filename(run_opts.run_folder_name, 'train_max_rms', res_itr, train_seed, noise_val, reg_train_time),
720                train_max_rms, delimiter=',')
721
722            if run_opts.pmap:
723                pmap_max_wass_dist = np.zeros((run_opts.reg_values.size, *self.pmap_max_wass_dist_out[i, 0, j].shape))
724                for k, array_elem in enumerate(self.pmap_max_wass_dist_out[i, :, j]):
725                    pmap_max_wass_dist[k] = array_elem
726                np.savetxt(get_filename(run_opts.run_folder_name, 'pmap_max_wass_dist', res_itr, train_seed, noise_val,
727                                        reg_train_time),
728                           pmap_max_wass_dist, delimiter=',')
729
730                pmap_max = np.zeros((run_opts.reg_values.size, run_opts.num_tests), dtype=object)
731                for k, l in product(np.arange(run_opts.reg_values.size), np.arange(run_opts.num_tests)):
732                    pmap_max[k, l] = self.pmap_max_out[i, k, j][l]
733                    pmap_len = [len(arr) for arr in pmap_max[k, l]]
734                    max_pmap_len = max(pmap_len)
735                    pmap_max_save = np.zeros((len(pmap_max[k, l]), max_pmap_len))
736                    for m in range(len(pmap_max[k, l])):
737                        pmap_max_save[m, :pmap_len[m]] = pmap_max[k, l][m]
738                    np.savetxt(get_filename(run_opts.run_folder_name, 'pmap_max', res_itr, train_seed, noise_val,
739                                            reg_train_time,
740                                            test_idx=test_idxs[l], reg=run_opts.reg_values[k]),
741                               pmap_max_save, delimiter=',')
742            if run_opts.save_time_rms:
743                rms = np.zeros((run_opts.reg_values.size, *self.rms_out[i, 0, j].shape))
744                for k, array_elem in enumerate(self.rms_out[i, :, j]):
745                    rms[k] = array_elem
746                for k in range(run_opts.num_tests):
747                    np.savetxt(
748                        get_filename(run_opts.run_folder_name, 'rms', res_itr, train_seed, noise_val, reg_train_time,
749                                     test_idx=test_idxs[k]),
750                        rms[:, k], delimiter=',')
751            if run_opts.save_eigenvals:
752                eigenvals = self.grad_eigenvals_out[i, j]
753                np.savetxt(get_filename(run_opts.run_folder_name, 'gradreg_eigenvals', res_itr, train_seed, noise_val,
754                                        reg_train_time),
755                           eigenvals, delimiter=',')
756            if run_opts.savepred:
757                pred = np.zeros((run_opts.reg_values.size, *self.pred_out[i, 0, j].shape))
758                for k, array_elem in enumerate(self.pred_out[i, :, j]):
759                    pred[k] = array_elem
760                for l, (k, reg) in product(range(run_opts.num_tests), enumerate(run_opts.reg_values)):
761                    np.savetxt(
762                        get_filename(run_opts.run_folder_name, 'pred', res_itr, train_seed, noise_val, reg_train_time,
763                                     test_idx=test_idxs[l], reg=reg),
764                        pred[k, l], delimiter=',')

Saves the data in the ResOutput object to a series of .csv files.

Args
  • self: ResOutput object
  • run_opts: RunOpts object for test.
  • noise_array: Array of noise/Jacobian/LMNT regularization parameter values used in this test.
  • res_itr: Index for the reservoir iteration used.
  • train_seed: Index for the training data iteration used.
  • test_idxs: Indices for the testing data iterations used.
Returns

Saves .csv files.

class NumericalModel:
767class NumericalModel:
768    """Class for generating training or testing data using one of the test numerical models."""
769    def __init__(self, tau=0.1, int_step=1, T=300, ttsplit=5000, u0=0, system='lorenz',
770                 params=np.array([[], []], dtype=np.complex128)):
771        """Creates the NumericalModel object.
772        Args:
773            self: NumericalModel object.
774            tau: Time step between measurements of the system dynamics.
775            int_step: the number of numerical integration steps between each measurement.
776            T: Total number of measurements.
777            ttsplit: Number of measurements to be used in the training data set.
778            u0: Initial condition for integration.
779            system: Name of system to generate data from. Options: 'lorenz', 'KS','KS_d2175'
780            params: Internal parameters for model integration. Currently only used by KS options.
781        Returns:
782            Complete NumericalModel object with precomputed internal parameters."""
783
784        if system == 'lorenz':
785            u_arr = np.ascontiguousarray(lorenzrungekutta(
786                u0[0], u0[1], u0[2], T, tau))
787            self.input_size = 3
788
789            u_arr[0] = (u_arr[0] - 0) / 7.929788629895004
790            u_arr[1] = (u_arr[1] - 0) / 8.9932616136662
791            u_arr[2] = (u_arr[2] - 23.596294463016896) / 8.575917849311919
792            self.params = params
793
794        elif system == 'KS':
795            u_arr, self.params = kursiv_predict(u0, tau=tau, T=T, params=params)
796            self.input_size = u_arr.shape[0]
797            u_arr = np.ascontiguousarray(u_arr) / (1.1876770355823614)
798        elif system == 'KS_d2175':
799            u_arr, self.params = kursiv_predict(u0, tau=tau, T=T, d=21.75, params=params)
800            self.input_size = u_arr.shape[0]
801            u_arr = np.ascontiguousarray(u_arr) / (1.2146066380280796)
802        else:
803            raise ValueError
804
805        self.train_length = ttsplit
806        self.u_arr_train = u_arr[:, :ttsplit + 1]
807
808        # u[ttsplit], the (ttsplit + 1)st element, is the last in u_arr_train and the first in u_arr_test
809        self.u_arr_test = u_arr[:, ttsplit:]

Class for generating training or testing data using one of the test numerical models.

NumericalModel( tau=0.1, int_step=1, T=300, ttsplit=5000, u0=0, system='lorenz', params=array([], shape=(2, 0), dtype=complex128))
769    def __init__(self, tau=0.1, int_step=1, T=300, ttsplit=5000, u0=0, system='lorenz',
770                 params=np.array([[], []], dtype=np.complex128)):
771        """Creates the NumericalModel object.
772        Args:
773            self: NumericalModel object.
774            tau: Time step between measurements of the system dynamics.
775            int_step: the number of numerical integration steps between each measurement.
776            T: Total number of measurements.
777            ttsplit: Number of measurements to be used in the training data set.
778            u0: Initial condition for integration.
779            system: Name of system to generate data from. Options: 'lorenz', 'KS','KS_d2175'
780            params: Internal parameters for model integration. Currently only used by KS options.
781        Returns:
782            Complete NumericalModel object with precomputed internal parameters."""
783
784        if system == 'lorenz':
785            u_arr = np.ascontiguousarray(lorenzrungekutta(
786                u0[0], u0[1], u0[2], T, tau))
787            self.input_size = 3
788
789            u_arr[0] = (u_arr[0] - 0) / 7.929788629895004
790            u_arr[1] = (u_arr[1] - 0) / 8.9932616136662
791            u_arr[2] = (u_arr[2] - 23.596294463016896) / 8.575917849311919
792            self.params = params
793
794        elif system == 'KS':
795            u_arr, self.params = kursiv_predict(u0, tau=tau, T=T, params=params)
796            self.input_size = u_arr.shape[0]
797            u_arr = np.ascontiguousarray(u_arr) / (1.1876770355823614)
798        elif system == 'KS_d2175':
799            u_arr, self.params = kursiv_predict(u0, tau=tau, T=T, d=21.75, params=params)
800            self.input_size = u_arr.shape[0]
801            u_arr = np.ascontiguousarray(u_arr) / (1.2146066380280796)
802        else:
803            raise ValueError
804
805        self.train_length = ttsplit
806        self.u_arr_train = u_arr[:, :ttsplit + 1]
807
808        # u[ttsplit], the (ttsplit + 1)st element, is the last in u_arr_train and the first in u_arr_test
809        self.u_arr_test = u_arr[:, ttsplit:]

Creates the NumericalModel object.

Args
  • self: NumericalModel object.
  • tau: Time step between measurements of the system dynamics.
  • int_step: the number of numerical integration steps between each measurement.
  • T: Total number of measurements.
  • ttsplit: Number of measurements to be used in the training data set.
  • u0: Initial condition for integration.
  • system: Name of system to generate data from. Options: 'lorenz', 'KS','KS_d2175'
  • params: Internal parameters for model integration. Currently only used by KS options.
Returns

Complete NumericalModel object with precomputed internal parameters.

class ResPreds:
812class ResPreds:
813    """Class for loading and storing prediction time series data generated from reservoir computer tests."""
814    def __init__(self, run_opts):
815        """Loads the prediction data from .csv files.
816        Args:
817            run_opts: RunOpts object containing the run parameters."""
818        self.data_filename, self.pred_folder = run_opts.run_file_name, run_opts.run_folder_name
819        self.noise_vals = run_opts.noise_values_array
820        self.reg_train_vals = run_opts.reg_train_times
821        self.reg_vals = run_opts.reg_values
822        print('Starding data read...')
823        # print(self.pred_folder)
824        self.preds = np.zeros((run_opts.res_per_test, run_opts.num_trains, run_opts.num_tests, self.noise_vals.size,
825                               self.reg_train_vals.size, self.reg_vals.size), dtype=object)
826        total_vals = self.preds.size
827        with tqdm(total=total_vals) as pbar:
828            for (i, res), (j, train), (k, test), (l, noise), (m, reg_train), (n, reg) in product(
829                    enumerate(np.arange(run_opts.res_start, run_opts.res_start + run_opts.res_per_test)),
830                    enumerate(np.arange(run_opts.train_start, run_opts.train_start + run_opts.num_trains)),
831                    enumerate(np.arange(run_opts.test_start, run_opts.test_start + run_opts.num_tests)),
832                    enumerate(self.noise_vals),
833                    enumerate(self.reg_train_vals), enumerate(self.reg_vals)):
834                filename = get_filename(self.pred_folder, 'pred', res, train, noise, reg_train, reg = reg, test_idx = test)
835                if os.name == 'nt' and len(filename) >= 260:
836                    filename = get_windows_path(filename)
837                self.preds[i, j, k, l, m, n] = np.loadtxt(filename, delimiter=',')
838                pbar.update(1)

Class for loading and storing prediction time series data generated from reservoir computer tests.

ResPreds(run_opts)
814    def __init__(self, run_opts):
815        """Loads the prediction data from .csv files.
816        Args:
817            run_opts: RunOpts object containing the run parameters."""
818        self.data_filename, self.pred_folder = run_opts.run_file_name, run_opts.run_folder_name
819        self.noise_vals = run_opts.noise_values_array
820        self.reg_train_vals = run_opts.reg_train_times
821        self.reg_vals = run_opts.reg_values
822        print('Starding data read...')
823        # print(self.pred_folder)
824        self.preds = np.zeros((run_opts.res_per_test, run_opts.num_trains, run_opts.num_tests, self.noise_vals.size,
825                               self.reg_train_vals.size, self.reg_vals.size), dtype=object)
826        total_vals = self.preds.size
827        with tqdm(total=total_vals) as pbar:
828            for (i, res), (j, train), (k, test), (l, noise), (m, reg_train), (n, reg) in product(
829                    enumerate(np.arange(run_opts.res_start, run_opts.res_start + run_opts.res_per_test)),
830                    enumerate(np.arange(run_opts.train_start, run_opts.train_start + run_opts.num_trains)),
831                    enumerate(np.arange(run_opts.test_start, run_opts.test_start + run_opts.num_tests)),
832                    enumerate(self.noise_vals),
833                    enumerate(self.reg_train_vals), enumerate(self.reg_vals)):
834                filename = get_filename(self.pred_folder, 'pred', res, train, noise, reg_train, reg = reg, test_idx = test)
835                if os.name == 'nt' and len(filename) >= 260:
836                    filename = get_windows_path(filename)
837                self.preds[i, j, k, l, m, n] = np.loadtxt(filename, delimiter=',')
838                pbar.update(1)

Loads the prediction data from .csv files.

Args
  • run_opts: RunOpts object containing the run parameters.
class ResPmap:
841class ResPmap:
842    """Class for loading and storing Poincare maximum map data from reservoir predictions
843    generated from reservoir computer tests."""
844    def __init__(self, run_opts):
845        """Loads the Poincare maxmimum map data from .csv files.
846        Args:
847            run_opts: RunOpts object containing the run parameters."""
848        self.data_filename, self.pred_folder = run_opts.run_file_name, run_opts.run_folder_name
849        self.noise_vals = run_opts.noise_values_array
850        self.reg_train_vals = run_opts.reg_train_times
851        self.reg_vals = run_opts.reg_values
852        print('Starding data read...')
853        # print(self.pred_folder)
854        self.pmap_max = np.zeros((run_opts.res_per_test, run_opts.num_trains, run_opts.num_tests, self.noise_vals.size,
855                                  self.reg_train_vals.size, self.reg_vals.size), dtype=object)
856        total_vals = self.pmap_max.size
857        with tqdm(total=total_vals) as pbar:
858            for (i, res), (j, train), (k, test), (l, noise), (m, reg_train), (n, reg) in product(
859                    enumerate(np.arange(run_opts.res_start, run_opts.res_start + run_opts.res_per_test)),
860                    enumerate(np.arange(run_opts.train_start, run_opts.train_start + run_opts.num_trains)),
861                    enumerate(np.arange(run_opts.test_start, run_opts.test_start + run_opts.num_tests)),
862                    enumerate(self.noise_vals),
863                    enumerate(self.reg_train_vals), enumerate(self.reg_vals)):
864                filename = get_filename(self.pred_folder, 'pmap_max', res, train, noise, reg_train, reg = reg, test_idx = test)
865                if os.name == 'nt' and len(filename) >= 260:
866                    filename = get_windows_path(filename)
867                pmap_in = np.loadtxt(filename, delimiter=',')
868                pmap_max = [pmap_in[o, pmap_in[o] != 0.] for o in range(pmap_in.shape[0])]
869                self.pmap_max[i, j, k, l, m, n] = pmap_max
870                pbar.update(1)

Class for loading and storing Poincare maximum map data from reservoir predictions generated from reservoir computer tests.

ResPmap(run_opts)
844    def __init__(self, run_opts):
845        """Loads the Poincare maxmimum map data from .csv files.
846        Args:
847            run_opts: RunOpts object containing the run parameters."""
848        self.data_filename, self.pred_folder = run_opts.run_file_name, run_opts.run_folder_name
849        self.noise_vals = run_opts.noise_values_array
850        self.reg_train_vals = run_opts.reg_train_times
851        self.reg_vals = run_opts.reg_values
852        print('Starding data read...')
853        # print(self.pred_folder)
854        self.pmap_max = np.zeros((run_opts.res_per_test, run_opts.num_trains, run_opts.num_tests, self.noise_vals.size,
855                                  self.reg_train_vals.size, self.reg_vals.size), dtype=object)
856        total_vals = self.pmap_max.size
857        with tqdm(total=total_vals) as pbar:
858            for (i, res), (j, train), (k, test), (l, noise), (m, reg_train), (n, reg) in product(
859                    enumerate(np.arange(run_opts.res_start, run_opts.res_start + run_opts.res_per_test)),
860                    enumerate(np.arange(run_opts.train_start, run_opts.train_start + run_opts.num_trains)),
861                    enumerate(np.arange(run_opts.test_start, run_opts.test_start + run_opts.num_tests)),
862                    enumerate(self.noise_vals),
863                    enumerate(self.reg_train_vals), enumerate(self.reg_vals)):
864                filename = get_filename(self.pred_folder, 'pmap_max', res, train, noise, reg_train, reg = reg, test_idx = test)
865                if os.name == 'nt' and len(filename) >= 260:
866                    filename = get_windows_path(filename)
867                pmap_in = np.loadtxt(filename, delimiter=',')
868                pmap_max = [pmap_in[o, pmap_in[o] != 0.] for o in range(pmap_in.shape[0])]
869                self.pmap_max[i, j, k, l, m, n] = pmap_max
870                pbar.update(1)

Loads the Poincare maxmimum map data from .csv files.

Args
  • run_opts: RunOpts object containing the run parameters.
class ResData:
 873class ResData:
 874    """Class for loading and storing prediction analysis data generated from reservoir computer tests."""
 875    def __init__(self, run_opts):
 876        """Loads the prediction analysis data from a compressed pandas data file.
 877            Args:
 878                run_opts: RunOpts object containing the run parameters."""
 879        self.data_filename, self.pred_folder = run_opts.run_file_name, run_opts.run_folder_name
 880        print('Starding data read...')
 881        tic = time.perf_counter()
 882        # print(self.data_filename)
 883        if os.name == 'nt' and len(self.data_filename) >= 260:
 884            self.data_filename = get_windows_path(self.data_filename)
 885        self.data = pd.read_csv(self.data_filename, index_col=0)
 886        toc = time.perf_counter()
 887        print('Data reading finished in %0.2f sec.' % (toc - tic))
 888        self.res = pd.unique(self.data['res'])
 889        self.train = pd.unique(self.data['train'])
 890        self.test = pd.unique(self.data['test'])
 891        self.noise = pd.unique(self.data['noise'])
 892        self.reg = pd.unique(self.data['reg'])
 893        self.reg_train = pd.unique(self.data['reg_train'])
 894        self.nan = pd.isna(self.data['variance'])
 895
 896    def shape(self):
 897        """Returns the shape of the pandas data frame"""
 898        return self.data.shape
 899
 900    def size(self):
 901        """Returns the size of the pandas data frame"""
 902        return self.data.size
 903
 904    def data_slice(self, res=np.array([]), train=np.array([]), test=np.array([]),
 905                   reg_train=np.array([]), noise=np.array([]), reg=np.array([]), median_flag=False,
 906                   reduce_axes=[], metric='', gross_frac_metric='valid_time', gross_err_bnd=1e2,
 907                   reduce_fun=pd.DataFrame.median):
 908        """Slices and/or finds the best results using a metric computed by the reduce_fun.
 909        Args:
 910            self: ResPreds object
 911            res: Indices for reservoir results to be returned/optimized over. If left as an empty array, uses all indices.
 912            train: Indices for training data set results to be returned/optimized over. If left as an empty array, uses all indices.
 913            test: Indices for testing data set results to be returned/optimized over. If left as an empty array, uses all indices.
 914            reg_train: Number of training data points for regularization to be returned/optimized over. If left as an empty array, uses all values.
 915            noise: Noise/Jacobian/LMNT regularization parameter value results to be returned/optimized over. If left as an empty array, uses all values.
 916            reg: Tikhonov regularization paramter value results to be returned/optimized over. If left as an empty array, uses all values.
 917            median_flag: Boolean indicating whether the data should be optimized.
 918            reduce_axes: List containing the axes to be optimized over. Elements can be 'res', 'train', 'test', 'reg_train', 'noise', or 'reg'.
 919            metric: Metric to be used to compute which parameters give the best result. Options are:
 920                'gross_frac': Lowest fraction of gross error
 921                'mean_rms': Lowest mean map error.
 922                'max_rms: Lowest maximum map error.
 923                'valid_time': Highest valid prediction time.'
 924            gross_frac_metrix: If using 'gross_frac' as a metric, this is the secondary metric that will be used if multiple parameters give equally good prediction results.
 925            gross_err_bnd: The cutoff in the mean map error above which predictions are considered to have gross error.
 926            reduce_fun: Function for computing the overall performance for a given set of parameters over many tests.
 927        Returns:
 928            Pandas DataFrame containing the sliced and optimized prediction results.
 929        Raises:
 930            ValueError: If any of the inputs are not recognized/incompatible."""
 931        input_list = [res, train, test, reg_train, noise, reg]
 932        name_list = np.array(['res', 'train', 'test', 'reg_train', 'noise', 'reg'])
 933        data_names = [name for name in self.data.columns if name not in name_list]
 934        base_list = [self.res, self.train, self.test, self.reg_train, self.noise, self.reg]
 935        slice_vals = np.zeros(len(input_list), dtype=object)
 936        if median_flag:
 937            if not isinstance(reduce_axes, list):
 938                print('reduce_axes must be a list.')
 939                return ValueError
 940            elif len(reduce_axes) == 0:
 941                print('median_flag is True, but no axes to compute the median over are specified.')
 942                raise ValueError
 943            elif not all(axis in ['res', 'train', 'test', 'noise', 'reg','reg_train'] for axis in reduce_axes) or \
 944                    len(reduce_axes) != len(set(reduce_axes)):
 945                print('reduce_axis max only contain res, train, test, noise, or reg.')
 946                raise ValueError
 947            elif metric not in ['', 'mean_rms', 'max_rms', 'valid_time', 'pmap_max_wass_dist', 'gross_frac']:
 948                print('Metric not recognized.')
 949                raise ValueError
 950            elif len(metric) == 0 and any(axis in ['noise', 'reg'] for axis in reduce_axes):
 951                print('Cannot reduce axes with unspecified metric.')
 952                raise ValueError
 953
 954        for i, item in enumerate(input_list):
 955            if isinstance(item, np.ndarray):
 956                if item.size == 0:
 957                    slice_vals[i] = base_list[i]
 958                else:
 959                    slice_vals[i] = item
 960            elif isinstance(item, list):
 961                if len(item) == 0:
 962                    slice_vals[i] = self.item
 963                else:
 964                    slice_vals[i] = np.array(item)
 965            else:
 966                slice_vals[i] = np.array([item])
 967
 968        sliced_data = self.data
 969        for name, slice_val in zip(name_list, slice_vals):
 970            sliced_data = sliced_data[sliced_data[name].isin(slice_val)]
 971
 972        if not median_flag:
 973            return sliced_data
 974        elif median_flag:
 975            median_slice_data = pd.DataFrame()
 976            remaining_vars = [var not in reduce_axes for var in name_list[:3]]
 977            remaining_vars.extend([True, True, True])
 978            if np.all(remaining_vars):
 979                median_slice_data = sliced_data
 980                nans = pd.isna(median_slice_data['mean_rms'])
 981                near_nans = median_slice_data['mean_rms'] > gross_err_bnd
 982                median_slice_data['gross_count'] = np.zeros(median_slice_data.shape[0])
 983                median_slice_data['gross_frac'] = np.zeros(median_slice_data.shape[0])
 984                median_slice_data.loc[nans | near_nans, 'gross_count'] = 1.0
 985                median_slice_data.loc[nans | near_nans, 'gross_frac'] = 1.0
 986            else:
 987                total_vars = 1
 988                for slice_val in slice_vals[remaining_vars]:
 989                    total_vars *= slice_val.size
 990                for vars_set in product(*slice_vals[remaining_vars]):
 991                    row_dict = {}
 992                    reduced_sliced_data = sliced_data
 993                    for var, name in zip(vars_set, name_list[remaining_vars]):
 994                        row_dict[name] = var
 995                        reduced_sliced_data = reduced_sliced_data[reduced_sliced_data[name] == var]
 996                    if reduced_sliced_data.size == 0:
 997                        for name in data_names:
 998                            if 'variance' in name:
 999                                row_dict[name] = 0.
1000                            elif 'valid_time' not in name:
1001                                row_dict[name] = 1e10
1002                        row_dict['valid_time'] = 0.
1003                        row_dict['gross_count'] = 0.
1004                        row_dict['gross_frac'] = 1.0
1005                        row_dict['data_present'] = False
1006                    else:
1007                        nans = pd.isna(reduced_sliced_data['mean_rms'])
1008                        near_nans = reduced_sliced_data['mean_rms'] > gross_err_bnd
1009                        nan_count = reduced_sliced_data[nans | near_nans].shape[0]
1010                        nan_frac = nan_count / reduced_sliced_data.shape[0]
1011                        valid_times = np.array([])
1012                        for name in data_names:
1013                            if 'variance' in name:
1014                                row_dict[name] = reduce_fun(reduced_sliced_data.loc[~nans, name])
1015                            elif 'valid_time' not in name:
1016                                reduced_sliced_data.loc[nans, name] = 1e10
1017                                row_dict[name] = reduce_fun(reduced_sliced_data[name])
1018                            else:
1019                                reduced_sliced_data.loc[pd.isna(reduced_sliced_data[name]), name] = 0.0
1020                                valid_times = np.append(valid_times, reduced_sliced_data[name].to_numpy())
1021                        row_dict['valid_time'] = reduce_fun(pd.DataFrame(valid_times)).to_numpy()[0]
1022                        row_dict['gross_count'] = nan_count
1023                        row_dict['gross_frac'] = nan_frac
1024                        row_dict['data_present'] = True
1025                    for key in row_dict.keys():
1026                        if not isinstance(row_dict[key], list) and not isinstance(row_dict[key], np.ndarray):
1027                            row_dict[key] = [row_dict[key]]
1028                    median_slice_data = pd.concat([median_slice_data, pd.DataFrame(row_dict)], ignore_index=True,
1029                                                  verify_integrity=True)
1030            if len(metric) == 0:
1031                return median_slice_data
1032            else:
1033                best_median_slice_data = pd.DataFrame()
1034                remaining_vars = [var not in reduce_axes for var in name_list]
1035                total_vars = 1
1036                for slice_val in slice_vals[remaining_vars]:
1037                    total_vars *= slice_val.size
1038                for vars_set in product(*slice_vals[remaining_vars]):
1039                    row_dict = {}
1040                    best_reduced_slice_data = median_slice_data
1041                    for var, name in zip(vars_set, name_list[remaining_vars]):
1042                        row_dict[name] = var
1043                        best_reduced_slice_data = best_reduced_slice_data[best_reduced_slice_data[name] == var]
1044                    if metric == 'valid_time':
1045                        best_reduced_slice_data = best_reduced_slice_data[
1046                            best_reduced_slice_data[metric] == best_reduced_slice_data[metric].max()]
1047                    elif metric == 'gross_frac':
1048                        best_reduced_slice_data = best_reduced_slice_data[
1049                            best_reduced_slice_data[metric] == best_reduced_slice_data[metric].min()]
1050                        if len(best_reduced_slice_data.shape) != 1:
1051                            if gross_frac_metric == 'valid_time':
1052                                best_reduced_slice_data = best_reduced_slice_data[
1053                                    best_reduced_slice_data[gross_frac_metric] == best_reduced_slice_data[
1054                                        gross_frac_metric].max()]
1055                            else:
1056                                best_reduced_slice_data = best_reduced_slice_data[
1057                                    best_reduced_slice_data[gross_frac_metric] == best_reduced_slice_data[
1058                                        gross_frac_metric].min()]
1059                    else:
1060                        best_reduced_slice_data = best_reduced_slice_data[
1061                            best_reduced_slice_data[metric] == best_reduced_slice_data[metric].min()]
1062                    if len(best_reduced_slice_data.shape) != 1:
1063                        best_reduced_slice_data = best_reduced_slice_data[best_reduced_slice_data['noise'] ==
1064                                                                          best_reduced_slice_data['noise'].min()]
1065                    if len(best_reduced_slice_data.shape) != 1:
1066                        best_reduced_slice_data = best_reduced_slice_data[best_reduced_slice_data['reg'] ==
1067                                                                          best_reduced_slice_data['reg'].min()]
1068                    for key in best_reduced_slice_data.keys():
1069                        if not isinstance(best_reduced_slice_data[key], list) and not isinstance(
1070                                best_reduced_slice_data[key], np.ndarray):
1071                            best_reduced_slice_data[key] = [best_reduced_slice_data[key]]
1072                    best_median_slice_data = pd.concat([best_median_slice_data, best_reduced_slice_data],
1073                                                       ignore_index=True, verify_integrity=True)
1074                return best_median_slice_data

Class for loading and storing prediction analysis data generated from reservoir computer tests.

ResData(run_opts)
875    def __init__(self, run_opts):
876        """Loads the prediction analysis data from a compressed pandas data file.
877            Args:
878                run_opts: RunOpts object containing the run parameters."""
879        self.data_filename, self.pred_folder = run_opts.run_file_name, run_opts.run_folder_name
880        print('Starding data read...')
881        tic = time.perf_counter()
882        # print(self.data_filename)
883        if os.name == 'nt' and len(self.data_filename) >= 260:
884            self.data_filename = get_windows_path(self.data_filename)
885        self.data = pd.read_csv(self.data_filename, index_col=0)
886        toc = time.perf_counter()
887        print('Data reading finished in %0.2f sec.' % (toc - tic))
888        self.res = pd.unique(self.data['res'])
889        self.train = pd.unique(self.data['train'])
890        self.test = pd.unique(self.data['test'])
891        self.noise = pd.unique(self.data['noise'])
892        self.reg = pd.unique(self.data['reg'])
893        self.reg_train = pd.unique(self.data['reg_train'])
894        self.nan = pd.isna(self.data['variance'])

Loads the prediction analysis data from a compressed pandas data file.

Args
  • run_opts: RunOpts object containing the run parameters.
def shape(self):
896    def shape(self):
897        """Returns the shape of the pandas data frame"""
898        return self.data.shape

Returns the shape of the pandas data frame

def size(self):
900    def size(self):
901        """Returns the size of the pandas data frame"""
902        return self.data.size

Returns the size of the pandas data frame

def data_slice( self, res=array([], dtype=float64), train=array([], dtype=float64), test=array([], dtype=float64), reg_train=array([], dtype=float64), noise=array([], dtype=float64), reg=array([], dtype=float64), median_flag=False, reduce_axes=[], metric='', gross_frac_metric='valid_time', gross_err_bnd=100.0, reduce_fun=<function NDFrame._add_numeric_operations.<locals>.median>):
 904    def data_slice(self, res=np.array([]), train=np.array([]), test=np.array([]),
 905                   reg_train=np.array([]), noise=np.array([]), reg=np.array([]), median_flag=False,
 906                   reduce_axes=[], metric='', gross_frac_metric='valid_time', gross_err_bnd=1e2,
 907                   reduce_fun=pd.DataFrame.median):
 908        """Slices and/or finds the best results using a metric computed by the reduce_fun.
 909        Args:
 910            self: ResPreds object
 911            res: Indices for reservoir results to be returned/optimized over. If left as an empty array, uses all indices.
 912            train: Indices for training data set results to be returned/optimized over. If left as an empty array, uses all indices.
 913            test: Indices for testing data set results to be returned/optimized over. If left as an empty array, uses all indices.
 914            reg_train: Number of training data points for regularization to be returned/optimized over. If left as an empty array, uses all values.
 915            noise: Noise/Jacobian/LMNT regularization parameter value results to be returned/optimized over. If left as an empty array, uses all values.
 916            reg: Tikhonov regularization paramter value results to be returned/optimized over. If left as an empty array, uses all values.
 917            median_flag: Boolean indicating whether the data should be optimized.
 918            reduce_axes: List containing the axes to be optimized over. Elements can be 'res', 'train', 'test', 'reg_train', 'noise', or 'reg'.
 919            metric: Metric to be used to compute which parameters give the best result. Options are:
 920                'gross_frac': Lowest fraction of gross error
 921                'mean_rms': Lowest mean map error.
 922                'max_rms: Lowest maximum map error.
 923                'valid_time': Highest valid prediction time.'
 924            gross_frac_metrix: If using 'gross_frac' as a metric, this is the secondary metric that will be used if multiple parameters give equally good prediction results.
 925            gross_err_bnd: The cutoff in the mean map error above which predictions are considered to have gross error.
 926            reduce_fun: Function for computing the overall performance for a given set of parameters over many tests.
 927        Returns:
 928            Pandas DataFrame containing the sliced and optimized prediction results.
 929        Raises:
 930            ValueError: If any of the inputs are not recognized/incompatible."""
 931        input_list = [res, train, test, reg_train, noise, reg]
 932        name_list = np.array(['res', 'train', 'test', 'reg_train', 'noise', 'reg'])
 933        data_names = [name for name in self.data.columns if name not in name_list]
 934        base_list = [self.res, self.train, self.test, self.reg_train, self.noise, self.reg]
 935        slice_vals = np.zeros(len(input_list), dtype=object)
 936        if median_flag:
 937            if not isinstance(reduce_axes, list):
 938                print('reduce_axes must be a list.')
 939                return ValueError
 940            elif len(reduce_axes) == 0:
 941                print('median_flag is True, but no axes to compute the median over are specified.')
 942                raise ValueError
 943            elif not all(axis in ['res', 'train', 'test', 'noise', 'reg','reg_train'] for axis in reduce_axes) or \
 944                    len(reduce_axes) != len(set(reduce_axes)):
 945                print('reduce_axis max only contain res, train, test, noise, or reg.')
 946                raise ValueError
 947            elif metric not in ['', 'mean_rms', 'max_rms', 'valid_time', 'pmap_max_wass_dist', 'gross_frac']:
 948                print('Metric not recognized.')
 949                raise ValueError
 950            elif len(metric) == 0 and any(axis in ['noise', 'reg'] for axis in reduce_axes):
 951                print('Cannot reduce axes with unspecified metric.')
 952                raise ValueError
 953
 954        for i, item in enumerate(input_list):
 955            if isinstance(item, np.ndarray):
 956                if item.size == 0:
 957                    slice_vals[i] = base_list[i]
 958                else:
 959                    slice_vals[i] = item
 960            elif isinstance(item, list):
 961                if len(item) == 0:
 962                    slice_vals[i] = self.item
 963                else:
 964                    slice_vals[i] = np.array(item)
 965            else:
 966                slice_vals[i] = np.array([item])
 967
 968        sliced_data = self.data
 969        for name, slice_val in zip(name_list, slice_vals):
 970            sliced_data = sliced_data[sliced_data[name].isin(slice_val)]
 971
 972        if not median_flag:
 973            return sliced_data
 974        elif median_flag:
 975            median_slice_data = pd.DataFrame()
 976            remaining_vars = [var not in reduce_axes for var in name_list[:3]]
 977            remaining_vars.extend([True, True, True])
 978            if np.all(remaining_vars):
 979                median_slice_data = sliced_data
 980                nans = pd.isna(median_slice_data['mean_rms'])
 981                near_nans = median_slice_data['mean_rms'] > gross_err_bnd
 982                median_slice_data['gross_count'] = np.zeros(median_slice_data.shape[0])
 983                median_slice_data['gross_frac'] = np.zeros(median_slice_data.shape[0])
 984                median_slice_data.loc[nans | near_nans, 'gross_count'] = 1.0
 985                median_slice_data.loc[nans | near_nans, 'gross_frac'] = 1.0
 986            else:
 987                total_vars = 1
 988                for slice_val in slice_vals[remaining_vars]:
 989                    total_vars *= slice_val.size
 990                for vars_set in product(*slice_vals[remaining_vars]):
 991                    row_dict = {}
 992                    reduced_sliced_data = sliced_data
 993                    for var, name in zip(vars_set, name_list[remaining_vars]):
 994                        row_dict[name] = var
 995                        reduced_sliced_data = reduced_sliced_data[reduced_sliced_data[name] == var]
 996                    if reduced_sliced_data.size == 0:
 997                        for name in data_names:
 998                            if 'variance' in name:
 999                                row_dict[name] = 0.
1000                            elif 'valid_time' not in name:
1001                                row_dict[name] = 1e10
1002                        row_dict['valid_time'] = 0.
1003                        row_dict['gross_count'] = 0.
1004                        row_dict['gross_frac'] = 1.0
1005                        row_dict['data_present'] = False
1006                    else:
1007                        nans = pd.isna(reduced_sliced_data['mean_rms'])
1008                        near_nans = reduced_sliced_data['mean_rms'] > gross_err_bnd
1009                        nan_count = reduced_sliced_data[nans | near_nans].shape[0]
1010                        nan_frac = nan_count / reduced_sliced_data.shape[0]
1011                        valid_times = np.array([])
1012                        for name in data_names:
1013                            if 'variance' in name:
1014                                row_dict[name] = reduce_fun(reduced_sliced_data.loc[~nans, name])
1015                            elif 'valid_time' not in name:
1016                                reduced_sliced_data.loc[nans, name] = 1e10
1017                                row_dict[name] = reduce_fun(reduced_sliced_data[name])
1018                            else:
1019                                reduced_sliced_data.loc[pd.isna(reduced_sliced_data[name]), name] = 0.0
1020                                valid_times = np.append(valid_times, reduced_sliced_data[name].to_numpy())
1021                        row_dict['valid_time'] = reduce_fun(pd.DataFrame(valid_times)).to_numpy()[0]
1022                        row_dict['gross_count'] = nan_count
1023                        row_dict['gross_frac'] = nan_frac
1024                        row_dict['data_present'] = True
1025                    for key in row_dict.keys():
1026                        if not isinstance(row_dict[key], list) and not isinstance(row_dict[key], np.ndarray):
1027                            row_dict[key] = [row_dict[key]]
1028                    median_slice_data = pd.concat([median_slice_data, pd.DataFrame(row_dict)], ignore_index=True,
1029                                                  verify_integrity=True)
1030            if len(metric) == 0:
1031                return median_slice_data
1032            else:
1033                best_median_slice_data = pd.DataFrame()
1034                remaining_vars = [var not in reduce_axes for var in name_list]
1035                total_vars = 1
1036                for slice_val in slice_vals[remaining_vars]:
1037                    total_vars *= slice_val.size
1038                for vars_set in product(*slice_vals[remaining_vars]):
1039                    row_dict = {}
1040                    best_reduced_slice_data = median_slice_data
1041                    for var, name in zip(vars_set, name_list[remaining_vars]):
1042                        row_dict[name] = var
1043                        best_reduced_slice_data = best_reduced_slice_data[best_reduced_slice_data[name] == var]
1044                    if metric == 'valid_time':
1045                        best_reduced_slice_data = best_reduced_slice_data[
1046                            best_reduced_slice_data[metric] == best_reduced_slice_data[metric].max()]
1047                    elif metric == 'gross_frac':
1048                        best_reduced_slice_data = best_reduced_slice_data[
1049                            best_reduced_slice_data[metric] == best_reduced_slice_data[metric].min()]
1050                        if len(best_reduced_slice_data.shape) != 1:
1051                            if gross_frac_metric == 'valid_time':
1052                                best_reduced_slice_data = best_reduced_slice_data[
1053                                    best_reduced_slice_data[gross_frac_metric] == best_reduced_slice_data[
1054                                        gross_frac_metric].max()]
1055                            else:
1056                                best_reduced_slice_data = best_reduced_slice_data[
1057                                    best_reduced_slice_data[gross_frac_metric] == best_reduced_slice_data[
1058                                        gross_frac_metric].min()]
1059                    else:
1060                        best_reduced_slice_data = best_reduced_slice_data[
1061                            best_reduced_slice_data[metric] == best_reduced_slice_data[metric].min()]
1062                    if len(best_reduced_slice_data.shape) != 1:
1063                        best_reduced_slice_data = best_reduced_slice_data[best_reduced_slice_data['noise'] ==
1064                                                                          best_reduced_slice_data['noise'].min()]
1065                    if len(best_reduced_slice_data.shape) != 1:
1066                        best_reduced_slice_data = best_reduced_slice_data[best_reduced_slice_data['reg'] ==
1067                                                                          best_reduced_slice_data['reg'].min()]
1068                    for key in best_reduced_slice_data.keys():
1069                        if not isinstance(best_reduced_slice_data[key], list) and not isinstance(
1070                                best_reduced_slice_data[key], np.ndarray):
1071                            best_reduced_slice_data[key] = [best_reduced_slice_data[key]]
1072                    best_median_slice_data = pd.concat([best_median_slice_data, best_reduced_slice_data],
1073                                                       ignore_index=True, verify_integrity=True)
1074                return best_median_slice_data

Slices and/or finds the best results using a metric computed by the reduce_fun.

Args
  • self: ResPreds object
  • res: Indices for reservoir results to be returned/optimized over. If left as an empty array, uses all indices.
  • train: Indices for training data set results to be returned/optimized over. If left as an empty array, uses all indices.
  • test: Indices for testing data set results to be returned/optimized over. If left as an empty array, uses all indices.
  • reg_train: Number of training data points for regularization to be returned/optimized over. If left as an empty array, uses all values.
  • noise: Noise/Jacobian/LMNT regularization parameter value results to be returned/optimized over. If left as an empty array, uses all values.
  • reg: Tikhonov regularization paramter value results to be returned/optimized over. If left as an empty array, uses all values.
  • median_flag: Boolean indicating whether the data should be optimized.
  • reduce_axes: List containing the axes to be optimized over. Elements can be 'res', 'train', 'test', 'reg_train', 'noise', or 'reg'.
  • metric: Metric to be used to compute which parameters give the best result. Options are: 'gross_frac': Lowest fraction of gross error 'mean_rms': Lowest mean map error. 'max_rms: Lowest maximum map error. 'valid_time': Highest valid prediction time.'
  • gross_frac_metrix: If using 'gross_frac' as a metric, this is the secondary metric that will be used if multiple parameters give equally good prediction results.
  • gross_err_bnd: The cutoff in the mean map error above which predictions are considered to have gross error.
  • reduce_fun: Function for computing the overall performance for a given set of parameters over many tests.
Returns

Pandas DataFrame containing the sliced and optimized prediction results.

Raises
  • ValueError: If any of the inputs are not recognized/incompatible.