Source code for pysmac.utils.smac_output_readers

import json
import functools
import re
import operator

import numpy as np

from pysmac.remote_smac import process_parameter_definitions


def convert_param_dict_types(param_dict, pcs):
    """Convert the values of a parameter dictionary in place.

    The second element returned by ``process_parameter_definitions(pcs)``
    is used as a lookup table from parameter name to a callable; each value
    in ``param_dict`` is replaced by the result of calling the matching
    callable on it.

    :param param_dict: parameter name -> value pairs; modified in place
    :type param_dict: dict
    :param pcs: parameter definitions, handed unchanged to
        ``process_parameter_definitions``

    :returns: dict -- the very same ``param_dict`` object, now holding the
        converted values
    :raises KeyError: if a name in ``param_dict`` has no entry in the
        lookup table
    """
    _, converters = process_parameter_definitions(pcs)
    for name in param_dict:
        convert = converters[name]
        # Only values are reassigned, never keys, so iterating the dict
        # while updating it is safe.
        param_dict[name] = convert(param_dict[name])
    return param_dict
def json_parse(fileobj, decoder=json.JSONDecoder(), buffersize=2048):
    """
    Small generator to parse a file containing JSON objects separated by
    whitespace (typically one object per line). This format is used in the
    live-rundata-xx.json files produced by SMAC.

    taken from http://stackoverflow.com/questions/21708192/how-do-i-use-the-json-module-to-read-in-one-json-object-at-a-time/21709058#21709058

    The file is read in chunks of ``buffersize`` characters. After every
    decoded object the leading whitespace of the remaining buffer is
    removed, because ``raw_decode`` does not skip it itself. Trailing
    whitespace is left alone, since a chunk may end in the middle of a JSON
    string.

    :param fileobj: file-like object opened in text mode (``read`` has to
        return ``''`` at end of file)
    :param decoder: decoder whose ``raw_decode`` method is used
    :type decoder: json.JSONDecoder
    :param buffersize: number of characters requested per ``read`` call
    :type buffersize: int

    :returns: generator -- yields the decoded objects in file order

    .. note:: Data that is still undecodable when the end of the file is
        reached (e.g. a truncated last object) is dropped silently.
    """
    buffer = ''
    # iter() with a sentinel keeps calling fileobj.read(buffersize) until
    # it returns '' (end of file in text mode).
    for chunk in iter(functools.partial(fileobj.read, buffersize), ''):
        buffer = (buffer + chunk).lstrip()
        while buffer:
            try:
                result, index = decoder.raw_decode(buffer)
            except ValueError:
                # Not enough data to decode, read more
                break
            yield result
            # Drop the separator so the next object starts at position 0.
            buffer = buffer[index:].lstrip()
def read_runs_and_results_file(fn):
    """
    Converting a runs_and_results file into a numpy array.

    Almost all entries in a runs_and_results file are numeric to begin with.
    Only the 14th column contains the status, which is encoded as a number
    according to the following table.

    \n
    +-------+----------------+
    | Value | Representation |
    +=======+================+
    |SAT    |       2        |
    +-------+----------------+
    |UNSAT  |       1        |
    +-------+----------------+
    |TIMEOUT|       0        |
    +-------+----------------+
    |Others |      -1        |
    +-------+----------------+

    The first line of the file is skipped as header. Of the remaining
    comma separated columns, the ones with (0-based) index 1 to 13 and 15
    are loaded, so the returned array has 14 columns and the encoded status
    is found at index 12.

    :param fn: name of the runs_and_results file (anything ``numpy.loadtxt``
        accepts)

    :returns: numpy_array(dtype = double) -- the data, always 2-dimensional
    """
    # to convert everything into floats, the run result needs to be mapped
    def map_run_result(res):
        # Depending on the numpy version/encoding, loadtxt hands either
        # bytes or str to a converter; normalise to str so both work.
        if isinstance(res, bytes):
            res = res.decode('latin1')
        if 'TIMEOUT' in res:
            return 0
        # note UNSAT before SAT, b/c UNSAT contains SAT!
        if 'UNSAT' in res:
            return 1
        if 'SAT' in res:
            return 2
        # covers ABORT, CRASHED, but that shouldn't happen
        return -1

    return np.loadtxt(
        fn,
        skiprows=1,
        delimiter=',',
        # skip empty 'algorithm run data' column
        usecols=list(range(1, 14)) + [15],
        converters={13: map_run_result},
        ndmin=2,
    )
def read_paramstrings_file(fn):
    """
    Function to read a paramstring file.

    Every non-blank line in this file corresponds to a full configuration.
    Everything is stored as strings and without knowledge about the pcs,
    converting that into any other type would involve guessing, which we
    shall not do here.

    For each line, everything up to and including the first ``:`` (the run
    id) and all single quotes are removed. The rest is split at commas into
    ``name=value`` items; an item is split at its first ``=`` only, so a
    value may itself contain ``=``.

    :param fn: the name of the paramstring file
    :type fn: str

    :returns: list of dicts -- one dict per configuration with key-value
        pairs 'parameter name'-'value as string'
    :raises ValueError: if an item of a non-blank line contains no ``=``
    """
    param_dict_list = []
    with open(fn, 'r') as fh:
        for line in fh:
            # blank lines (e.g. at the end of the file) carry no configuration
            if not line.strip():
                continue
            # remove run id and single quotes
            config = line[line.find(':') + 1:].replace("'", "")
            pairs = [item.strip().split("=", 1) for item in config.split(',')]
            param_dict_list.append({name: value for name, value in pairs})
    return param_dict_list
def read_validationCallStrings_file(fn):
    """Reads a validationCallString file into a list of dictionaries.

    The first line is treated as header and skipped, blank lines are
    ignored. From every other line the second comma separated field is
    taken, surrounding whitespace (including the line break) and double
    quotes are removed, and the remainder is read as a whitespace separated
    sequence of alternating ``-name`` and ``'value'`` tokens.

    :param fn: the name of the validationCallStrings file
    :type fn: str

    :returns: list of dicts -- each dictionary contains 'parameter name' and 'parameter value as string' key-value pairs
    :raises IndexError: if a line has no second field or a name token is
        not followed by a value token
    """
    param_dict_list = []
    with open(fn, 'r') as fh:
        next(fh, None)  # skip header line
        for line in fh:
            if not line.strip():
                continue
            # Strip whitespace first: otherwise the trailing newline shields
            # the closing double quote (and the last value's single quote).
            config_string = line.split(",")[1].strip().strip('"')
            tokens = config_string.split()
            tmp_dict = {}
            for i in range(0, len(tokens), 2):
                tmp_dict[tokens[i].lstrip('-')] = tokens[i + 1].strip("'")
            param_dict_list.append(tmp_dict)
    return param_dict_list
def read_validationObjectiveMatrix_file(fn):
    """
    reads the run data of a validation run performed by SMAC.

    For cases with instances, not necessarily every instance is used during the
    configuration phase to estimate a configuration's performance. If validation
    is enabled, SMAC reruns parameter settings (usually just the final incumbent)
    on the whole instance set/a designated test set. The data from those runs
    is stored in separate files. This function reads one of these files.

    The number of value columns is derived from the header line (number of
    comma separated fields minus two). Every data line has to look like
    ``"id_<int>","<int>","<float>",...``: the integer of the first field
    becomes the key, the second field is ignored and the remaining fields
    are converted to floats. Blank lines are skipped.

    :param fn: the name of the validationObjectiveMatrix file
    :type fn: str

    :returns: dict -- the integer id of a line's first field as key, the list of float values from that line's remaining columns as value
    :raises ValueError: if a non-blank data line does not have the expected
        layout or a field cannot be converted to a number

    .. todo::
       testing of validation runs where more than the final incumbent is validated
    """
    values = {}
    with open(fn, 'r') as fh:
        header = fh.readline().split(",")
        num_configs = len(header) - 2
        # Raw strings keep the backslashes for the regex engine; the pattern
        # is the same for every line, so it is compiled only once.
        pattern = re.compile(r'\w?,\w?'.join(
            [r'"id\_(\d*)"', r'"(\d*)"'] + [r'"([0-9.]*)"'] * num_configs))
        for line in fh:
            if not line.strip():
                continue
            match = pattern.match(line)
            if match is None:
                raise ValueError(
                    'unexpected line in validationObjectiveMatrix file: %r'
                    % line)
            # groups 1 and 2 are the id and the second column; the values
            # start at group 3
            values[int(match.group(1))] = [
                float(match.group(i)) for i in range(3, 3 + num_configs)]
    return values
def read_trajectory_file(fn):
    """Reads a trajectory file and returns a list of dicts with all the information.

    Due to the way SMAC stores every parameter's value as a string, the
    configuration returned by this function also has every value stored as
    a string. All other values, like "Estimated Training Performance" and
    so on are floats, though.

    The first line is the header; all of its comma separated fields except
    the last one name the leading numeric columns. Every further field of a
    data line has to be a ``name=value`` item (split at the first ``=``
    only) and ends up in the 'Configuration' dict, with surrounding single
    and double quotes removed from the value. Blank lines are skipped.

    :param fn: name of file to read
    :type fn: str

    :returns: list of dicts -- every dict contains the keys: "CPU Time Used","Estimated Training Performance","Wallclock Time","Incumbent ID","Automatic Configurator (CPU) Time","Configuration"
    :raises ValueError: if a numeric field cannot be converted to float or
        a configuration item contains no ``=``
    """
    return_list = []
    with open(fn, 'r') as fh:
        header = [field.strip('"') for field in fh.readline().split(",")]
        # the last header field stands for the configuration part
        l_info = len(header) - 1
        for line in fh:
            if not line.strip():
                continue
            fields = line.split(",")
            entry = {}
            for i in range(l_info):
                entry[header[i]] = float(fields[i])
            configuration = {}
            for item in fields[l_info:]:
                name, value = item.strip().split("=", 1)
                configuration[name] = value.strip("'").strip('"')
            entry['Configuration'] = configuration
            return_list.append(entry)
    return return_list
def read_instances_file(fn):
    """Reads the instance names from an instance file.

    Every line of the file is split at whitespace and contributes one list
    to the result (a line without any non-whitespace characters contributes
    an empty list).

    :param fn: name of file to read
    :type fn: str

    :returns: list -- each element is a list where the first element is the instance name followed by additional information for the specific instance.
    """
    rows = []
    with open(fn, 'r') as handle:
        for raw_line in handle:
            rows.append(raw_line.strip().split())
    return rows
def read_instance_features_file(fn):
    """Function to read an instance_feature file.

    The first line is the header: its first comma separated field is
    dropped, the remaining ones are returned as feature names. Every other
    non-blank line consists of the instance name followed by the feature
    values, which are converted to doubles.

    :param fn: name of file to read
    :type fn: str

    :returns: tuple -- first entry is a list of the feature names, second one is a dict with 'instance name' - 'numpy array containing the features' key-value pairs. An empty file yields an empty list and an empty dict.
    """
    with open(fn, 'r') as fh:
        lines = fh.readlines()
    if not lines:
        return ([], {})

    # strip() so the last feature name does not keep the line break
    feature_names = lines[0].strip().split(",")[1:]

    instances = {}
    for line in lines[1:]:
        fields = line.strip().split(",")
        # a blank line would otherwise register an instance named ''
        if fields == ['']:
            continue
        instances[fields[0]] = np.array(fields[1:], dtype=np.double)
    return (feature_names, instances)