import numpy as np
import os
import sys
from typing import Union
from kgcnn.data.utils import save_yaml_file, load_pickle_file
from datetime import datetime
from kgcnn import __kgcnn_version__
import keras as ks
import keras.callbacks
from keras.backend import backend
from kgcnn.utils.devices import check_device
def load_history_list(file_path, folds):
    r"""Load the per-fold history files that exist on disk.

    The placeholder ``"(i)"`` in ``file_path`` is replaced by every index in ``range(folds)`` and each
    resulting path is loaded with :obj:`load_pickle_file` if it exists. Paths that do not exist are
    skipped silently, so the returned list can be shorter than ``folds`` and its positions do not
    necessarily coincide with the fold indices.

    Args:
        file_path: Path template containing the placeholder ``"(i)"``. It is converted with ``str()``.
        folds (int): Number of fold indices to probe, starting at 0.

    Returns:
        list: Loaded objects of all existing files in ascending fold order.
    """
    path_template = str(file_path)
    history_list = []
    for i in range(folds):
        file_path_i = path_template.replace("(i)", str(i))
        # Folds that have not been written are skipped on purpose (best effort).
        if os.path.exists(file_path_i):
            # NOTE(review): presumably this unpickles the file; only load files from a trusted source.
            history_list.append(load_pickle_file(file_path_i))
    return history_list
# Alias of `load_history_list` exposed under a second name (presumably used to load per-fold
# time records stored with the same "(i)" path template; confirm against callers).
load_time_list = load_history_list
def _summarize_metric_curves(histories: list, names: list) -> dict:
    """Return last, maximum and minimum value per history for every metric name in ``names``."""
    summary = {}
    for name in names:
        # One 1-D array per history instead of a stacked 2-D array: folds may have a different
        # number of epochs (e.g. early stopping) and ragged input cannot be stacked by numpy.
        curves = [np.asarray(hist[name]) for hist in histories]
        summary[name] = [float(curve[-1]) for curve in curves]
        summary["max_%s" % name] = [float(np.amax(curve)) for curve in curves]
        summary["min_%s" % name] = [float(np.amin(curve)) for curve in curves]
    return summary


def save_history_score(
        histories: list,
        filepath: Union[str, None] = None,
        loss_name: Union[str, list, None] = None,
        val_loss_name: Union[str, list, None] = None,
        data_unit: str = "",
        model_name: str = "",
        file_name: str = "score.yaml",
        model_version: str = "",
        dataset_name: str = "",
        model_class: str = "",
        execute_folds: Union[list, int, None] = None,
        multi_target_indices: Union[list, int, None] = None,
        trajectory_name: Union[str, None] = None,
        seed: Union[int, None] = None,
        time_list: Union[list, None] = None
):
    r"""Save fit results from fit histories to file.

    This function is used in training scripts to record final training and validation metrics.
    For every selected metric the last, maximum and minimum value of each history is stored,
    together with information on model, dataset, software version and device.

    Args:
        histories (list): List of :obj:`keras.callbacks.History` objects or of their plain
            ``history`` dictionaries. Multiple fits as in KFold are assumed.
        filepath (str): Directory where to save the score file, without the name of the file.
            If `None`, nothing is written to disk. Default is None.
        loss_name (str, list): Which loss or metric(s) to pick from history. If `None`, all keys of
            the first history that do not start with "val_" are used. Default is None.
        val_loss_name (str, list): Which validation loss or metric(s) to pick from history. If `None`,
            all keys of the first history that start with "val_" are used. Default is None.
        data_unit (str): Unit of the loss. Default is "".
        model_name (str): Name of the model. Default is "".
        file_name (str): File name base. Model name and dataset name are prepended, i.e. the file
            is named "<model_name>_<dataset_name>_<file_name>". Default is "score.yaml".
        model_version (str): Version of the model. Default is "".
        dataset_name (str): Name of the dataset which was fitted to. Default is "".
        model_class (str): Model class or generator. Default is "".
        execute_folds (list, int): Folds which were executed. Default is None.
        multi_target_indices (list, int): Indices for multi target training. Default is None.
        trajectory_name (str): Name of the trajectory if known. Default is None.
        seed (int): Random seed to log. Default is None.
        time_list (list): List of training time info. Default is None.

    Returns:
        dict: Score which was saved to file.
    """
    # Accept both History callbacks and already extracted history dictionaries.
    histories = [hist.history if isinstance(hist, ks.callbacks.History) else hist for hist in histories]

    if data_unit is None:
        data_unit = ""

    # Metric names are detected from the first history. An empty list of histories simply
    # yields no metric names instead of failing on `histories[0]`.
    detected_keys = list(histories[0].keys()) if histories else []
    # Validation entries are training names prefixed with "val_". A prefix test (rather than a
    # substring test) keeps training metrics such as "eval_mae" out of the validation group.
    if loss_name is None:
        loss_name = [x for x in detected_keys if not x.startswith("val_")]
    if val_loss_name is None:
        val_loss_name = [x for x in detected_keys if x.startswith("val_")]
    if not isinstance(loss_name, list):
        loss_name = [loss_name]
    if not isinstance(val_loss_name, list):
        val_loss_name = [val_loss_name]

    # Cast to plain Python integers before the values are stored in the score.
    if isinstance(multi_target_indices, list):
        multi_target_indices = [int(x) for x in multi_target_indices]
    elif multi_target_indices is not None:
        multi_target_indices = int(multi_target_indices)

    # Training metrics first, validation metrics second, to keep the order of entries.
    result_dict = {}
    result_dict.update(_summarize_metric_curves(histories, loss_name))
    result_dict.update(_summarize_metric_curves(histories, val_loss_name))

    result_dict["data_unit"] = str(data_unit)
    if len(loss_name) > 0:
        # Number of epochs per history, taken from the first training metric.
        result_dict["epochs"] = [int(len(hist[loss_name[0]])) for hist in histories]
    result_dict["date_time"] = str(datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    result_dict["model_class"] = str(model_class)
    result_dict["model_version"] = str(model_version)
    result_dict["model_name"] = str(model_name)
    result_dict["kgcnn_version"] = str(__kgcnn_version__)
    result_dict["number_histories"] = len(histories)
    result_dict["multi_target_indices"] = multi_target_indices
    result_dict["execute_folds"] = execute_folds
    result_dict["time_list"] = time_list
    result_dict["seed"] = seed
    result_dict["backend"] = backend()
    result_dict["OS"] = "%s_%s" % (os.name, sys.platform)
    result_dict.update(check_device())
    if trajectory_name:
        result_dict["trajectory_name"] = trajectory_name

    if filepath is not None:
        save_yaml_file(result_dict, os.path.join(filepath, "%s_%s_%s" % (model_name, dataset_name, file_name)))
    return result_dict