# Source code for kgcnn.utils.plots

import os
import matplotlib.pyplot as plt
import numpy as np
import keras as ks
import keras.callbacks


def _plot_mean_std_band(series, label, step=1, offset=0):
    """Plot the mean of `series` over its first axis with a shaded band of one standard deviation.

    Args:
        series (np.ndarray): Array of shape `(n_fits, n_points)`.
        label (str): Legend label of the mean curve.
        step: Spacing of the points on the x-axis. Default is 1.
        offset: X-position of the first point. Default is 0.

    Returns:
        tuple: Mean curve, standard deviation curve and the color of the plotted line.
    """
    mean = np.mean(series, axis=0)
    std = np.std(series, axis=0)
    positions = np.arange(len(mean)) * step + offset
    line = plt.plot(positions, mean, alpha=0.85, label=label)
    color = line[0].get_color()
    # Re-use the line color so that curve and band are visually tied together.
    plt.fill_between(positions, mean - std, mean + std, color=color, alpha=0.2)
    return mean, std, color


def plot_train_test_loss(histories: list, loss_name: str = None,
                         val_loss_name: str = None, data_unit: str = "", model_name: str = "",
                         filepath: str = None, file_name: str = "", dataset_name: str = "",
                         figsize: list = None, dpi: float = None, show_fig: bool = True
                         ):
    r"""Plot training curves for a list of fit results in form of keras history objects.

    For every selected metric the mean over all fits (e.g. the splits of a KFold) is plotted versus epochs,
    with a shaded band of one standard deviation. The final mean validation value is marked and added
    to the legend.

    Args:
        histories (list): List of :obj:`keras.callbacks.History` objects or of plain history dictionaries
            that map a metric name to its list of values. Must not be empty.
        loss_name (str): Name or list of names of the training loss or metric to pick from history.
            Default is None, which selects every key of the first history that does not contain "val_".
        val_loss_name (str): Name or list of names of the validation loss or metric to pick from history.
            Default is None, which selects every key of the first history that contains "val_".
        data_unit (str): Unit or list of units of the validation losses. A shorter list is padded with its
            last entry. Default is "".
        model_name (str): Name of the model. Default is "".
        filepath (str): Full path where to save plot to, without the name of the file. Default is None,
            in which case the figure is not saved.
        file_name (str): File name base. Model name and dataset will be added to the name. Default is "".
        dataset_name (str): Name of the dataset which was fitted to. Default is "".
        figsize (list): Size of the figure. Default is None, which uses `[6.4, 4.8]`.
        dpi (float): The resolution of the figure in dots-per-inch. Default is None, which uses 100.0.
        show_fig (bool): Whether to show figure. Default is True.

    Returns:
        matplotlib.pyplot.figure: Figure of the training curves.

    Raises:
        ValueError: If `histories` is empty.
    """
    # We assume multiple fits as in KFold.
    histories = [hist.history if isinstance(hist, ks.callbacks.History) else hist for hist in histories]
    if not histories:
        raise ValueError("Need at least one history to plot training curves, but got an empty list.")

    if data_unit is None:
        data_unit = ""
    metric_keys = list(histories[0].keys())
    if loss_name is None:
        loss_name = [x for x in metric_keys if "val_" not in x]
    if val_loss_name is None:
        val_loss_name = [x for x in metric_keys if "val_" in x]

    if not isinstance(loss_name, list):
        loss_name = [loss_name]
    if not isinstance(val_loss_name, list):
        val_loss_name = [val_loss_name]
    if not isinstance(data_unit, list):
        data_unit = [data_unit]
    if len(data_unit) < len(val_loss_name):
        # Pad with the last given unit; an empty list falls back to no unit instead of failing.
        last_unit = str(data_unit[-1]) if data_unit else ""
        data_unit = data_unit + [last_unit] * (len(val_loss_name) - len(data_unit))

    train_loss = [np.array([np.array(hist[x]) for hist in histories]) for x in loss_name]
    val_loss = [np.array([hist[x] for hist in histories]) for x in val_loss_name]

    if figsize is None:
        figsize = [6.4, 4.8]
    if dpi is None:
        dpi = 100.0
    fig = plt.figure(figsize=figsize, dpi=dpi)

    for name, x in zip(loss_name, train_loss):
        _plot_mean_std_band(x, name)

    for i, (name, y) in enumerate(zip(val_loss_name, val_loss)):
        # The epoch count is read from the training curve at the same position. If there are more
        # validation than training metrics, fall back to the first training curve, or to the
        # validation curve itself, rather than indexing out of range.
        if i < len(train_loss):
            reference = train_loss[i]
        elif train_loss:
            reference = train_loss[0]
        else:
            reference = y
        num_epochs = len(reference[0])
        # Validation may be recorded less often than training; stretch it over the epoch axis.
        val_step = num_epochs / len(y[0])
        mean, std, color = _plot_mean_std_band(y, name, step=val_step, offset=val_step)
        plt.scatter([num_epochs], [mean[-1]],
                    label=r"{0}: {1:0.4f} $\pm$ {2:0.4f} ".format(name, mean[-1], std[-1]) + data_unit[i],
                    color=color)

    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title("%s training curve for %s" % (dataset_name, model_name))
    plt.legend(loc='upper right', fontsize='small')
    if filepath is not None:
        plt.savefig(os.path.join(filepath, model_name + "_" + dataset_name + "_" + file_name))
    if show_fig:
        plt.show()
    return fig
def plot_predict_true(y_predict, y_true, data_unit: list = None, model_name: str = "",
                      filepath: str = None, file_name: str = "", dataset_name: str = "",
                      target_names: list = None, figsize: list = None, dpi: float = None,
                      show_fig: bool = False, scaled_predictions: bool = False):
    r"""Make a scatter plot of predicted versus actual targets. Not for k-splits.

    Each target column is drawn as its own scatter series, labelled with its mean absolute error
    (NaN entries are ignored for the error). A red identity line spans the range of the valid true values.

    Args:
        y_predict (np.ndarray): Numpy array of shape `(N_samples, n_targets)` or `(N_samples, )`.
        y_true (np.ndarray): Numpy array of shape `(N_samples, n_targets)` or `(N_samples, )`.
        data_unit (list): String or list of string that matches `n_targets`. Name of the data's unit.
            A list that is too short is padded with empty strings after a warning. Default is None.
        model_name (str): Name of the model. Default is "".
        filepath (str): Full path where to save plot to, without the name of the file. Default is None,
            in which case the figure is not saved.
        file_name (str): File name base. Model name and dataset will be added to the name. Default is "".
        dataset_name (str): Name of the dataset which was fitted to. Default is "".
        target_names (list): String or list of string that matches `n_targets`. Name of the targets.
            A list that is too short is padded with empty strings after a warning. Default is None.
        figsize (list): Size of the figure. Default is None, which uses `[6.4, 4.8]`.
        dpi (float): The resolution of the figure in dots-per-inch. Default is None, which uses 100.0.
        show_fig (bool): Whether to show figure. Default is False.
        scaled_predictions (bool): Whether predictions had been standardized. Default is False.

    Returns:
        matplotlib.pyplot.figure: Figure of the scatter plot.
    """
    # Accept plain sequences as well; array inputs (including subclasses) pass through unchanged.
    y_predict = np.asanyarray(y_predict)
    y_true = np.asanyarray(y_true)
    if len(y_predict.shape) == 1:
        y_predict = np.expand_dims(y_predict, axis=-1)
    if len(y_true.shape) == 1:
        y_true = np.expand_dims(y_true, axis=-1)
    num_targets = y_true.shape[1]

    if data_unit is None:
        data_unit = ""
    if isinstance(data_unit, str):
        data_unit = [data_unit] * num_targets
    if len(data_unit) != num_targets:
        print("WARNING:kgcnn: Targets do not match units for plot.")
        # Pad so that the per-target lookup below cannot run out of range.
        data_unit = list(data_unit) + [""] * (num_targets - len(data_unit))

    if target_names is None:
        target_names = ""
    if isinstance(target_names, str):
        target_names = [target_names] * num_targets
    if len(target_names) != num_targets:
        print("WARNING:kgcnn: Targets do not match names for plot.")
        target_names = list(target_names) + [""] * (num_targets - len(target_names))

    if figsize is None:
        figsize = [6.4, 4.8]
    if dpi is None:
        dpi = 100.0
    fig = plt.figure(figsize=figsize, dpi=dpi)

    for i in range(num_targets):
        delta_valid = y_true[:, i] - y_predict[:, i]
        mae_valid = np.mean(np.abs(delta_valid[~np.isnan(delta_valid)]))
        plt.scatter(y_predict[:, i], y_true[:, i], alpha=0.3,
                    label=target_names[i] + " MAE: {0:0.4f} ".format(mae_valid) + "[" + data_unit[i] + "]")

    # Identity line from the smallest to the largest valid true value. Two end points are enough for
    # a straight line and, unlike a fixed-step grid, work for any value range.
    valid_true = y_true[~np.isnan(y_true)]
    if valid_true.size > 0:
        min_max = [float(np.amin(valid_true)), float(np.amax(valid_true))]
        plt.plot(min_max, min_max, color='red')

    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plot_title = "Prediction of %s for %s " % (model_name, dataset_name)
    if scaled_predictions:
        plot_title = "(SCALED!) " + plot_title
    plt.title(plot_title)
    plt.legend(loc='upper left', fontsize='x-large')
    if filepath is not None:
        plt.savefig(os.path.join(filepath, model_name + "_" + dataset_name + "_" + file_name))
    if show_fig:
        plt.show()
    return fig