Source code for kgcnn.data.utils

import pickle
import logging
import tensorflow as tf
import numpy as np
import yaml
import json
import os
from importlib.machinery import SourceFileLoader


logging.basicConfig()  # Module logger
module_logger = logging.getLogger(__name__)
module_logger.setLevel(logging.INFO)


def save_pickle_file(obj, file_path: str, **kwargs):
    """Save pickle file.

    Args:
        obj: Python-object to dump.
        file_path (str): File path or name to save 'obj' to.

    Returns:
        None.
    """
    with open(file_path, 'wb') as f:
        pickle.dump(obj, f, **kwargs)

def load_pickle_file(file_path: str, **kwargs):
    """Load pickle file.

    Args:
        file_path (str): File path or name to load.

    Returns:
        obj: Python-object of file.
    """
    with open(file_path, 'rb') as f:
        obj = pickle.load(f, **kwargs)
    return obj

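# Minimal round-trip sketch for the pickle helpers above. The file name is
# illustrative, not part of the library:
#
#     obj = {"graph_labels": [0, 1], "num_nodes": 42}
#     save_pickle_file(obj, "example.pickle")
#     assert load_pickle_file("example.pickle") == obj
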
def save_json_file(obj, file_path: str, **kwargs):
    """Save json file.

    Args:
        obj: Python-object to dump.
        file_path (str): File path or name to save 'obj' to.

    Returns:
        None.
    """
    with open(file_path, 'w') as json_file:
        json.dump(obj, json_file, **kwargs)

def load_json_file(file_path: str, **kwargs):
    """Load json file.

    Args:
        file_path (str): File path or name to load.

    Returns:
        obj: Python-object of file.
    """
    with open(file_path, 'r') as json_file:
        file_read = json.load(json_file, **kwargs)
    return file_read

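# Usage sketch for the JSON helpers (hypothetical file name). Keyword
# arguments such as `indent` are passed through to `json.dump`:
#
#     save_json_file({"model": {"name": "GCN"}}, "hyper.json", indent=4)
#     hyper = load_json_file("hyper.json")  # -> {"model": {"name": "GCN"}}
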
def load_yaml_file(file_path: str):
    """Load yaml file.

    Args:
        file_path (str): File path or name to load.

    Returns:
        obj: Python-object of file.
    """
    with open(file_path, 'r') as stream:
        obj = yaml.safe_load(stream)
    return obj

def save_yaml_file(obj, file_path: str, default_flow_style: bool = False, **kwargs):
    """Save yaml file.

    Args:
        obj: Python-object to dump.
        file_path (str): File path or name to save 'obj' to.
        default_flow_style (bool): Flag for flow style. Default to False.

    Returns:
        None.
    """
    with open(file_path, 'w') as yaml_file:
        yaml.dump(obj, yaml_file, default_flow_style=default_flow_style, **kwargs)

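# Usage sketch for the YAML helpers (file name illustrative). With the
# default `default_flow_style=False` the dump is written in block style:
#
#     save_yaml_file({"training": {"epochs": 100}}, "hyper.yaml")
#     conf = load_yaml_file("hyper.yaml")  # -> {"training": {"epochs": 100}}
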
def load_hyper_file(file_name: str, **kwargs) -> dict:
    """Load hyperparameter from file. File type can be '.yaml', '.json', '.pickle' or '.py'.

    Args:
        file_name (str): Path or name of the file containing hyperparameter.

    Returns:
        hyper (dict): Dictionary of hyperparameter.
    """
    if "." not in file_name:
        module_logger.error("Can not determine file-type.")
        return {}
    type_ending = file_name.split(".")[-1]
    if type_ending == "json":
        return load_json_file(file_name, **kwargs)
    elif type_ending == "yaml":
        return load_yaml_file(file_name)
    elif type_ending == "pickle":
        return load_pickle_file(file_name, **kwargs)
    elif type_ending == "py":
        path = os.path.realpath(file_name)
        hyper = getattr(SourceFileLoader(os.path.basename(path).replace(".py", ""), path).load_module(), "hyper")
        return hyper
    else:
        module_logger.error("Unsupported file type '%s'." % type_ending)
    return {}

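# Usage sketch: dispatch happens on the file ending. A '.py' file must define
# a module-level dict named `hyper`, which is imported and returned; the other
# endings defer to the loaders above. File names here are illustrative:
#
#     hyper = load_hyper_file("hyper.yaml")    # parsed with load_yaml_file
#     hyper = load_hyper_file("hyper_gcn.py")  # imports file, reads its `hyper` dict
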
def ragged_tensor_from_nested_numpy(numpy_list: list, dtype: str = None, row_splits_dtype: str = "int64"):
    r"""Make ragged tensor from a list of numpy arrays. Each array can have a different length but must match in
    shape except for the first dimension. This will result in a ragged tensor with a ragged dimension only at the
    first axis (ragged_rank=1), like shape `(batch, None, ...)` . This way a tensor can be generated faster than
    with `tf.ragged.constant()` .

    .. warning::

        The data will be copied for this operation.

    .. code-block:: python

        import tensorflow as tf
        import numpy as np

        ragged_tensor = ragged_tensor_from_nested_numpy([np.array([[0]]), np.array([[1], [2], [3]])])
        print(ragged_tensor)        # <tf.RaggedTensor [[[0]], [[1], [2], [3]]]>
        print(ragged_tensor.shape)  # (2, None, 1)

    Args:
        numpy_list (list): List of numpy arrays of different length but else identical shape.
        dtype (str): Data type of values tensor. Defaults to None.
        row_splits_dtype (str): Data type of partition tensor. Default is "int64".

    Returns:
        tf.RaggedTensor: Ragged tensor of former nested list of numpy arrays.
    """
    return tf.RaggedTensor.from_row_lengths(
        np.concatenate(numpy_list, axis=0, dtype=dtype),
        np.array([len(x) for x in numpy_list], dtype=row_splits_dtype))

def pad_np_array_list_batch_dim(values: list, dtype: str = None):
    r"""Pad a list of numpy arrays along the first dimension.

    Args:
        values (list): List of :obj:`np.ndarray` .
        dtype (str): Data type of values tensor. Defaults to None.

    Returns:
        tuple: Padded values and boolean mask as :obj:`np.ndarray` .
    """
    max_shape = np.amax([x.shape for x in values], axis=0)
    final_shape = np.concatenate([np.array([len(values)], dtype="int64"), np.array(max_shape, dtype="int64")])
    padded = np.zeros(final_shape, dtype=values[0].dtype)
    mask = np.zeros(final_shape, dtype="bool")
    for i, x in enumerate(values):
        # noinspection PyTypeChecker
        index = [i] + [slice(0, int(j)) for j in x.shape]
        padded[tuple(index)] = x
        mask[tuple(index)] = True
    if dtype is not None:
        padded = padded.astype(dtype=dtype)
    return padded, mask

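# Illustrative example of the padding and mask output. The input arrays are
# assumed to share dtype and rank, as the function requires:
#
#     arrays = [np.zeros((1, 3)), np.ones((2, 3))]
#     padded, mask = pad_np_array_list_batch_dim(arrays)
#     padded.shape  # (2, 2, 3)
#     mask[0]       # [[ True,  True,  True], [False, False, False]]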