Source code for kgcnn.data.utils

import pickle
import logging
import tensorflow as tf
import numpy as np
import yaml
import json
import os
from importlib.machinery import SourceFileLoader


logging.basicConfig()  # Module logger
module_logger = logging.getLogger(__name__)
module_logger.setLevel(logging.INFO)


def save_pickle_file(obj, file_path: str, **kwargs):
    """Save pickle file.

    Args:
        obj: Python-object to dump.
        file_path (str): File path or name to save 'obj' to.

    Returns:
        None.
    """
    with open(file_path, 'wb') as f:
        pickle.dump(obj, f, **kwargs)

def load_pickle_file(file_path: str, **kwargs):
    """Load pickle file.

    Args:
        file_path (str): File path or name to load.

    Returns:
        obj: Python-object of file.
    """
    with open(file_path, 'rb') as f:
        obj = pickle.load(f, **kwargs)
    return obj

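# Minimal round-trip sketch for the pickle helpers above. The file name is
# illustrative, not part of the library:
#
#     obj = {"graph_labels": [0, 1], "num_nodes": 42}
#     save_pickle_file(obj, "example.pickle")
#     assert load_pickle_file("example.pickle") == obj
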
def save_json_file(obj, file_path: str, **kwargs):
    """Save json file.

    Args:
        obj: Python-object to dump.
        file_path (str): File path or name to save 'obj' to.

    Returns:
        None.
    """
    with open(file_path, 'w') as json_file:
        json.dump(obj, json_file, **kwargs)

def load_json_file(file_path: str, **kwargs):
    """Load json file.

    Args:
        file_path (str): File path or name to load.

    Returns:
        obj: Python-object of file.
    """
    with open(file_path, 'r') as json_file:
        file_read = json.load(json_file, **kwargs)
    return file_read

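# Usage sketch for the JSON helpers (hypothetical file name). Keyword
# arguments such as `indent` are passed through to `json.dump`:
#
#     save_json_file({"model": {"name": "GCN"}}, "hyper.json", indent=4)
#     hyper = load_json_file("hyper.json")  # -> {"model": {"name": "GCN"}}
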
def load_yaml_file(file_path: str):
    """Load yaml file.

    Args:
        file_path (str): File path or name to load.

    Returns:
        obj: Python-object of file.
    """
    with open(file_path, 'r') as stream:
        obj = yaml.safe_load(stream)
    return obj

def save_yaml_file(obj, file_path: str, default_flow_style: bool = False, **kwargs):
    """Save yaml file.

    Args:
        obj: Python-object to dump.
        file_path (str): File path or name to save 'obj' to.
        default_flow_style (bool): Flag for flow style. Default to False.

    Returns:
        None.
    """
    with open(file_path, 'w') as yaml_file:
        yaml.dump(obj, yaml_file, default_flow_style=default_flow_style, **kwargs)

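# Usage sketch for the YAML helpers (file name illustrative). With the
# default `default_flow_style=False` the dump is written in block style:
#
#     save_yaml_file({"training": {"epochs": 100}}, "hyper.yaml")
#     conf = load_yaml_file("hyper.yaml")  # -> {"training": {"epochs": 100}}
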
def load_hyper_file(file_name: str, **kwargs) -> dict:
    """Load hyperparameter from file. File type can be '.yaml', '.json', '.pickle' or '.py'.

    Args:
        file_name (str): Path or name of the file containing hyperparameter.

    Returns:
        hyper (dict): Dictionary of hyperparameter.
    """
    if "." not in file_name:
        module_logger.error("Can not determine file-type.")
        return {}
    type_ending = file_name.split(".")[-1]
    if type_ending == "json":
        return load_json_file(file_name, **kwargs)
    elif type_ending == "yaml":
        return load_yaml_file(file_name)
    elif type_ending == "pickle":
        return load_pickle_file(file_name, **kwargs)
    elif type_ending == "py":
        path = os.path.realpath(file_name)
        hyper = getattr(SourceFileLoader(os.path.basename(path).replace(".py", ""), path).load_module(), "hyper")
        return hyper
    else:
        module_logger.error("Unsupported file type '%s'." % type_ending)
    return {}

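# Usage sketch: dispatch happens on the file ending. A '.py' file must define
# a module-level dict named `hyper`, which is imported and returned; the other
# endings defer to the loaders above. File names here are illustrative:
#
#     hyper = load_hyper_file("hyper.yaml")    # parsed with load_yaml_file
#     hyper = load_hyper_file("hyper_gcn.py")  # imports file, reads its `hyper` dict
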
def ragged_tensor_from_nested_numpy(numpy_list: list, dtype: str = None, row_splits_dtype: str = "int64"):
    r"""Make ragged tensor from a list of numpy arrays. Each array can have a different length but must match in
    shape except for the first dimension. This will result in a ragged tensor with a ragged dimension only at the
    first axis (ragged_rank=1), like shape `(batch, None, ...)` . This way a tensor can be generated faster than
    with `tf.ragged.constant()` .

    .. warning::

        The data will be copied for this operation.

    .. code-block:: python

        import tensorflow as tf
        import numpy as np

        ragged_tensor = ragged_tensor_from_nested_numpy([np.array([[0]]), np.array([[1], [2], [3]])])
        print(ragged_tensor)        # <tf.RaggedTensor [[[0]], [[1], [2], [3]]]>
        print(ragged_tensor.shape)  # (2, None, 1)

    Args:
        numpy_list (list): List of numpy arrays of different length but else identical shape.
        dtype (str): Data type of values tensor. Defaults to None.
        row_splits_dtype (str): Data type of partition tensor. Default is "int64".

    Returns:
        tf.RaggedTensor: Ragged tensor of former nested list of numpy arrays.
    """
    return tf.RaggedTensor.from_row_lengths(
        np.concatenate(numpy_list, axis=0, dtype=dtype),
        np.array([len(x) for x in numpy_list], dtype=row_splits_dtype))

def pad_np_array_list_batch_dim(values: list, dtype: str = None):
    r"""Pad a list of numpy arrays along the first dimension.

    Args:
        values (list): List of :obj:`np.ndarray` .
        dtype (str): Data type of values tensor. Defaults to None.

    Returns:
        tuple: Padded values and boolean mask as :obj:`np.ndarray` .
    """
    max_shape = np.amax([x.shape for x in values], axis=0)
    final_shape = np.concatenate([np.array([len(values)], dtype="int64"), np.array(max_shape, dtype="int64")])
    padded = np.zeros(final_shape, dtype=values[0].dtype)
    mask = np.zeros(final_shape, dtype="bool")
    for i, x in enumerate(values):
        # noinspection PyTypeChecker
        index = [i] + [slice(0, int(j)) for j in x.shape]
        padded[tuple(index)] = x
        mask[tuple(index)] = True
    if dtype is not None:
        padded = padded.astype(dtype=dtype)
    return padded, mask

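# Illustrative example of the padding and mask output. The input arrays are
# assumed to share dtype and rank, as the function requires:
#
#     arrays = [np.zeros((1, 3)), np.ones((2, 3))]
#     padded, mask = pad_np_array_list_batch_dim(arrays)
#     padded.shape  # (2, 2, 3)
#     mask[0]       # [[ True,  True,  True], [False, False, False]]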