# Source code for kgcnn.data.transform.scaler.force

import numpy as np
import logging
from typing import Union, List, Dict, Tuple
from kgcnn.data.transform.scaler.molecule import _ExtensiveMolecularScalerBase

# Module logger: created first and opened up to INFO-level records.
module_logger = logging.getLogger(__name__)
module_logger.setLevel(logging.INFO)
# Make sure the root logger has a handler (does nothing if one is already configured).
logging.basicConfig()


class EnergyForceExtensiveLabelScaler(_ExtensiveMolecularScalerBase):
    r"""Extensive scaler for scaling jointly energy, forces.

    Inherits from :obj:`kgcnn.data.transform.scaler.molecule._ExtensiveMolecularScalerBase` but makes use of
    `X` , `y` , as `atomic_number` and (`energy` , `force` ).
    In contrast to :obj:`kgcnn.scaler.mol.ExtensiveMolecularLabelScaler` which uses only `y` as `energy` .
    Interface is designed after scikit-learn scaler and has additional functions to apply on datasets with
    :obj:`fit_dataset()` and :obj:`transform_dataset()` .

    .. note::

        Units for energy and forces must match.

    Code example for scaler:

    .. code-block:: python

        import numpy as np
        from kgcnn.data.transform.scaler.force import EnergyForceExtensiveLabelScaler
        energy = np.random.rand(5).reshape((5,1))
        mol_num = [np.array([6, 1, 1, 1, 1]), np.array([7, 1, 1, 1]),
            np.array([6, 6, 1, 1, 1, 1]), np.array([6, 6, 1, 1]), np.array([6, 6, 1, 1, 1, 1, 1, 1])
        ]
        force = [np.random.rand(len(m)*3).reshape((len(m),3)) for m in mol_num]
        scaler = EnergyForceExtensiveLabelScaler()
        scaler.fit(y=[energy, force], X=mol_num)
        print(scaler.get_weights())
        print(scaler.get_config())
        scaler._plot_predict(energy, mol_num)  # For debugging.
        y, f = scaler.transform(y=[energy, force], X=mol_num)
        print(energy, y)
        print(scaler.inverse_transform(y=[y, f], X=mol_num)[1][1][0], f[0])
        scaler.save("example.json")
        new_scaler = EnergyForceExtensiveLabelScaler()
        new_scaler.load("example.json")
        print(new_scaler.inverse_transform(y=[y, f], X=mol_num)[1][1][0], f[0])

    """

    def __init__(self, standardize_coordinates: bool = False,
                 energy: str = "energy",
                 force: str = "force",
                 atomic_number: str = "atomic_number",
                 sample_weight: Union[None, str] = None,
                 **kwargs):
        r"""Initialize scaler with arguments for
        :obj:`kgcnn.data.transform.scaler.molecule._ExtensiveMolecularScalerBase` .

        Args:
            standardize_coordinates (bool): Whether to standardize coordinates. Must always be False.
            energy (str): Name of the energy property in a dataset item. Default is "energy".
            force (str): Name of the force property in a dataset item. Default is "force".
            atomic_number (str): Name of the atomic number property in a dataset item.
                Default is "atomic_number".
            sample_weight (str): Name of the sample weight property in a dataset item.
                If None, no sample weights are read from the dataset. Default is None.
            kwargs: Kwargs for the parent class `_ExtensiveMolecularScalerBase` . See docs for this class.

        Raises:
            NotImplementedError: If `standardize_coordinates` is truthy.
        """
        super().__init__(**kwargs)
        self._standardize_coordinates = standardize_coordinates
        if self._standardize_coordinates:
            raise NotImplementedError("Scaling of coordinates is not supported. This class is a pure label scaler.")
        # Backward compatibility.
        self._use_separate_input_arguments = False
        self._energy = energy
        self._force = force
        self._atomic_number = atomic_number
        self._sample_weight = sample_weight

    # noinspection PyPep8Naming
    def fit(self, y: Tuple[List[np.ndarray], List[np.ndarray]] = None, *,
            X: List[np.ndarray] = None,
            sample_weight: Union[None, np.ndarray] = None,
            force: Union[None, List[np.ndarray]] = None,
            atomic_number: Union[None, List[np.ndarray]] = None
            ) -> "EnergyForceExtensiveLabelScaler":
        """Fit Scaler to data.

        Only the energies and atomic numbers are handed to the parent `_fit()` ; forces are validated but
        not used for fitting.

        Args:
            y (tuple): Tuple of `(energy, forces)` . Energies must be a single array or list of energies
                of shape `(n_samples, n_states)` . For one energy this must still be `(n_samples, 1)` .
                List of forces as with each force stored in a numpy array.
                Note that you can also pass the forces separately to function argument `force` , in which case
                `y` should be only energies (not a tuple).
            X (list): Atomic number `atomic_number` are a list of arrays of atomic numbers.
                Example: `[np.array([7,1,1,1]), ...]` . They must match in length.
                Note that you can also pass the atomic numbers separately to function argument
                `atomic_number` , in which case `X` is ignored.
            sample_weight (list, np.ndarray): Weights for each sample.
            force (list): List of forces as numpy arrays. Deprecated, since they can be contained in `y` .
            atomic_number (list): List of arrays of atomic numbers. Example [np.array([7,1,1,1]), ...].
                Deprecated, since they can be contained in `X` .

        Returns:
            self.
        """
        X, y, force, atomic_number = self._verify_input(X, y, force, atomic_number)
        return super()._fit(
            molecular_property=y, sample_weight=sample_weight, atomic_number=atomic_number)

    # noinspection PyPep8Naming
    def fit_transform(self, y: Tuple[List[np.ndarray], List[np.ndarray]] = None, *,
                      X: List[np.ndarray] = None,
                      sample_weight: Union[None, np.ndarray] = None,
                      force: Union[None, List[np.ndarray]] = None,
                      atomic_number: Union[None, List[np.ndarray]] = None,
                      copy: bool = True
                      ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
        """Fit Scaler to data and subsequently transform data.

        Args:
            y (tuple): Tuple of `(energy, forces)` . Energies must be a single array or list of energies
                of shape `(n_samples, n_states)` . For one energy this must still be `(n_samples, 1)` .
                List of forces as with each force stored in a numpy array.
                Note that you can also pass the forces separately to function argument `force` , in which case
                `y` should be only energies (not a tuple).
            X (list): Atomic number `atomic_number` are a list of arrays of atomic numbers.
                Example: `[np.array([7,1,1,1]), ...]` . They must match in length.
                Note that you can also pass the atomic numbers separately to function argument
                `atomic_number` , in which case `X` is ignored.
            sample_weight (list, np.ndarray): Weights for each sample.
            force (list): List of forces as numpy arrays. Deprecated, since they can be contained in `y` .
            atomic_number (list): List of arrays of atomic numbers. Example [np.array([7,1,1,1]), ...].
                Deprecated, since they can be contained in `X` .
            copy (bool): Passed on to :obj:`transform()` . If True, new arrays are returned, otherwise
                energies and forces are modified in place. Default is True.

        Returns:
            tuple: Tuple of transformed `(energy, forces)` .
        """
        # Normalize once into separate (energy, force, atomic_number) arguments; both calls below
        # receive them in the separate-argument form.
        X, y, force, atomic_number = self._verify_input(X, y, force, atomic_number)
        self.fit(X=X, y=y, atomic_number=atomic_number, force=force, sample_weight=sample_weight)
        return self.transform(X=X, y=y, copy=copy, force=force, atomic_number=atomic_number)

    # noinspection PyPep8Naming
    def transform(self, y: Tuple[List[np.ndarray], List[np.ndarray]] = None, *,
                  X: List[np.ndarray] = None,
                  force: Union[None, List[np.ndarray]] = None,
                  atomic_number: Union[None, List[np.ndarray]] = None,
                  copy: bool = True
                  ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
        """Perform scaling of atomic energies and forces.

        The prediction of `_predict()` for the atomic numbers is subtracted from the energies. If
        `_standardize_scale` is set, energies and forces are additionally divided by `scale_` .

        Args:
            y (tuple): Tuple of `(energy, forces)` . Energies must be a single array or list of energies
                of shape `(n_samples, n_states)` . For one energy this must still be `(n_samples, 1)` .
                List of forces as with each force stored in a numpy array.
                Note that you can also pass the forces separately to function argument `force` , in which case
                `y` should be only energies (not a tuple).
            X (list): Atomic number `atomic_number` are a list of arrays of atomic numbers.
                Example: `[np.array([7,1,1,1]), ...]` . They must match in length.
                Note that you can also pass the atomic numbers separately to function argument
                `atomic_number` , in which case `X` is ignored.
            force (list): List of forces as numpy arrays. Deprecated, since they can be contained in `y` .
            atomic_number (list): List of arrays of atomic numbers. Example [np.array([7,1,1,1]), ...].
                Deprecated, since they can be contained in `X` .
            copy (bool): If True, new arrays are created for the result. If False, the passed
                energies and forces are overwritten in place. Default is True.

        Returns:
            tuple: Tuple of transformed `(energy, forces)` .
        """
        X, y, force, atomic_number = self._verify_input(X, y, force, atomic_number)
        if copy:
            y = np.array(y) - self._predict(atomic_number)
            if self._standardize_scale:
                y = y / np.expand_dims(self.scale_, axis=0)
                force = [np.array(f) / np.expand_dims(self.scale_, axis=0) for f in force]
            else:
                force = [np.array(f) for f in force]
        else:
            num_samples = len(y)
            # Predict once for all samples instead of once per loop iteration.
            # Skipped for empty input, where no prediction is needed.
            predicted = self._predict(atomic_number) if num_samples > 0 else []
            for i in range(num_samples):
                y[i][:] = y[i] - predicted[i]
                if self._standardize_scale:
                    y[i][:] = y[i] / self.scale_
                    force[i][:] = force[i] / np.expand_dims(self.scale_, axis=0)
        return y, force

    # noinspection PyPep8Naming
    def inverse_transform(self, y: Tuple[List[np.ndarray], List[np.ndarray]] = None, *,
                          X: List[np.ndarray] = None,
                          force: Union[None, List[np.ndarray]] = None,
                          atomic_number: Union[None, List[np.ndarray]] = None,
                          copy: bool = True
                          ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
        """Scale back data for atoms.

        Reverses :obj:`transform()` : if `_standardize_scale` is set, energies and forces are multiplied by
        `scale_` , then the prediction of `_predict()` for the atomic numbers is added to the energies.

        Args:
            y (tuple): Tuple of `(energy, forces)` . Energies must be a single array or list of energies
                of shape `(n_samples, n_states)` . For one energy this must still be `(n_samples, 1)` .
                List of forces as with each force stored in a numpy array.
                Note that you can also pass the forces separately to function argument `force` , in which case
                `y` should be only energies (not a tuple).
            X (list): Atomic number `atomic_number` are a list of arrays of atomic numbers.
                Example: `[np.array([7,1,1,1]), ...]` . They must match in length.
                Note that you can also pass the atomic numbers separately to function argument
                `atomic_number` , in which case `X` is ignored.
            force (list): List of forces as numpy arrays. Deprecated, since they can be contained in `y` .
            atomic_number (list): List of arrays of atomic numbers. Example [np.array([7,1,1,1]), ...].
                Deprecated, since they can be contained in `X` .
            copy (bool): If True, new arrays are created for the result. If False, the passed
                energies and forces are overwritten in place. Default is True.

        Returns:
            tuple: Tuple of reverse-transformed `(energy, forces)` .
        """
        X, y, force, atomic_number = self._verify_input(X, y, force, atomic_number)
        if copy:
            y = np.array(y)
            if self._standardize_scale:
                y = y * np.expand_dims(self.scale_, axis=0)
                force = [np.array(f) * np.expand_dims(self.scale_, axis=0) for f in force]
            else:
                force = [np.array(f) for f in force]
            y = y + self._predict(atomic_number)
        else:
            num_samples = len(y)
            # Predict once for all samples instead of once per loop iteration.
            # Skipped for empty input, where no prediction is needed.
            predicted = self._predict(atomic_number) if num_samples > 0 else []
            for i in range(num_samples):
                if self._standardize_scale:
                    y[i][:] = y[i][:] * self.scale_
                    force[i][:] = force[i] * np.expand_dims(self.scale_, axis=0)
                y[i][:] = y[i][:] + predicted[i]
        return y, force

    # Needed for backward compatibility.
    # noinspection PyPep8Naming
    def _verify_input(self, X, y, force, atomic_number):
        """Normalize the accepted input formats into separate energy, force and atomic number arguments.

        Also records in `_use_separate_input_arguments` whether forces were passed via `force` .

        Args:
            X: Atomic numbers, or extra input if `atomic_number` is given.
            y: Either a tuple `(energy, forces)` or only energies if `force` is given.
            force: Forces passed separately, or None.
            atomic_number: Atomic numbers passed separately, or None.

        Returns:
            tuple: `(x_input, energy, forces, atoms)` , where `x_input` is `X` if `atomic_number` was given
            and None otherwise.

        Raises:
            ValueError: If `y` is None, or if separately passed forces match neither `y` in length nor
                a `y` of length 2.
        """
        # Verify the input format.
        if y is None:
            raise ValueError("`EnergyForceExtensiveLabelScaler` requires 'y' argument, but got 'None'.")
        if force is not None:
            self._use_separate_input_arguments = True
            if len(force) == len(y):
                # `y` holds only energies, one entry per force array.
                energy, forces = y, force
            elif len(y) == 2:
                # `y` is still an `(energy, forces)` pair; separately passed forces take precedence.
                energy, forces = y[0], force
            else:
                raise ValueError("Energy and forces do not match.")
        else:
            self._use_separate_input_arguments = False
            energy, forces = y
        if atomic_number is not None:
            atoms = atomic_number
            x_input = X
        else:
            atoms = X
            x_input = None
        return x_input, energy, forces, atoms

    def get_config(self) -> dict:
        """Get configuration for scaler.

        Returns:
            dict: Config of the parent class extended by the arguments of this class.
        """
        config = super().get_config()
        config.update({
            "standardize_coordinates": self._standardize_coordinates,
            "energy": self._energy,
            "force": self._force,
            "atomic_number": self._atomic_number,
            "sample_weight": self._sample_weight
        })
        return config

    def set_config(self, config: dict):
        """Set configuration for scaler.

        Args:
            config (dict): Config dictionary. Must contain the keys "standardize_coordinates", "energy",
                "force", "atomic_number" and "sample_weight". All other items are passed to the
                parent class.

        Returns:
            Return value of `set_config()` of the parent class.
        """
        self._standardize_coordinates = config["standardize_coordinates"]
        self._energy = config["energy"]
        self._force = config["force"]
        self._atomic_number = config["atomic_number"]
        self._sample_weight = config["sample_weight"]
        # Keys handled by this class are removed before delegating to the parent.
        config_super = {key: value for key, value in config.items() if key not in [
            "standardize_coordinates", "energy", "force", "atomic_number", "sample_weight"]}
        return super().set_config(config_super)

    # Similar functions that work on dataset plus property names.

    # noinspection PyPep8Naming
    def fit_dataset(self, dataset: List[Dict[str, np.ndarray]], **fit_params):
        r"""Fit to dataset with relevant `X` , `y` information.

        Args:
            dataset (list): Dataset of type `List[Dict]` containing energies and forces and atomic numbers.
            fit_params: Fit parameters handed to :obj:`fit()`

        Returns:
            self.
        """
        atoms = self._atomic_number
        energy, force = self._energy, self._force
        return self.fit(
            X=[item[atoms] for item in dataset],
            y=([item[energy] for item in dataset], [item[force] for item in dataset]),
            sample_weight=[item[self._sample_weight] for item in
                           dataset] if self._sample_weight is not None else None,
            **fit_params
        )

    # noinspection PyPep8Naming
    def transform_dataset(self, dataset: List[Dict[str, np.ndarray]],
                          copy: bool = True,
                          copy_dataset: bool = False,
                          ) -> List[Dict[str, np.ndarray]]:
        r"""Transform dataset with relevant `X` , `y` information.

        The transformed energies and forces are written back into the items of the dataset.

        Args:
            dataset (list): Dataset of type `List[Dict]` containing energies and forces and atomic numbers.
            copy (bool): Whether to copy data for transformation. Default is True.
            copy_dataset (bool): Whether to copy full dataset. Default is False.
                NOTE(review): this calls `dataset.copy()` , which for a plain Python list is a shallow
                copy, so the items would still be shared with the input - confirm for the dataset
                type in use.

        Returns:
            dataset: Transformed dataset.
        """
        atoms = self._atomic_number
        energy, force = self._energy, self._force
        if copy_dataset:
            dataset = dataset.copy()
        out_energy, out_force = self.transform(
            atomic_number=[graph[atoms] for graph in dataset],
            y=([graph[energy] for graph in dataset], [graph[force] for graph in dataset]),
            copy=copy,
        )
        for graph, graph_energy, graph_force in zip(dataset, out_energy, out_force):
            graph[energy] = graph_energy
            graph[force] = graph_force
        return dataset

    # noinspection PyPep8Naming
    def inverse_transform_dataset(self, dataset: List[Dict[str, np.ndarray]],
                                  copy: bool = True,
                                  copy_dataset: bool = False,
                                  ) -> List[Dict[str, np.ndarray]]:
        r"""Inverse transform dataset with relevant `X` , `y` information.

        The reverse-transformed energies and forces are written back into the items of the dataset.

        Args:
            dataset (list): Dataset of type `List[Dict]` containing energies and forces and atomic numbers.
            copy (bool): Whether to copy data for transformation. Default is True.
            copy_dataset (bool): Whether to copy full dataset. Default is False.
                NOTE(review): this calls `dataset.copy()` , which for a plain Python list is a shallow
                copy, so the items would still be shared with the input - confirm for the dataset
                type in use.

        Returns:
            dataset: Inverse-transformed dataset.
        """
        atoms = self._atomic_number
        energy, force = self._energy, self._force
        if copy_dataset:
            dataset = dataset.copy()
        out_energy, out_force = self.inverse_transform(
            atomic_number=[graph[atoms] for graph in dataset],
            y=([graph[energy] for graph in dataset], [graph[force] for graph in dataset]),
            copy=copy,
        )
        for graph, graph_energy, graph_force in zip(dataset, out_energy, out_force):
            graph[energy] = graph_energy
            graph[force] = graph_force
        return dataset

    # noinspection PyPep8Naming
    def fit_transform_dataset(self, dataset: List[Dict[str, np.ndarray]],
                              copy: bool = True,
                              copy_dataset: bool = False,
                              **fit_params) -> List[Dict[str, np.ndarray]]:
        r"""Fit and transform to dataset with relevant `X` , `y` information.

        Args:
            dataset (list): Dataset of type `List[Dict]` containing energies and forces and atomic numbers.
            copy (bool): Whether to copy data for transformation. Default is True.
            copy_dataset (bool): Whether to copy full dataset. Default is False.
            fit_params: Fit parameters handed to :obj:`fit()`

        Returns:
            dataset: Transformed dataset.
        """
        self.fit_dataset(dataset=dataset, **fit_params)
        return self.transform_dataset(dataset=dataset, copy=copy, copy_dataset=copy_dataset)