Source code for kgcnn.data.transform.scaler.force
import numpy as np
import logging
from typing import Union, List, Dict, Tuple
from kgcnn.data.transform.scaler.molecule import _ExtensiveMolecularScalerBase
# Module logger.
# NOTE: `basicConfig()` runs at import time; it attaches a default handler to the root logger
# only if the root logger has no handlers configured yet.
logging.basicConfig()
module_logger = logging.getLogger(__name__)
module_logger.setLevel(logging.INFO)
[docs]class EnergyForceExtensiveLabelScaler(_ExtensiveMolecularScalerBase):
    r"""Extensive scaler for jointly scaling energies and forces.

    Inherits from :obj:`kgcnn.data.transform.scaler.molecule._ExtensiveMolecularScalerBase` but makes use of
    `X` , `y` , as `atomic_number` and (`energy` , `force` ).
    In contrast to :obj:`ExtensiveMolecularLabelScaler` which uses only `y` as `energy` .

    Interface is designed after scikit-learn scaler and has additional functions to apply on datasets with
    :obj:`fit_dataset()` and :obj:`transform_dataset()` .

    .. note::

        Units for energy and forces must match.

    Code example for scaler:

    .. code-block:: python

        import numpy as np
        from kgcnn.data.transform.scaler.force import EnergyForceExtensiveLabelScaler

        energy = np.random.rand(5).reshape((5,1))
        mol_num = [
            np.array([6, 1, 1, 1, 1]), np.array([7, 1, 1, 1]),
            np.array([6, 6, 1, 1, 1, 1]), np.array([6, 6, 1, 1]), np.array([6, 6, 1, 1, 1, 1, 1, 1])
        ]
        force = [np.random.rand(len(m)*3).reshape((len(m),3)) for m in mol_num]
        scaler = EnergyForceExtensiveLabelScaler()
        scaler.fit(y=[energy, force], X=mol_num)
        print(scaler.get_weights())
        print(scaler.get_config())
        scaler._plot_predict(energy, mol_num)  # For debugging.
        y, f = scaler.transform(y=[energy, force], X=mol_num)
        print(energy, y)
        print(scaler.inverse_transform(y=[y, f], X=mol_num)[1][1][0], f[0])
        scaler.save("example.json")
        new_scaler = EnergyForceExtensiveLabelScaler()
        new_scaler.load("example.json")
        # The reloaded scaler must reproduce the inverse transform of the original one.
        print(new_scaler.inverse_transform(y=[y, f], X=mol_num)[1][1][0], f[0])
    """
def __init__(self, standardize_coordinates: bool = False,
             energy: str = "energy", force: str = "force", atomic_number: str = "atomic_number",
             sample_weight: str = None, **kwargs):
    r"""Set up the label scaler and remember the dataset property names.

    Args:
        standardize_coordinates (bool): Whether to standardize coordinates. Must always be False, since
            this class only scales labels.
        energy (str): Property name under which energies are read from and written to dataset entries
            by the `*_dataset` methods. Default is "energy".
        force (str): Property name under which forces are read from and written to dataset entries
            by the `*_dataset` methods. Default is "force".
        atomic_number (str): Property name under which atomic numbers are read from dataset entries
            by the `*_dataset` methods. Default is "atomic_number".
        sample_weight (str): Property name of per-sample weights read by :obj:`fit_dataset()` .
            If None, no sample weights are passed on. Default is None.
        kwargs: Kwargs for the :obj:`_ExtensiveMolecularScalerBase` parent class.
            See docs for this class.

    Raises:
        NotImplementedError: If `standardize_coordinates` is True.
    """
    super(EnergyForceExtensiveLabelScaler, self).__init__(**kwargs)

    # The flag is stored before it is rejected, exactly like any other config value.
    self._standardize_coordinates = standardize_coordinates
    if standardize_coordinates:
        raise NotImplementedError("Scaling of coordinates is not supported. This class is a pure label scaler.")

    # Property names used by the dataset-level methods.
    self._energy, self._force = energy, force
    self._atomic_number, self._sample_weight = atomic_number, sample_weight

    # Backward compatibility: records whether forces were passed separately from `y` in the last call.
    self._use_separate_input_arguments = False
# noinspection PyPep8Naming
def fit(self, y: Tuple[List[np.ndarray], List[np.ndarray]] = None, *,
        X: List[np.ndarray] = None,
        sample_weight: Union[None, np.ndarray] = None,
        force: Union[None, List[np.ndarray]] = None,
        atomic_number: Union[None, List[np.ndarray]] = None
        ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
    """Fit the scaler to energies and atomic numbers.

    The inputs are normalised by `_verify_input` ; only the energies, the atomic numbers and the
    sample weights are handed to the parent `_fit` . Forces are accepted for interface symmetry but
    are not passed on.

    Args:
        y (tuple): Tuple of `(energy, forces)` .
            Energies must be a single array or list of energies of shape `(n_samples, n_states)` .
            For one energy this must still be `(n_samples, 1)` . Forces are a list with one numpy array
            per sample. If forces are passed via `force` instead, `y` should be only the energies.
        X (list): List of arrays of atomic numbers, e.g. `[np.array([7,1,1,1]), ...]` . Must match
            the energies in length. Ignored if `atomic_number` is given.
        sample_weight (list, np.ndarray): Weights for each sample.
        force (list): List of forces as numpy arrays. Deprecated, since they can be contained in `y` .
        atomic_number (list): List of arrays of atomic numbers. Deprecated, since they can be
            passed as `X` .

    Returns:
        The return value of the parent `_fit` (documented upstream as `self` ).
    """
    _, energies, _, numbers = self._verify_input(X, y, force, atomic_number)
    parent = super(EnergyForceExtensiveLabelScaler, self)
    return parent._fit(molecular_property=energies, sample_weight=sample_weight, atomic_number=numbers)
# noinspection PyPep8Naming
def fit_transform(self, y: Tuple[List[np.ndarray], List[np.ndarray]] = None, *,
                  X: List[np.ndarray] = None,
                  sample_weight: Union[None, np.ndarray] = None,
                  force: Union[None, List[np.ndarray]] = None,
                  atomic_number: Union[None, List[np.ndarray]] = None,
                  copy: bool = True
                  ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
    """Fit the scaler to the data and subsequently transform the same data.

    The arguments are forwarded unchanged to :obj:`fit()` and :obj:`transform()` , which each
    validate them once. Pre-normalising them here would make both calls look like the deprecated
    "separate `force` argument" style and leave `_use_separate_input_arguments` set incorrectly.

    Args:
        y (tuple): Tuple of `(energy, forces)` .
            Energies must be a single array or list of energies of shape `(n_samples, n_states)` .
            For one energy this must still be `(n_samples, 1)` . Forces are a list with one numpy array
            per sample. If forces are passed via `force` instead, `y` should be only the energies.
        X (list): List of arrays of atomic numbers, e.g. `[np.array([7,1,1,1]), ...]` . Must match
            the energies in length. Ignored if `atomic_number` is given.
        sample_weight (list, np.ndarray): Weights for each sample, used for fitting only.
        force (list): List of forces as numpy arrays. Deprecated, since they can be contained in `y` .
        atomic_number (list): List of arrays of atomic numbers. Deprecated, since they can be
            passed as `X` .
        copy (bool): Handed to :obj:`transform()` . If False the data is scaled in place.

    Returns:
        tuple: Tuple of transformed `(energy, forces)` .

    Raises:
        ValueError: If `y` is None or energies and forces do not match (raised during validation).
    """
    self.fit(y=y, X=X, sample_weight=sample_weight, force=force, atomic_number=atomic_number)
    return self.transform(y=y, X=X, force=force, atomic_number=atomic_number, copy=copy)
# noinspection PyPep8Naming
def transform(self, y: Tuple[List[np.ndarray], List[np.ndarray]] = None, *,
              X: List[np.ndarray] = None,
              force: Union[None, List[np.ndarray]] = None,
              atomic_number: Union[None, List[np.ndarray]] = None,
              copy: bool = True
              ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
    """Perform scaling of energies and forces.

    Energies are shifted by the prediction from the atomic numbers and, if scale standardization is
    enabled, divided by the fitted scale. Forces are only divided by the scale, never shifted.

    Args:
        y (tuple): Tuple of `(energy, forces)` .
            Energies must be a single array or list of energies of shape `(n_samples, n_states)` .
            For one energy this must still be `(n_samples, 1)` . Forces are a list with one numpy array
            per sample. If forces are passed via `force` instead, `y` should be only the energies.
        X (list): List of arrays of atomic numbers, e.g. `[np.array([7,1,1,1]), ...]` . Must match
            the energies in length. Ignored if `atomic_number` is given.
        force (list): List of forces as numpy arrays. Deprecated, since they can be contained in `y` .
        atomic_number (list): List of arrays of atomic numbers. Deprecated, since they can be
            passed as `X` .
        copy (bool): If True (default), new arrays are returned and the inputs are left untouched.
            If False, every energy and force entry is overwritten in place and the input containers
            are returned; the entries must then be writable arrays.

    Returns:
        tuple: Tuple of transformed `(energy, forces)` . With `copy=True` the energies are a single
        numpy array and the forces a list of new arrays.
    """
    X, y, force, atomic_number = self._verify_input(X, y, force, atomic_number)

    # The prediction depends only on the atomic numbers, so compute it once for both code paths
    # instead of once per sample inside the in-place loop.
    baseline = self._predict(atomic_number)

    if copy:
        energy = np.array(y) - baseline
        if self._standardize_scale:
            scale_row = np.expand_dims(self.scale_, axis=0)
            energy = energy / scale_row
            forces = [np.array(f) / scale_row for f in force]
        else:
            forces = [np.array(f) for f in force]
        return energy, forces

    # In-place path: write through slices so the caller's arrays are updated.
    for i, energy_i in enumerate(y):
        energy_i[:] = energy_i - baseline[i]
        if self._standardize_scale:
            energy_i[:] = energy_i / self.scale_
            force[i][:] = force[i] / np.expand_dims(self.scale_, axis=0)
    return y, force
# noinspection PyPep8Naming
def inverse_transform(self, y: Tuple[List[np.ndarray], List[np.ndarray]] = None, *,
                      X: List[np.ndarray] = None,
                      force: Union[None, List[np.ndarray]] = None,
                      atomic_number: Union[None, List[np.ndarray]] = None,
                      copy: bool = True
                      ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
    """Scale energies and forces back to their original range.

    If scale standardization is enabled, energies and forces are multiplied by the fitted scale.
    Afterwards the prediction from the atomic numbers is added back to the energies only.

    Args:
        y (tuple): Tuple of `(energy, forces)` .
            Energies must be a single array or list of energies of shape `(n_samples, n_states)` .
            For one energy this must still be `(n_samples, 1)` . Forces are a list with one numpy array
            per sample. If forces are passed via `force` instead, `y` should be only the energies.
        X (list): List of arrays of atomic numbers, e.g. `[np.array([7,1,1,1]), ...]` . Must match
            the energies in length. Ignored if `atomic_number` is given.
        force (list): List of forces as numpy arrays. Deprecated, since they can be contained in `y` .
        atomic_number (list): List of arrays of atomic numbers. Deprecated, since they can be
            passed as `X` .
        copy (bool): If True (default), new arrays are returned and the inputs are left untouched.
            If False, every energy and force entry is overwritten in place and the input containers
            are returned; the entries must then be writable arrays.

    Returns:
        tuple: Tuple of reverse-transformed `(energy, forces)` . With `copy=True` the energies are a
        single numpy array and the forces a list of new arrays.
    """
    X, y, force, atomic_number = self._verify_input(X, y, force, atomic_number)

    # The prediction depends only on the atomic numbers, so compute it once for both code paths
    # instead of once per sample inside the in-place loop.
    baseline = self._predict(atomic_number)

    if copy:
        energy = np.array(y)
        if self._standardize_scale:
            scale_row = np.expand_dims(self.scale_, axis=0)
            energy = energy * scale_row
            forces = [np.array(f) * scale_row for f in force]
        else:
            forces = [np.array(f) for f in force]
        return energy + baseline, forces

    # In-place path: undo the scale first, then add the baseline back (reverse order of transform).
    for i, energy_i in enumerate(y):
        if self._standardize_scale:
            energy_i[:] = energy_i * self.scale_
            force[i][:] = force[i] * np.expand_dims(self.scale_, axis=0)
        energy_i[:] = energy_i + baseline[i]
    return y, force
# Needed for backward compatibility.
# noinspection PyPep8Naming
def _verify_input(self, X, y, force, atomic_number):
    """Normalise the two supported calling conventions into one tuple.

    Forces may be part of `y` as `(energy, forces)` or be passed separately via `force` .
    Atomic numbers may be passed as `X` or separately via `atomic_number` . The chosen force
    convention is recorded in `_use_separate_input_arguments` .

    Args:
        X: Atomic numbers, or additional input if `atomic_number` is given.
        y: Either `(energy, forces)` or only energies if `force` is given.
        force: Separately passed forces or None.
        atomic_number: Separately passed atomic numbers or None.

    Returns:
        tuple: `(x_input, energy, forces, atoms)` , where `x_input` is `X` if `atomic_number` was given
        and None otherwise.

    Raises:
        ValueError: If `y` is None, or if separately passed forces match neither `y` in length nor a
            `y` of length two.
    """
    if y is None:
        raise ValueError("`EnergyForceExtensiveLabelScaler` requires 'y' argument, but got 'None'.")

    # Record the calling convention before any further check can fail.
    forces_given = force is not None
    self._use_separate_input_arguments = forces_given

    if not forces_given:
        energy_part, force_part = y
    elif len(force) == len(y):
        # `y` holds one energy entry per force entry, so it is taken as the energies.
        energy_part, force_part = y, force
    elif len(y) == 2:
        # `y` is still a pair; its first element is taken as energies, the explicit `force` wins.
        energy_part, force_part = y[0], force
    else:
        raise ValueError("Energy and forces do not match.")

    if atomic_number is None:
        return None, energy_part, force_part, X
    return X, energy_part, force_part, atomic_number
def get_config(self) -> dict:
    """Return the parent configuration extended by the arguments of this class.

    Returns:
        dict: Parent config plus `standardize_coordinates` and the property names `energy` ,
        `force` , `atomic_number` and `sample_weight` .
    """
    config = super(EnergyForceExtensiveLabelScaler, self).get_config()
    config.update(
        standardize_coordinates=self._standardize_coordinates,
        energy=self._energy,
        force=self._force,
        atomic_number=self._atomic_number,
        sample_weight=self._sample_weight,
    )
    return config
def set_config(self, config: dict):
    """Restore the scaler configuration from a config dictionary.

    The entries that belong to this class are stored on the instance; all remaining entries are
    handed to the parent `set_config` .

    Args:
        config (dict): Config dictionary. Must contain the keys `standardize_coordinates` ,
            `energy` , `force` , `atomic_number` and `sample_weight` .

    Returns:
        The return value of the parent `set_config` .
    """
    own_keys = ("standardize_coordinates", "energy", "force", "atomic_number", "sample_weight")
    # Each own entry `<key>` is stored on the private attribute `_<key>` .
    for key in own_keys:
        setattr(self, "_" + key, config[key])
    parent_config = {key: value for key, value in config.items() if key not in own_keys}
    return super(EnergyForceExtensiveLabelScaler, self).set_config(parent_config)
# Similar functions that work on dataset plus property names.
# noinspection PyPep8Naming
def fit_dataset(self, dataset: List[Dict[str, np.ndarray]], **fit_params):
    r"""Fit to a dataset by collecting the relevant properties from its entries.

    Atomic numbers, energies, forces and (if a sample-weight property name is configured)
    sample weights are gathered per entry and handed to :obj:`fit()` .

    Args:
        dataset (list): Dataset of type `List[Dict]` containing energies and forces and atomic numbers.
        fit_params: Fit parameters handed to :obj:`fit()`

    Returns:
        The return value of :obj:`fit()` .
    """
    numbers = [entry[self._atomic_number] for entry in dataset]
    energies = [entry[self._energy] for entry in dataset]
    forces = [entry[self._force] for entry in dataset]

    weight_key = self._sample_weight
    weights = None if weight_key is None else [entry[weight_key] for entry in dataset]

    return self.fit(X=numbers, y=(energies, forces), sample_weight=weights, **fit_params)
# noinspection PyPep8Naming
def transform_dataset(self, dataset: List[Dict[str, np.ndarray]], copy: bool = True, copy_dataset: bool = False,
                      ) -> List[Dict[str, np.ndarray]]:
    r"""Transform the energies and forces stored in a dataset.

    The transformed values are written back to each entry under the configured energy and force
    property names.

    Args:
        dataset (list): Dataset of type `List[Dict]` containing energies and forces and atomic numbers.
        copy (bool): Whether to copy data for transformation. Default is True.
            With False the stored arrays themselves are overwritten, even if `copy_dataset` is True,
            because copied entries still reference the same arrays.
        copy_dataset (bool): Whether to copy the dataset before writing the results back, so that the
            entries of the passed dataset are not reassigned. Default is False.

    Returns:
        dataset: Transformed dataset.
    """
    atoms = self._atomic_number
    energy, force = self._energy, self._force
    if copy_dataset:
        if type(dataset) is list:
            # `list.copy()` is shallow: the copied list would share its dicts with the input and the
            # assignments below would still modify the caller's entries. Copy every entry as well.
            dataset = [graph.copy() for graph in dataset]
        else:
            # Other containers are trusted to implement a suitable `copy()` themselves.
            dataset = dataset.copy()
    out_energy, out_force = self.transform(
        atomic_number=[graph[atoms] for graph in dataset],
        y=([graph[energy] for graph in dataset], [graph[force] for graph in dataset]),
        copy=copy,
    )
    for graph, graph_energy, graph_force in zip(dataset, out_energy, out_force):
        graph[energy] = graph_energy
        graph[force] = graph_force
    return dataset
# noinspection PyPep8Naming
def inverse_transform_dataset(self, dataset: List[Dict[str, np.ndarray]], copy: bool = True,
                              copy_dataset: bool = False,
                              ) -> List[Dict[str, np.ndarray]]:
    r"""Inverse transform the energies and forces stored in a dataset.

    The reverse-transformed values are written back to each entry under the configured energy and
    force property names.

    Args:
        dataset (list): Dataset of type `List[Dict]` containing energies and forces and atomic numbers.
        copy (bool): Whether to copy data for the inverse transformation. Default is True.
            With False the stored arrays themselves are overwritten, even if `copy_dataset` is True,
            because copied entries still reference the same arrays.
        copy_dataset (bool): Whether to copy the dataset before writing the results back, so that the
            entries of the passed dataset are not reassigned. Default is False.

    Returns:
        dataset: Inverse-transformed dataset.
    """
    atoms = self._atomic_number
    energy, force = self._energy, self._force
    if copy_dataset:
        if type(dataset) is list:
            # `list.copy()` is shallow: the copied list would share its dicts with the input and the
            # assignments below would still modify the caller's entries. Copy every entry as well.
            dataset = [graph.copy() for graph in dataset]
        else:
            # Other containers are trusted to implement a suitable `copy()` themselves.
            dataset = dataset.copy()
    out_energy, out_force = self.inverse_transform(
        atomic_number=[graph[atoms] for graph in dataset],
        y=([graph[energy] for graph in dataset], [graph[force] for graph in dataset]),
        copy=copy,
    )
    for graph, graph_energy, graph_force in zip(dataset, out_energy, out_force):
        graph[energy] = graph_energy
        graph[force] = graph_force
    return dataset
# noinspection PyPep8Naming
def fit_transform_dataset(self, dataset: List[Dict[str, np.ndarray]], copy: bool = True, copy_dataset: bool = False,
                          **fit_params) -> List[Dict[str, np.ndarray]]:
    r"""Fit the scaler to a dataset and transform that dataset afterwards.

    Args:
        dataset (list): Dataset of type `List[Dict]` containing energies and forces and atomic numbers.
        copy (bool): Handed to :obj:`transform_dataset()` as `copy` . Default is True.
        copy_dataset (bool): Handed to :obj:`transform_dataset()` as `copy_dataset` . Default is False.
        fit_params: Fit parameters handed to :obj:`fit_dataset()` , which passes them on to :obj:`fit()`

    Returns:
        dataset: Transformed dataset.
    """
    # Fitting happens first; its return value is not needed here.
    self.fit_dataset(dataset=dataset, **fit_params)
    transformed = self.transform_dataset(dataset=dataset, copy=copy, copy_dataset=copy_dataset)
    return transformed