# Source code for kgcnn.molecule.base

import logging
import numpy as np

# Module logger.
# `logging.basicConfig()` performs the default root-logger configuration (it is
# a no-op when the root logger already has handlers), so records emitted by this
# module have a handler to reach.
logging.basicConfig()
# Logger named after this module; the interface class below uses it to report
# unknown encoder keys and unknown property identifiers.
module_logger = logging.getLogger(__name__)
# Let INFO records and above pass this logger.
module_logger.setLevel(logging.INFO)


class MolGraphInterface:
    r"""The `MolGraphInterface` defines the base class interface to extract a molecular graph.

    The method implementation to generate a molecule-instance from smiles etc. can be obtained from different
    backends like `RDkit` . The mol-instance of a chemical informatics package like `RDkit` is treated via
    composition. The interface is designed to extract a graph from a mol instance, not to make a mol object
    from a graph.

    All public methods and properties raise `NotImplementedError` here and must be provided by a sub-class.
    Only the static helpers `_check_encoder`, `_check_properties_list` and `_sort_bonds` are implemented.
    """

    def __init__(self, mol=None, make_directed: bool = False):
        """Set the mol attribute for composition. This mol instances will be the backend molecule class.

        Args:
            mol: Instance of a molecule from chemical informatics package.
            make_directed (bool): Whether the edges are directed. Default is False.
        """
        self.mol = mol
        self._make_directed = make_directed

    def add_hs(self, **kwargs):
        """Add hydrogen to molecule instance."""
        raise NotImplementedError("Method for `MolGraphInterface` must be implemented in sub-class.")

    def remove_hs(self, **kwargs):
        """Remove hydrogen from molecule instance."""
        raise NotImplementedError("Method for `MolGraphInterface` must be implemented in sub-class.")

    def make_conformer(self, **kwargs):
        """Generate a conformer guess for molecule instance."""
        raise NotImplementedError("Method for `MolGraphInterface` must be implemented in sub-class.")

    def optimize_conformer(self, **kwargs):
        """Optimize conformer of molecule instance."""
        raise NotImplementedError("Method for `MolGraphInterface` must be implemented in sub-class.")

    def from_smiles(self, smile: str, **kwargs):
        """Main method to generate a molecule from smiles string representation.

        Args:
            smile (str): Smile string representation of a molecule.

        Returns:
            self
        """
        raise NotImplementedError("Method for `MolGraphInterface` must be implemented in sub-class.")

    def to_smiles(self, **kwargs):
        """Return a smile string representation of the mol instance.

        Returns:
            smile (str): Smile string.
        """
        raise NotImplementedError("Method for `MolGraphInterface` must be implemented in sub-class.")

    def from_mol_block(self, mol_block: str, keep_hs: bool = True, **kwargs):
        """Set mol-instance from a more extensive string representation containing coordinates and bond information.

        Args:
            mol_block (str): Mol-block representation of a molecule.
            keep_hs (bool): Whether to keep hydrogen in mol-block. Default is True.

        Returns:
            self
        """
        raise NotImplementedError("Method for `MolGraphInterface` must be implemented in sub-class.")

    def to_mol_block(self, **kwargs):
        """Make a more extensive string representation containing coordinates and bond information from self.

        Returns:
            mol_block (str): Mol-block representation of a molecule.
        """
        raise NotImplementedError("Method for `MolGraphInterface` must be implemented in sub-class.")

    def clean(self, **kwargs):
        """Clean the molecule instance. The exact behavior is defined by the sub-class."""
        raise NotImplementedError("Method for `MolGraphInterface` must be implemented in sub-class.")

    def compute_partial_charges(self, method="gasteiger", **kwargs):
        """Compute partial charges for the molecule instance.

        Args:
            method (str): Identifier of the method to use. Default is "gasteiger".
        """
        raise NotImplementedError("Method for `MolGraphInterface` must be implemented in sub-class.")

    @property
    def node_number(self):
        """Return list of node numbers which is the atomic number of atoms in the molecule"""
        raise NotImplementedError("Method for `MolGraphInterface` must be implemented in sub-class.")

    @property
    def node_symbol(self):
        """Return a list of atomic symbols of the molecule."""
        raise NotImplementedError("Method for `MolGraphInterface` must be implemented in sub-class.")

    @property
    def node_coordinates(self):
        """Return a list of atomic coordinates of the molecule."""
        raise NotImplementedError("Method for `MolGraphInterface` must be implemented in sub-class.")

    @property
    def edge_indices(self):
        """Return a list of edge indices of the molecule."""
        raise NotImplementedError("Method for `MolGraphInterface` must be implemented in sub-class.")

    @property
    def edge_number(self):
        """Return a list of edge number that represents the bond order."""
        raise NotImplementedError("Method for `MolGraphInterface` must be implemented in sub-class.")

    def edge_attributes(self, properties: list, encoder: dict):
        """Make edge attributes.

        Args:
            properties (list): List of string identifier for a molecular property. Must match backend features.
            encoder (dict): A dictionary of callable encoder function or class for each string identifier.

        Returns:
            list: List of attributes after processed by the encoder.
        """
        raise NotImplementedError("Method for `MolGraphInterface` must be implemented in sub-class.")

    def node_attributes(self, properties: list, encoder: dict):
        """Make node attributes.

        Args:
            properties (list): List of string identifier for a molecular property. Must match backend features.
            encoder (dict): A dictionary of callable encoder function or class for each string identifier.

        Returns:
            list: List of attributes after processed by the encoder.
        """
        raise NotImplementedError("Method for `MolGraphInterface` must be implemented in sub-class.")

    def graph_attributes(self, properties: list, encoder: dict):
        """Make graph attributes.

        Args:
            properties (list): List of string identifier for a molecular property. Must match backend features.
            encoder (dict): A dictionary of callable encoder function or class for each string identifier.

        Returns:
            list: List of attributes after processed by the encoder.
        """
        raise NotImplementedError("Method for `MolGraphInterface` must be implemented in sub-class.")

    @staticmethod
    def _check_encoder(encoder: dict, possible_keys: list, raise_error: bool = False):
        """Verify and check if encoder dictionary inputs is within possible properties.

        If a key has to be removed, a warning is issued.

        Args:
            encoder (dict): Dictionary of callable encoder function or class. Key matches properties.
            possible_keys (list): List of allowed keys for encoder.
            raise_error (bool): Whether to raise an error on wrong identifier.

        Returns:
            dict: Cleaned encoder dictionary. Empty if `encoder` is None.

        Raises:
            ValueError: If an unknown key is found and `raise_error` is True.
        """
        if encoder is None:
            return {}
        # Check if encoder is given for unknown identifier.
        # Encoder is only intended for string-based properties.
        encoder_unknown = [x for x in encoder if x not in possible_keys]
        if len(encoder_unknown) > 0:
            msg = "Encoder property not known %s" % encoder_unknown
            if raise_error:
                module_logger.error(msg)
                raise ValueError(msg)
            module_logger.warning(msg)
        # Always hand back a new dictionary so that the input is never mutated.
        return {key: value for key, value in encoder.items() if key not in encoder_unknown}

    @staticmethod
    def _check_properties_list(properties: list, possible_properties: list, attribute_name: str,
                               raise_error: bool = False):
        """Verify and check if list of string identifier match expected properties.

        If an identifier has to be removed, a warning is issued. Non-string properties i.e. class or functions
        to extract properties are ignored by the check and always kept.

        Args:
            properties (list): List of requested string identifier. Key matches properties.
            possible_properties (list): List of allowed string identifier for properties.
            attribute_name(str): A name for the properties. E.g. bond, node or graph.
            raise_error (bool): Whether to raise an error on wrong identifier.

        Returns:
            list: Cleaned list of properties in the original order. Empty if `properties` is None.

        Raises:
            ValueError: If an unknown string identifier is found and `raise_error` is True.
        """
        if properties is None:
            return []
        # Check if string identifier match the list of possible keys.
        props_unknown = [x for x in properties if isinstance(x, str) and x not in possible_properties]
        if len(props_unknown) > 0:
            msg = "%s properties are not defined, ignore following keys: %s" % (attribute_name, props_unknown)
            if raise_error:
                module_logger.error(msg)
                raise ValueError(msg)
            module_logger.warning(msg)
        # Keep every non-string entry and every known string identifier.
        return [x for x in properties if not isinstance(x, str) or x in possible_properties]

    @staticmethod
    def _sort_bonds(bond_idx, bond_info=None):
        """Sort bond indices by first index and then by second index, and reorder bond information alike.

        Args:
            bond_idx: Bond indices, convertible to an integer array with one row per bond and at least
                two columns.
            bond_info: Optional sequence with one entry per bond. Default is None.

        Returns:
            tuple: `(bond_idx, bond_info)`. `bond_idx` is an `int64` array of the sorted indices.
            `bond_info` is a list in the same order, or None if no (or empty) information was given.
            If `bond_idx` is empty, `bond_info` is returned unchanged.
        """
        bond_idx = np.array(bond_idx, dtype="int64")
        if len(bond_idx) == 0:
            return bond_idx, bond_info
        # Two stable passes (secondary key first, primary key second) give an ordering by
        # (column 0, column 1) in which fully equal rows keep their input order.
        order_second = np.argsort(bond_idx[:, 1], axis=0, kind="mergesort")  # stable!
        order_first = np.argsort(bond_idx[order_second][:, 0], axis=0, kind="mergesort")  # stable!
        order = order_second[order_first]
        # Explicit check instead of truthiness, which is ambiguous for multi-element arrays.
        has_info = bond_info is not None and len(bond_info) > 0
        sorted_info = [bond_info[i] for i in order] if has_info else None
        return bond_idx[order], sorted_info