Source code for kgcnn.molecule.io

import logging


def parse_list_to_xyz_str(mol: list, comment: str = "", number_coordinates: int = None):
    """Serialize a single molecule, given as atoms plus coordinates, into an xyz-string.

    Args:
        mol (list): Tuple or list holding the atom list and the coordinate list, e.g.
            `[['C', 'H', ...], [[0.0, 0.0, 0.0], [1.0, 1.0, 1.0], ... ]]`.
        comment (str): Text placed on the comment (second) line of the xyz string.
            Must not contain a line break. Default is "".
        number_coordinates (int): If not None, only the first `number_coordinates` values
            of each coordinate entry are written. Default is None.

    Returns:
        str: Information in xyz-string format.

    Raises:
        ValueError: If the number of atoms differs from the number of coordinates or if
            `comment` contains a line break.
    """
    symbols, positions = mol[0], mol[1]

    if len(symbols) != len(positions):
        raise ValueError("Number of atoms does not match number of coordinates for xyz string.")
    if "\n" in comment:
        raise ValueError("Line break must not be in the comment line for xyz string.")

    # Header: atom count line followed by the comment line.
    chunks = [str(int(len(symbols))), "\n", comment, "\n"]

    for symbol, position in zip(symbols, positions):
        label = str(symbol)
        if number_coordinates is not None:
            position = position[:number_coordinates]
        # One fixed-precision field per coordinate value, each preceded by a single space.
        row_template = " {:.10f}" * len(position) + "\n"
        chunks.append(label + row_template.format(*position))

    return "".join(chunks)
def write_list_to_xyz_file(filepath: str, mol_list: list):
    """Write several molecules, each a pair of atoms and coordinates, into one xyz file.

    Every molecule is converted with :obj:`parse_list_to_xyz_str` and the resulting
    xyz-strings are written one after another.

    Args:
        filepath (str): Full path to file including name.
        mol_list (list): List of molecules, each a pair of atoms and coordinates, i.e.
            `[[['C', 'H', ... ], [[0.0, 0.0, 0.0], [1.0, 1.0, 1.0], ... ]], ... ]`.
    """
    with open(filepath, "w+") as xyz_file:
        # Molecules are converted lazily, one at a time, while writing.
        xyz_file.writelines(parse_list_to_xyz_str(molecule) for molecule in mol_list)
def parse_mol_str(mol_str: str):
    """Parse MDL mol table string into nested list. Only supports V2000 format and CTab.

    Better rely on OpenBabel to do this. This function was a temporary solution.
    All parsed entries are returned as stripped strings taken from the fixed-width columns;
    no numeric conversion is applied to the returned values.

    Args:
        mol_str (str): String of mol block.

    Returns:
        list: [title, program, comment, counts, atoms, bonds, properties]. For an empty string
        or a block with fewer than four lines an error is logged and
        `["", "", "", [], [], [], []]` is returned.

    Raises:
        NotImplementedError: If the version tag at the end of the counts line is not "V2000".
        ValueError: If the counts line lacks the atom or bond count, or a count is not an integer.
    """
    empty_return = ["", "", "", [], [], [], []]
    if len(mol_str) == 0:
        logging.error("Received empty MLD mol-block string. Nothing to parse. Return empty list.")
        return empty_return
    lines = mol_str.split("\n")
    if len(lines) < 4:
        logging.error("Could not find counts line. Invalid format. Can not parse string. Return empty list.")
        return empty_return

    title = lines[0]
    program = lines[1]  # IIPPPPPPPPMMDDYYHHmmddSSssssssssssEEEEEEEEEEEERRRRRR
    comment = lines[2]
    counts_line = lines[3]
    version = counts_line[-6:].strip()
    if version != "V2000":
        raise NotImplementedError("Can not parse mol V3000 or higher.")

    # counts has aaabbblllfffcccsssxxxrrrpppiiimmmvvvvvv
    # or shorter, but always carries the version tag in the last six columns.
    count_fields = [counts_line[i:i + 3].strip() for i in range(0, len(counts_line[:-6]), 3)]
    counts = count_fields + [version]
    if len(count_fields) < 2:
        raise ValueError("Counts line of mol block must contain the number of atoms and bonds.")

    def optional_count(index):
        # Trailing count fields may be absent or blank on a shortened counts line; treat as zero.
        if index < len(count_fields) and count_fields[index]:
            return int(count_fields[index])
        return 0

    na = int(count_fields[0])
    nb = int(count_fields[1])
    nl = optional_count(2)
    ns = optional_count(5)
    if ns != 0 or nl != 0:
        logging.warning("Not supporting atom lists (deprecated) or stext entries for this function.")

    # Atom block: xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee
    atom_columns = (0, 10, 20, 30, 34, 36, 39, 42, 45, 48, 51, 54, 57, 60, 63, 66, 69)
    atoms = [
        [a[start:stop].strip() for start, stop in zip(atom_columns, atom_columns[1:])]
        for a in lines[4:(na + 4)]
    ]

    # Bond block: 111222tttsssxxxrrrccc
    bonds = [
        [b[i:i + 3].strip() for i in range(0, len(b), 3)]
        for b in lines[4 + na:4 + na + nb]
    ]

    # Properties block
    properties = []
    for p in lines[4 + na + nb:]:
        # The terminator is written "M  END" (two spaces) in CTfiles. Compare the
        # whitespace-split tokens so that spacing variants and a trailing "\r" also match.
        if p.split() == ["M", "END"]:
            break
        if "M" in p:
            properties.append(p)

    return [title, program, comment, counts, atoms, bonds, properties]
def read_xyz_file(file_path, delimiter: str = None, line_by_line=False):
    """Simple python script to read xyz-file and parse into a nested python list.

    Always returns a list with the geometries in xyz file. Each geometry starts with a line
    holding only the atom count, followed by one comment line (read and discarded) and then
    that many atom lines of the form `symbol value value ...`. Atom symbols are normalized
    to capitalized form (e.g. "HE" becomes "He") and the remaining values are converted to float.

    Args:
        file_path (str): Full path to xyz-file.
        delimiter (str): Delimiter for xyz separation. Default is None, which splits on
            any run of whitespace.
        line_by_line (bool): Whether to iterate over the file object line by line instead of
            loading all lines into memory first. Default is False.

    Returns:
        list: Nested list `[[atoms, coordinates], ...]` with one entry per geometry in the file.

    Raises:
        ValueError: If an atom count or a coordinate value can not be converted to a number.
        IndexError: If a blank line is found where an atom line is expected.
    """
    mol_list = []
    atoms = []
    coordinates = []
    num = 0  # Number of atom lines still expected for the current geometry.
    comment = 0  # Set to 1 while the next line is the comment line of a geometry.

    # The context manager closes the file even if parsing raises part-way through.
    with open(file_path, "r") as infile:
        lines = infile if line_by_line else infile.readlines()
        for line in lines:
            line_list = line.strip().split(delimiter)
            line_list = [x.strip() for x in line_list if x != ""]  # Remove multiple delimiter
            if len(line_list) == 1 and num == 0 and comment == 0:
                # Start new conformer and set line counts to read.
                num = int(line_list[0])
                comment = 1
            elif comment > 0:
                # Comment comes before atom block and must always be read. Its content is not kept.
                comment = 0
            elif num > 0:
                if len(line_list) <= 1:
                    logging.error("Expected to read atom-coordinate block but got comment or line count instead.")
                atoms.append(str(line_list[0]).lower().capitalize())
                coordinates.append([float(x) for x in line_list[1:]])
                if num == 1:
                    # This was last line for this conformer. Append result and reset current list.
                    mol_list.append([atoms, coordinates])
                    num = 0
                    atoms = []
                    coordinates = []
                else:
                    # Finished reading an atom line.
                    num = num - 1
            else:
                logging.warning("Empty line in xyz file for mismatch in atom count found.")

    return mol_list
def write_mol_block_list_to_sdf(mol_block_list, filepath):
    """Write a list of mol blocks as string into a SDF file.

    Consecutive records are separated by a `$$$$` delimiter line; no delimiter is written
    after the last record. An entry that is None is replaced by an empty placeholder mol
    block (zero atoms and bonds, header line " FAIL"), so that the number of records in the
    file always equals `len(mol_block_list)`.

    Args:
        mol_block_list (list): List of mol blocks as string. Entries may be None.
        filepath (str): File path for SDF file.

    Returns:
        None.
    """
    # Placeholder record in fixed-width V2000 layout: three-column count fields and the
    # two-space "M  END" terminator, so that CTab readers accept the record.
    failed_block = "".join([
        "\n",
        " FAIL\n",
        "\n",
        "  0" * 10 + " V2000\n",
        "M  END\n",
    ])
    last_index = len(mol_block_list) - 1
    with open(filepath, "w+") as file:
        for i, mol_block in enumerate(mol_block_list):
            record = failed_block if mol_block is None else mol_block
            file.write(record)
            if i < last_index:
                # The delimiter must sit on a line of its own, also when the mol block
                # was passed without a trailing line break.
                if record and not record.endswith("\n"):
                    file.write("\n")
                file.write("$$$$\n")
def read_mol_list_from_sdf_file(filepath, line_by_line=False):
    """Simple loader to load an SDF file by only splitting at the `$$$$` delimiter lines.

    The mol blocks are returned as raw text including their line breaks; no parsing or
    validation of the individual records is done.

    Args:
        filepath (str): File path for SDF file.
        line_by_line (bool): Whether to read SDF file line by line instead of reading the
            whole file into one string before splitting. Default is False.

    Returns:
        list: List of mol blocks as string. An empty file yields an empty list.
    """
    with open(filepath, "r") as f:
        if not line_by_line:
            mol_list = f.read().split("$$$$\n")
        else:
            mol_list = []
            current_lines = []
            for line in f:
                if line == "$$$$\n":
                    mol_list.append("".join(current_lines))
                    current_lines = []
                else:
                    current_lines.append(line)
            # Last record that is not followed by a delimiter line.
            if current_lines:
                mol_list.append("".join(current_lines))

    # Check if there was tailing $$$$ with nothing to follow.
    # Split will make empty string at the end, which does not match actual number of mol blocks.
    # The list itself may be empty (empty file read line by line), so guard the index access.
    if mol_list and len(mol_list[-1]) == 0:
        mol_list = mol_list[:-1]
    return mol_list
def read_smiles_file(file_path):
    """Read a smiles file that holds one smiles string per line.

    Trailing whitespace, including the line break, is removed from every line.
    Blank lines are kept as empty strings.

    Args:
        file_path (str): File path for smiles file.

    Returns:
        list: List of smiles.
    """
    smiles = []
    with open(file_path, "r") as smiles_file:
        for raw_line in smiles_file:
            smiles.append(raw_line.rstrip())
    return smiles
def write_smiles_file(file_path, smile_list):
    """Write smiles strings to a file, one per line.

    Entries are separated by a line break; no line break is written after the last entry.

    Args:
        file_path (str): File path for smiles file.
        smile_list (list): List of smiles to write to file.

    Returns:
        None
    """
    final_index = len(smile_list) - 1
    with open(file_path, "w+") as smiles_file:
        for index, smiles in enumerate(smile_list):
            # Every entry but the last one is terminated by a line break.
            line_end = "" if index == final_index else "\n"
            smiles_file.write(smiles + line_end)