本文整理汇总了 Python 中 rdkit.Chem.SDMolSupplier 方法的典型用法代码示例。如果您正苦于以下问题:Python Chem.SDMolSupplier 方法的具体用法?Python Chem.SDMolSupplier 怎么用?Python Chem.SDMolSupplier 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 rdkit.Chem 的用法示例。
在下文中一共展示了Chem.SDMolSupplier方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: ReadMolFromSDF
# 需要导入模块: from rdkit import Chem [as 别名]
# 或者: from rdkit.Chem import SDMolSupplier [as 别名]
def ReadMolFromSDF(filename=""):
    """
    Open an SDF file and hand back RDKit's molecule supplier for it.

    The returned object is the ``Chem.SDMolSupplier`` itself; iterate over it
    (for example with a ``for`` statement) to reach the individual molecule
    objects.

    Usage:
        res = ReadMolFromSDF(filename)

    Input: filename is the path of the SDF file to read.
    Output: res is the supplier created for that file.
    """
    return Chem.SDMolSupplier(filename)
示例2: rotate_molecule
# 需要导入模块: from rdkit import Chem [as 别名]
# 或者: from rdkit.Chem import SDMolSupplier [as 别名]
def rotate_molecule(path, target_path, count=10):
    """Write ``count`` randomly rotated copies of every molecule in an SDF file.

    Args:
        path: SDF file the molecules are read from.
        target_path: SDF file the rotated copies are written to.
        count: Number of rotated copies produced per input molecule.

    Records that the supplier yields as ``None`` are skipped.
    """
    # Load dataset
    mols = Chem.SDMolSupplier(path)
    rotated_mols = []
    print("Loaded {} Molecules from {}".format(len(mols), path))

    print("Rotating Molecules...")
    for mol in mols:
        if mol is None:
            continue
        for _ in range(count):
            # Work on a copy: appending the same object ``count`` times would
            # leave every list entry pointing at one final orientation.
            rotated = Chem.Mol(mol)
            conformer = rotated.GetConformer()
            # One matrix per copy so that all atoms share the same transform
            # and the internal geometry of the molecule is preserved.
            rotation = random_rotation_matrix()
            for atom in rotated.GetAtoms():
                atom_idx = atom.GetIdx()
                pos = list(conformer.GetAtomPosition(atom_idx))
                conformer.SetAtomPosition(atom_idx, np.matmul(rotation, pos))
            rotated_mols.append(rotated)

    w = Chem.SDWriter(target_path)
    try:
        for m in rotated_mols:
            w.write(m)
    finally:
        # Close explicitly so the output is flushed even if a write fails.
        w.close()
    print("Saved {} Molecules to {}".format(len(rotated_mols), target_path))
示例3: draw_confusion_matrix
# 需要导入模块: from rdkit import Chem [as 别名]
# 或者: from rdkit.Chem import SDMolSupplier [as 别名]
def draw_confusion_matrix(dataset, model, set_trial=None, filename="test_results.sdf"):
    """Print the confusion-matrix counts stored in a result SDF file.

    The result directory comes from ``find_average_trial`` when ``set_trial``
    is ``None``; otherwise it is ``../result/<model>/<dataset>/<set_trial>/``.
    Each molecule's "true" and "pred" properties are read as floats and the
    predictions are rounded before the confusion matrix is computed.
    """
    if set_trial is None:
        result_dir = find_average_trial(dataset, model, metric="test_pr")
    else:
        result_dir = "../result/{}/{}/{}/".format(model, dataset, set_trial)

    # Collect the stored label and prediction of every record.
    labels, predictions = [], []
    for record in Chem.SDMolSupplier(result_dir + filename):
        labels.append(float(record.GetProp("true")))
        predictions.append(float(record.GetProp("pred")))

    labels = np.array(labels, dtype=float)
    predictions = np.round(np.array(predictions, dtype=float))

    # The flattened matrix is unpacked into exactly four cells.
    tn, fp, fn, tp = confusion_matrix(labels, predictions).ravel()
    print("tn: {}, fp: {}, fn: {}, tp: {}".format(tn, fp, fn, tp))
示例4: load_sdf_files
# 需要导入模块: from rdkit import Chem [as 别名]
# 或者: from rdkit.Chem import SDMolSupplier [as 别名]
def load_sdf_files(input_files, clean_mols):
    """Load SDF files into dataframes.

    For every path in ``input_files`` the structures are read from the SDF
    file and the task columns from the companion ``<path>.csv`` file; the two
    are joined column-wise.

    Args:
        input_files: Iterable of SDF file paths.
        clean_mols: Forwarded to ``Chem.SDMolSupplier`` as its second
            positional argument.

    Returns:
        A list with one dataframe per input file, holding the columns
        ``mol_id``, ``smiles`` and ``mol`` followed by the CSV columns.
    """
    dataframes = []
    for input_file in input_files:
        # Tasks are stored in .sdf.csv file
        raw_df = next(load_csv_files([input_file + ".csv"], shard_size=None))
        # Structures are stored in .sdf file
        print("Reading structures from %s." % input_file)
        suppl = Chem.SDMolSupplier(str(input_file), clean_mols, False, False)
        df_rows = [[ind, Chem.MolToSmiles(mol), mol]
                   for ind, mol in enumerate(suppl) if mol is not None]
        # Index the structures by their record number so that the join below
        # still pairs each molecule with its own CSV row when unparsable
        # records were dropped.
        # NOTE(review): assumes row i of the CSV describes record i of the
        # SDF and that load_csv_files yields a default 0..n-1 index - confirm.
        mol_df = pd.DataFrame(
            df_rows,
            columns=('mol_id', 'smiles', 'mol'),
            index=[row[0] for row in df_rows])
        merged = pd.concat([mol_df, raw_df], axis=1, join='inner')
        # Restore a contiguous index, as callers saw before.
        dataframes.append(merged.reset_index(drop=True))
    return dataframes
示例5: extract_total_num
# 需要导入模块: from rdkit import Chem [as 别名]
# 或者: from rdkit.Chem import SDMolSupplier [as 别名]
def extract_total_num(self, filepath):
    """Count the records that can be parsed from an SDF file.

    The count is the length reported by ``Chem.SDMolSupplier`` for
    ``filepath``. It helps when choosing the `target_index` option of the
    `parse` method, e.g. to extract features from 10% of a dataset you first
    need to know how many samples the file holds. The number returned here
    is not guaranteed to equal the final dataset size.

    Args:
        filepath (str): path of the file whose records are counted.

    Returns (int): number of records the supplier reports.

    """
    return len(Chem.SDMolSupplier(filepath))
示例6: importQuerySDF
# 需要导入模块: from rdkit import Chem [as 别名]
# 或者: from rdkit.Chem import SDMolSupplier [as 别名]
def importQuerySDF(in_file):
    """Read an SDF file and compute a fingerprint for every parsable compound.

    Args:
        in_file: Path of the SDF file to import.

    Returns:
        A tuple ``(fingerprints, mols)``: the fingerprints collected into a
        numpy array of dtype ``uint8`` and the list of molecules returned by
        ``calcFingerprints``. Compounds that raise ``SdfNoneMolError`` are
        reported on stdout and left out of both.
    """
    outfp = []
    outmol = []
    query = Chem.SDMolSupplier(in_file)
    for idx, m in enumerate(query):
        # Carriage return keeps the progress counter on a single line.
        sys.stdout.write(' Importing SDF file. Compound number: %s\r' % idx)
        sys.stdout.flush()
        try:
            if not m:
                raise SdfNoneMolError('None mol')
            _, fp, mol = calcFingerprints(m, qtype='sdf')
        except SdfNoneMolError:
            print(' SDF parse error (compound index: ' + str(idx) + ')')
            continue
        outfp.append(fp)
        outmol.append(mol)
    # Terminate the progress line.
    print('')
    return np.array(outfp, dtype=np.uint8), outmol
#unzip a pkl model
示例7: importQuerySDF
# 需要导入模块: from rdkit import Chem [as 别名]
# 或者: from rdkit.Chem import SDMolSupplier [as 别名]
def importQuerySDF(in_file):
    """Read an SDF file and compute fingerprint and id for every compound.

    Args:
        in_file: Path of the SDF file to import.

    Returns:
        A tuple ``(fingerprints, mols, ids)``: the fingerprints collected into
        a numpy array of dtype ``uint8``, the molecules returned by
        ``calcFingerprints`` and one identifier per compound. The identifier
        is the ``_Name`` property, or the first value returned by
        ``calcFingerprints`` when that property is missing. Compounds that
        raise ``SdfNoneMolError`` are reported on stdout and left out.
    """
    outfp = []
    outid = []
    outmol = []
    query = Chem.SDMolSupplier(in_file)
    for idx, m in enumerate(query):
        # Carriage return keeps the progress counter on a single line.
        sys.stdout.write(' Importing SDF file. Compound number: %s\r' % idx)
        sys.stdout.flush()
        try:
            if not m:
                raise SdfNoneMolError('None mol')
            smi, fp, mol = calcFingerprints(m, qtype='sdf')
        except SdfNoneMolError:
            print(' SDF parse error (compound index: ' + str(idx) + ')')
            continue
        try:
            outid.append(m.GetProp('_Name'))
        except KeyError:
            # No title on the record: fall back to the value from
            # calcFingerprints.
            outid.append(smi)
        outfp.append(fp)
        outmol.append(mol)
    # Terminate the progress line.
    print('')
    return np.array(outfp, dtype=np.uint8), outmol, outid
#unzip a pkl model
示例8: replace_dataset
# 需要导入模块: from rdkit import Chem [as 别名]
# 或者: from rdkit.Chem import SDMolSupplier [as 别名]
def replace_dataset(self, path, subset="test", target_name="target"):
    """Replace one subset of the dataset with molecules read from an SDF file.

    Args:
        path: SDF file to read.
        subset: Key under which the new data is stored in ``self.x``,
            ``self.c`` and ``self.y``.
        target_name: Property name holding the label, or a list of property
            names for the multitask case.

    Molecules are stored in ``self.x``, their conformer positions in
    ``self.c`` and the labels, shifted by ``self.mean`` and scaled by
    ``self.std``, in ``self.y`` (cast to ``int`` unless ``self.task`` is
    "regression").
    """
    molecules, coordinates, targets = [], [], []

    for mol in Chem.SDMolSupplier(path):
        if mol is None:
            continue

        if type(target_name) is list:
            # Multitask: one value per requested property, -1 when absent.
            targets.append([float(mol.GetProp(t)) if t in mol.GetPropNames() else -1 for t in target_name])
            self.outputs = len(self.target_name)
        elif target_name in mol.GetPropNames():
            # Singletask: records labelled -1 are left out.
            value = float(mol.GetProp(target_name))
            if value == -1:
                continue
            targets.append(value)
        else:
            # The record carries no label under the requested name.
            continue

        molecules.append(mol)
        coordinates.append(mol.GetConformer().GetPositions())

    # Build all arrays before touching the stored subsets.
    x_arr = np.array(molecules)
    c_arr = np.array(coordinates)
    y_arr = (np.array(targets) - self.mean) / self.std

    self.x[subset] = x_arr
    self.c[subset] = c_arr
    self.y[subset] = y_arr if self.task == "regression" else y_arr.astype(int)
示例9: find_confusion
# 需要导入模块: from rdkit import Chem [as 别名]
# 或者: from rdkit.Chem import SDMolSupplier [as 别名]
def find_confusion(dataset, base_path):
    """Save the most extreme and the best prediction examples of trials 1-10.

    For each ``<base_path>trial_<i>/test.sdf`` the signed error
    ``true - pred`` is computed per molecule from its "true" and "pred"
    properties. The two molecules with the largest error, the two with the
    smallest error and the one with the smallest absolute error are written
    to ``confusion_examples_<dataset>_trial<i>.sdf`` in the same directory.

    Args:
        dataset: Name inserted into the output file name.
        base_path: Directory prefix that contains the ``trial_<i>/`` folders.

    Raises:
        IndexError: If a trial file holds fewer than two readable molecules.
    """
    for i in range(1, 11):
        path = base_path + "trial_{}/".format(i)

        # Keep only readable records so that error positions and molecules
        # stay aligned.
        mols = [mol for mol in Chem.SDMolSupplier(path + "test.sdf") if mol is not None]
        diff_y = np.array(
            [float(mol.GetProp("true")) - float(mol.GetProp("pred")) for mol in mols],
            dtype=float)

        # Find largest, smallest error molecules
        idx = np.argsort(diff_y)
        best_idx = np.argsort(np.abs(diff_y))
        examples = [
            mols[int(idx[-1])],       # largest signed error
            mols[int(idx[-2])],       # second largest
            mols[int(idx[0])],        # smallest signed error
            mols[int(idx[1])],        # second smallest
            mols[int(best_idx[0])],   # smallest absolute error
        ]

        # Save example molecules
        writer = Chem.SDWriter(path + "confusion_examples_" + dataset + "_trial" + str(i) + ".sdf")
        try:
            for mol in examples:
                writer.write(mol)
        finally:
            # Close explicitly so each trial's file is flushed before the next.
            writer.close()
示例10: load_sdf_files
# 需要导入模块: from rdkit import Chem [as 别名]
# 或者: from rdkit.Chem import SDMolSupplier [as 别名]
def load_sdf_files(input_files, clean_mols, tasks=None):
    """Load SDF files into dataframes.

    Task values come from the companion ``<path>.csv`` file when it exists;
    otherwise they are read from the SDF properties named in ``tasks``.

    Args:
        input_files: Iterable of SDF file paths.
        clean_mols: Forwarded to ``Chem.SDMolSupplier`` as its second
            positional argument.
        tasks: Property names to read from the SDF records when no CSV file
            is present. Defaults to no tasks.

    Returns:
        A list with one dataframe per input file, holding the columns
        ``mol_id``, ``smiles`` and ``mol`` followed by the task columns.
    """
    from rdkit import Chem
    tasks = () if tasks is None else tasks
    dataframes = []
    for input_file in input_files:
        # Tasks are either in .sdf.csv file or in the .sdf file itself
        has_csv = os.path.isfile(input_file + ".csv")
        # Structures are stored in .sdf file
        print("Reading structures from %s." % input_file)
        suppl = Chem.SDMolSupplier(str(input_file), clean_mols, False, False)
        df_rows = []
        for ind, mol in enumerate(suppl):
            if mol is None:
                continue
            smiles = Chem.MolToSmiles(mol)
            df_row = [ind, smiles, mol]
            if not has_csv:  # Get task targets from .sdf file
                for task in tasks:
                    df_row.append(mol.GetProp(str(task)))
            df_rows.append(df_row)
        if has_csv:
            # Index the structures by their record number so that the join
            # still pairs each molecule with its own CSV row when unparsable
            # records were dropped.
            # NOTE(review): assumes row i of the CSV describes record i of
            # the SDF and that load_csv_files yields a default 0..n-1 index
            # - confirm.
            mol_df = pd.DataFrame(
                df_rows,
                columns=('mol_id', 'smiles', 'mol'),
                index=[row[0] for row in df_rows])
            raw_df = next(load_csv_files([input_file + ".csv"], shard_size=None))
            merged = pd.concat([mol_df, raw_df], axis=1, join='inner')
            # Restore a contiguous index, as callers saw before.
            dataframes.append(merged.reset_index(drop=True))
        else:
            mol_df = pd.DataFrame(
                df_rows, columns=('mol_id', 'smiles', 'mol') + tuple(tasks))
            dataframes.append(mol_df)
    return dataframes
示例11: readstring
# 需要导入模块: from rdkit import Chem [as 别名]
# 或者: from rdkit.Chem import SDMolSupplier [as 别名]
def readstring(format, string, **kwargs):
    """Build a Molecule from text in the given format.

    Required parameters:
       format - see the informats variable for a list of available
                input formats
       string

    Extra keyword arguments are forwarded to the RDKit parser for every
    format except "mol"/"sdf".

    Example:
    >>> input = "C1=CC=CS1"
    >>> mymol = readstring("smi", input)
    >>> len(mymol.atoms)
    5
    """
    text = str(string)
    fmt = format.lower()

    if fmt in ("mol", "sdf"):
        # Feed the text to an empty supplier and take its first record.
        supplier = Chem.SDMolSupplier()
        supplier.SetData(text)
        mol = next(supplier)
        del supplier
    elif fmt == "mol2":
        mol = Chem.MolFromMol2Block(text, **kwargs)
    elif fmt == "pdb":
        mol = MolFromPDBBlock(text, **kwargs)
    elif fmt == 'pdbqt':
        mol = MolFromPDBQTBlock(text, **kwargs)
    elif fmt == "smi":
        # Only the first line counts: "<smiles> <title words...>".
        first_line = text.strip().partition('\n')[0]
        tokens = first_line.split()
        mol = Chem.MolFromSmiles(tokens[0], **kwargs)
        if mol:
            mol.SetProp("_Name", ' '.join(tokens[1:]))
    elif fmt == 'inchi' and Chem.INCHI_AVAILABLE:
        mol = Chem.inchi.MolFromInchi(text, **kwargs)
    else:
        raise ValueError("%s is not a recognised RDKit format" % fmt)

    return Molecule(mol)
示例12: get_mol_list_from_sdf
# 需要导入模块: from rdkit import Chem [as 别名]
# 或者: from rdkit.Chem import SDMolSupplier [as 别名]
def get_mol_list_from_sdf(sdf_fname):
    """Parse every record of an SDF file into a list of molecules.

    Note: the file is opened through rdkit's Chem.SDMolSupplier, which is
      given a file name here, so only paths on the local filesystem are
      supported.

    Args:
      sdf_fname: Path to sdf file.

    Returns:
      List of the molecule objects yielded by the supplier, in file order.

    Raises:
      ValueError if a molblock in the SDF cannot be parsed; the message
      carries the text of the offending record.
    """
    supplier = Chem.SDMolSupplier(sdf_fname)
    parsed = []
    for position, molecule in enumerate(supplier):
        if molecule is None:
            raise ValueError("Unable to parse the following mol block %s" %
                             supplier.GetItemText(position))
        parsed.append(molecule)
    return parsed
示例13: FragmentSanitize
# 需要导入模块: from rdkit import Chem [as 别名]
# 或者: from rdkit.Chem import SDMolSupplier [as 别名]
def FragmentSanitize(tempSDFPath):
    """Fragment the first molecule of an SDF file along its BRICS bonds.

    The file is read with ``sanitize=True``; the fragments themselves are
    returned as molecules without being sanitized.

    Args:
        tempSDFPath: Path of the SDF file to read.

    Returns:
        The fragments produced by ``Chem.GetMolFrags(..., asMols=True)``.

    Raises:
        RDKitError: With code 1 when reading or fragmenting fails.
    """
    try:
        suppl2 = Chem.SDMolSupplier(tempSDFPath, sanitize=True)
        newmol2 = Chem.FragmentOnBRICSBonds(suppl2[0])
        return Chem.GetMolFrags(newmol2, asMols=True, sanitizeFrags=False)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are not turned into RDKitError.
        raise RDKitError(1)
示例14: converter
# 需要导入模块: from rdkit import Chem [as 别名]
# 或者: from rdkit.Chem import SDMolSupplier [as 别名]
def converter(path, target_path, name, target_name, process=20):
    """Optimize the conformers of a dataset and save them as an SDF file.

    Args:
        path: Input file; a path containing ".csv" is loaded through
            ``load_csv`` and parsed as SMILES, one containing ".sdf" is read
            with ``Chem.SDMolSupplier``.
        target_path: SDF file the optimized molecules are written to.
        name: Forwarded to ``load_csv`` (CSV input only).
        target_name: Forwarded to ``load_csv`` for CSV input; for SDF input
            the name of the property holding the target value.
        process: Number of worker processes in the pool.

    Molecules that cannot be parsed, or for which ``optimize_conformer``
    returns ``None``, are left out. Every saved molecule carries its target
    value in the "target" property.

    Raises:
        ValueError: If ``path`` contains neither ".csv" nor ".sdf".
    """
    # Load dataset
    print("Loading Dataset...")
    if ".csv" in path:
        x, y = load_csv(path, name, target_name)
        mols, props = [], []
        for smi, prop in zip(x, y):
            mol = Chem.MolFromSmiles(smi)
            if mol is not None:
                mols.append(mol)
                props.append(prop)
    elif ".sdf" in path:
        # Unreadable records come back as None and have no properties to read.
        mols = [mol for mol in Chem.SDMolSupplier(path) if mol is not None]
        props = [mol.GetProp(target_name) for mol in mols]
    else:
        raise ValueError("Unsupported file type.")
    mol_idx = list(range(len(mols)))
    print("Loaded {} Molecules from {}".format(len(mols), path))

    # Optimize coordinate using multiprocessing
    print("Optimizing Conformers...")
    # starmap blocks until all results are in, so the pool can be released
    # as soon as the call returns.
    with mp.Pool(process) as pool:
        results = pool.starmap(optimize_conformer, zip(mol_idx, mols, props))

    # Remove None and add properties
    mol_list_filtered = []
    for mol, prop in results:
        if mol is not None:
            mol.SetProp("target", str(prop))
            mol_list_filtered.append(mol)
    print("{} Molecules Optimized".format(len(mol_list_filtered)))

    # Save molecules
    print("Saving File...")
    w = Chem.SDWriter(target_path)
    try:
        for m in mol_list_filtered:
            w.write(m)
    finally:
        # Close explicitly so the output is flushed even if a write fails.
        w.close()
    print("Saved {} Molecules to {}".format(len(mol_list_filtered), target_path))
示例15: generate
# 需要导入模块: from rdkit import Chem [as 别名]
# 或者: from rdkit.Chem import SDMolSupplier [as 别名]
def generate(self, filename, add_h=False, filters=lambda x: True, size=None, validation=0.1, test=0.1):
    """Load molecules from a file and build the dataset arrays.

    Args:
        filename: Path ending in ".sdf" (read with ``Chem.SDMolSupplier``) or
            ".smi" (one SMILES per line).
        add_h: When true, ``Chem.AddHs`` is applied to every molecule.
        filters: Predicate; only molecules for which it is truthy are kept.
        size: Keep at most this many molecules (``None`` keeps all).
        validation: Forwarded to ``self._generate_train_validation_test``.
        test: Forwarded to ``self._generate_train_validation_test``.

    Raises:
        ValueError: If ``filename`` ends in neither ".sdf" nor ".smi".
    """
    self.log('Extracting {}..'.format(filename))

    if filename.endswith('.sdf'):
        supplier = Chem.SDMolSupplier(filename)
        total = len(supplier)
        molecules = [mol for mol in supplier if mol is not None]
    elif filename.endswith('.smi'):
        with open(filename, 'r') as handle:
            lines = handle.readlines()
        total = len(lines)
        # Drop lines RDKit could not parse, mirroring the .sdf branch.
        molecules = [mol for mol in map(Chem.MolFromSmiles, lines) if mol is not None]
    else:
        raise ValueError(
            'Unsupported file extension: {} (expected .sdf or .smi)'.format(filename))

    if add_h:
        molecules = list(map(Chem.AddHs, molecules))
    self.data = list(filter(filters, molecules))[:size]

    # ``total`` is the number of records in the input, counted while loading
    # so the file does not have to be opened a second time.
    self.log('Extracted {} out of {} molecules {}adding Hydrogen!'.format(len(self.data),
                                                                          total,
                                                                          '' if add_h else 'not '))

    self._generate_encoders_decoders()
    self._generate_AX()

    # it contains the all the molecules stored as rdkit.Chem objects
    self.data = np.array(self.data)

    # it contains the all the molecules stored as SMILES strings
    self.smiles = np.array(self.smiles)

    # a (N, L) matrix where N is the length of the dataset and each L-dim vector contains the
    # indices corresponding to a SMILE sequences with padding wrt the max length of the longest
    # SMILES sequence in the dataset (see self._genS)
    self.data_S = np.stack(self.data_S)

    # a (N, 9, 9) tensor where N is the length of the dataset and each 9x9 matrix contains the
    # indices of the positions of the ones in the one-hot representation of the adjacency tensor
    # (see self._genA)
    self.data_A = np.stack(self.data_A)

    # a (N, 9) matrix where N is the length of the dataset and each 9-dim vector contains the
    # indices of the positions of the ones in the one-hot representation of the annotation matrix
    # (see self._genX)
    self.data_X = np.stack(self.data_X)

    # a (N, 9) matrix where N is the length of the dataset and each 9-dim vector contains the
    # diagonal of the correspondent adjacency matrix
    self.data_D = np.stack(self.data_D)

    # a (N, F) matrix where N is the length of the dataset and each F vector contains features
    # of the correspondent molecule (see self._genF)
    self.data_F = np.stack(self.data_F)

    # a (N, 9) matrix where N is the length of the dataset and each 9-dim vector contains the
    # eigenvalues of the correspondent Laplacian matrix
    self.data_Le = np.stack(self.data_Le)

    # a (N, 9, 9) matrix where N is the length of the dataset and each 9x9 matrix contains the
    # eigenvectors of the correspondent Laplacian matrix
    self.data_Lv = np.stack(self.data_Lv)

    self.vertexes = self.data_F.shape[-2]
    self.features = self.data_F.shape[-1]

    self._generate_train_validation_test(validation, test)