This article collects typical usage examples of the rdkit.DataStructs.BulkTanimotoSimilarity method in Python. If you are wondering how exactly DataStructs.BulkTanimotoSimilarity is used, or are looking for concrete examples of it in real code, the hand-picked samples below may help. You can also explore further usage examples from the module it belongs to, rdkit.DataStructs.
Below are 13 code examples of the DataStructs.BulkTanimotoSimilarity method, sorted by popularity by default.
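Before diving into the examples, here is a minimal, self-contained sketch of how the method is typically called: it takes one query fingerprint and a list of reference fingerprints and returns a Python list of Tanimoto similarities in a single call. The SMILES strings below are made up purely for illustration.

from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem

# Hypothetical molecules, used only to demonstrate the call signature
smiles = ['CCO', 'c1ccccc1', 'CC(=O)O']
mols = [Chem.MolFromSmiles(s) for s in smiles]
fps = [AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=2048) for m in mols]

# One call compares the first fingerprint against all the others
sims = DataStructs.BulkTanimotoSimilarity(fps[0], fps[1:])
print(sims)  # list of floats in [0, 1], one per reference fingerprint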
Example 1: doSimSearch
# Required import: from rdkit import DataStructs [as alias]
# Or: from rdkit.DataStructs import BulkTanimotoSimilarity [as alias]
def doSimSearch(model_name):
    # calcFingerprints, querymatrix and smiles are module-level names from the original script
    if os.name == 'nt': sep = '\\'
    else: sep = '/'
    mod = model_name.split(sep)[-1].split('.')[0]
    try:
        with zipfile.ZipFile(os.path.dirname(os.path.abspath(__file__)) + sep + 'actives' + sep + mod + '.smi.zip', 'r') as zfile:
            comps = [i.split('\t') for i in zfile.open(mod + '.smi', 'r').read().splitlines()]
    except IOError: return
    comps2 = []
    afp = []
    for comp in comps:
        try:
            afp.append(calcFingerprints(comp[1]))
            comps2.append(comp)
        except: pass
    ret = []
    for i, fp in enumerate(querymatrix):
        sims = DataStructs.BulkTanimotoSimilarity(fp, afp)
        idx = sims.index(max(sims))
        ret.append([sims[idx], mod] + comps2[idx] + [smiles[i]])
    return ret
#prediction runner
Example 2: calculate_internal_pairwise_similarities
# Required import: from rdkit import DataStructs [as alias]
# Or: from rdkit.DataStructs import BulkTanimotoSimilarity [as alias]
def calculate_internal_pairwise_similarities(smiles_list: Collection[str]) -> np.array:
    """
    Computes the pairwise similarities of the provided list of SMILES against itself.

    Returns:
        Symmetric matrix of pairwise similarities. Diagonal is set to zero.
    """
    if len(smiles_list) > 10000:
        logger.warning(f'Calculating internal similarity on large set of '
                       f'SMILES strings ({len(smiles_list)})')

    mols = get_mols(smiles_list)
    fps = get_fingerprints(mols)
    nfps = len(fps)

    similarities = np.zeros((nfps, nfps))
    for i in range(1, nfps):
        sims = DataStructs.BulkTanimotoSimilarity(fps[i], fps[:i])
        similarities[i, :i] = sims
        similarities[:i, i] = sims

    return similarities
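get_mols and get_fingerprints are helpers defined elsewhere in this example's code base and are not shown on this page. A plausible minimal stand-in (an assumption, not the original implementation) that produces ECFP4-style Morgan bit vectors, consistent with the docstring of Example 10, could look like this:

from typing import Collection, List
from rdkit import Chem
from rdkit.Chem import AllChem

def get_mols(smiles_list: Collection[str]) -> List[Chem.Mol]:
    # Parse SMILES and silently drop entries RDKit cannot read
    mols = (Chem.MolFromSmiles(s) for s in smiles_list)
    return [m for m in mols if m is not None]

def get_fingerprints(mols: List[Chem.Mol], radius: int = 2, n_bits: int = 2048):
    # ECFP4-like Morgan bit vectors; radius and n_bits are assumed defaults here
    return [AllChem.GetMorganFingerprintAsBitVect(m, radius, nBits=n_bits) for m in mols]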
Example 3: highest_tanimoto_precalc_fps
# Required import: from rdkit import DataStructs [as alias]
# Or: from rdkit.DataStructs import BulkTanimotoSimilarity [as alias]
def highest_tanimoto_precalc_fps(mol, fps):
    """
    Args:
        mol: RDKit molecule
        fps: precalculated ECFP4 bitvectors

    Returns:
        Highest Tanimoto similarity between mol and the fingerprints in fps
        (0 if fps is empty).
    """
    if fps is None or len(fps) == 0:
        return 0

    fp1 = AllChem.GetMorganFingerprintAsBitVect(mol, 2, 4096)
    sims = np.array(DataStructs.BulkTanimotoSimilarity(fp1, fps))

    return sims.max()
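A short usage sketch for the function above; the reference fingerprints must be built with the same size (4096 bits) so that they are comparable to the query fingerprint. The SMILES here are hypothetical.

from rdkit import Chem
from rdkit.Chem import AllChem

ref_smiles = ['CCO', 'CCCO', 'c1ccccc1']  # made-up reference set
ref_fps = [AllChem.GetMorganFingerprintAsBitVect(Chem.MolFromSmiles(s), 2, 4096)
           for s in ref_smiles]

query = Chem.MolFromSmiles('CCCCO')
print(highest_tanimoto_precalc_fps(query, ref_fps))  # highest similarity to the reference set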
Example 4: doPercentileCalculation
# Required import: from rdkit import DataStructs [as alias]
# Or: from rdkit.DataStructs import BulkTanimotoSimilarity [as alias]
def doPercentileCalculation(model_name):
    # options, getAdData, get_training_smiles and percentileofscore are module-level names
    # from the original script (percentileofscore is typically scipy.stats.percentileofscore)
    global rdkit_mols
    #expensive to unzip training file - so only done if smiles requested
    if options.ad_smiles:
        smiles = get_training_smiles(model_name)
    ad_data = getAdData(model_name)

    def calcPercentile(rdkit_mol):
        sims = DataStructs.BulkTanimotoSimilarity(rdkit_mol, ad_data[:, 0])
        bias = ad_data[:, 2].astype(float)
        std_dev = ad_data[:, 3].astype(float)
        scores = ad_data[:, 5].astype(float)
        weights = sims / (bias * std_dev)
        critical_weight = weights.max()
        percentile = percentileofscore(scores, critical_weight)
        if options.ad_smiles:
            critical_smiles = smiles[np.argmax(weights)]
            result = percentile, critical_smiles
        else:
            result = percentile, None
        return result

    ret = [calcPercentile(x) for x in rdkit_mols]
    return model_name, ret
#prediction runner for percentile calculation
Example 5: ClusterFps
# Required import: from rdkit import DataStructs [as alias]
# Or: from rdkit.DataStructs import BulkTanimotoSimilarity [as alias]
def ClusterFps(fps, cutoff=0.2):
    # (ytz): this is directly copypasta'd from Greg Landrum's clustering example.
    dists = []
    nfps = len(fps)
    from rdkit import DataStructs
    for i in range(1, nfps):
        sims = DataStructs.BulkTanimotoSimilarity(fps[i], fps[:i])
        dists.extend([1 - x for x in sims])
    from rdkit.ML.Cluster import Butina
    cs = Butina.ClusterData(dists, nfps, cutoff, isDistData=True)
    return cs
Example 6: ClusterFps
# Required import: from rdkit import DataStructs [as alias]
# Or: from rdkit.DataStructs import BulkTanimotoSimilarity [as alias]
def ClusterFps(fps, cutoff=0.2):
    # (ytz): this is directly copypasta'd from Greg Landrum's clustering example.
    # Assumes DataStructs and Butina (rdkit.ML.Cluster) are imported at module level.
    dists = []
    nfps = len(fps)
    for i in range(1, nfps):
        sims = DataStructs.BulkTanimotoSimilarity(fps[i], fps[:i])
        dists.extend([1 - x for x in sims])
    cs = Butina.ClusterData(dists, nfps, cutoff, isDistData=True)
    return cs
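A brief usage sketch for ClusterFps as defined above, assuming same-sized RDKit Morgan bit vectors; Butina.ClusterData returns a tuple of clusters, each cluster being a tuple of fingerprint indices with the cluster centroid listed first.

from rdkit import Chem
from rdkit.Chem import AllChem

# Hypothetical molecules; any list of same-sized RDKit bit vectors works the same way
smiles = ['CCO', 'CCN', 'c1ccccc1', 'c1ccccc1O']
fps = [AllChem.GetMorganFingerprintAsBitVect(Chem.MolFromSmiles(s), 2, 1024) for s in smiles]

clusters = ClusterFps(fps, cutoff=0.4)
print(clusters)  # tuple of tuples of fingerprint indices, one tuple per cluster, centroid first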
Example 7: tanimoto_worker
# Required import: from rdkit import DataStructs [as alias]
# Or: from rdkit.DataStructs import BulkTanimotoSimilarity [as alias]
def tanimoto_worker(k, fps):
    """Get per-fingerprint Tanimoto distance vector."""
    # pylint: disable=no-member
    sims = DataStructs.BulkTanimotoSimilarity(fps[k], fps[(k + 1):])
    dists_k = [1. - s for s in sims]
    return np.array(dists_k), 0
Example 8: tanimoto_single
# Required import: from rdkit import DataStructs [as alias]
# Or: from rdkit.DataStructs import BulkTanimotoSimilarity [as alias]
def tanimoto_single(fp, fps):
    """Get per-fingerprint Tanimoto distance vector."""
    # pylint: disable=no-member
    sims = DataStructs.BulkTanimotoSimilarity(fp, fps)
    dists = [1. - s for s in sims]
    return np.array(dists), 0
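The two helpers above each return one row of the upper triangle of a distance matrix. Concatenating the rows of tanimoto_worker in order yields exactly the condensed distance vector that SciPy's squareform expects, so the full symmetric matrix can be recovered as in this sketch (assuming fps is a list of RDKit fingerprints and SciPy is available):

import numpy as np
from scipy.spatial.distance import squareform

rows = [tanimoto_worker(k, fps)[0] for k in range(len(fps) - 1)]
dist_matrix = squareform(np.concatenate(rows))  # full symmetric Tanimoto-distance matrix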
Example 9: __compute_diversity
# Required import: from rdkit import DataStructs [as alias]
# Or: from rdkit.DataStructs import BulkTanimotoSimilarity [as alias]
def __compute_diversity(mol, fps):
    ref_fps = Chem.rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, 4, nBits=2048)
    dist = DataStructs.BulkTanimotoSimilarity(ref_fps, fps, returnDistance=True)
    score = np.mean(dist)
    return score
Example 10: calculate_pairwise_similarities
# Required import: from rdkit import DataStructs [as alias]
# Or: from rdkit.DataStructs import BulkTanimotoSimilarity [as alias]
def calculate_pairwise_similarities(smiles_list1: List[str], smiles_list2: List[str]) -> np.array:
    """
    Computes the pairwise ECFP4 Tanimoto similarity of the two SMILES containers.

    Returns:
        Pairwise similarity matrix as np.array
    """
    if len(smiles_list1) > 10000 or len(smiles_list2) > 10000:
        logger.warning(f'Calculating similarity between large sets of '
                       f'SMILES strings ({len(smiles_list1)} x {len(smiles_list2)})')

    mols1 = get_mols(smiles_list1)
    fps1 = get_fingerprints(mols1)

    mols2 = get_mols(smiles_list2)
    fps2 = get_fingerprints(mols2)

    similarities = []
    for fp1 in fps1:
        sims = DataStructs.BulkTanimotoSimilarity(fp1, fps2)
        similarities.append(sims)

    similarities = np.array(similarities)

    return similarities
Example 11: diversity
# Required import: from rdkit import DataStructs [as alias]
# Or: from rdkit.DataStructs import BulkTanimotoSimilarity [as alias]
def diversity(smile, fps):
    val = 0.0
    low_rand_dst = 0.9
    mean_div_dst = 0.945
    ref_mol = Chem.MolFromSmiles(smile)
    # GetMorganFingerprintAsBitVect lives in rdkit.Chem.AllChem / rdMolDescriptors,
    # not directly in rdkit.Chem, so AllChem is used here
    ref_fps = AllChem.GetMorganFingerprintAsBitVect(ref_mol, 4, nBits=2048)
    dist = DataStructs.BulkTanimotoSimilarity(
        ref_fps, fps, returnDistance=True)
    mean_dist = np.mean(np.array(dist))
    val = remap(mean_dist, low_rand_dst, mean_div_dst)
    val = np.clip(val, 0.0, 1.0)
    return val
#==============
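remap is a helper from this example's source file and is not shown here. A common definition (an assumption on this page, not the original code) linearly rescales x from the range [x_min, x_max] onto [0, 1], which is consistent with the np.clip call that follows:

def remap(x, x_min, x_max):
    # Linear rescaling of x from [x_min, x_max] to [0, 1]; values outside the range
    # fall outside [0, 1] and are clipped by the caller.
    return (x - x_min) / (x_max - x_min)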
Example 12: tanimoto_1d
# Required import: from rdkit import DataStructs [as alias]
# Or: from rdkit.DataStructs import BulkTanimotoSimilarity [as alias]
def tanimoto_1d(fps):
    ds = []
    for i in range(1, len(fps)):
        ds.extend(DataStructs.BulkTanimotoSimilarity(
            fps[i], fps[:i], returnDistance=True))
    return ds
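Because returnDistance=True already yields 1 - similarity, the list produced by tanimoto_1d is exactly the lower-triangle distance input used for Butina clustering in Examples 5 and 6; a minimal sketch, assuming fps is a list of RDKit fingerprints and a cutoff of 0.35 chosen only for illustration:

from rdkit.ML.Cluster import Butina

clusters = Butina.ClusterData(tanimoto_1d(fps), len(fps), 0.35, isDistData=True)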
Example 13: diversity
# Required import: from rdkit import DataStructs [as alias]
# Or: from rdkit.DataStructs import BulkTanimotoSimilarity [as alias]
def diversity(fake_path, real_path=None, is_active=False):
    """ Molecular diversity measurement based on Tanimoto distance over ECFP6 fingerprints,
    covering both intra-diversity and inter-diversity.

    Arguments:
        fake_path (str): file path of the molecules whose diversity is to be measured.
        real_path (str, optional): file path of the reference molecules; if it is provided,
            inter-diversity is calculated, otherwise intra-diversity is calculated.
        is_active (bool, optional): select only active ligands (True) or all molecules (False).
            If True, molecules with PCHEMBL_VALUE >= 6.5 or SCORE > 0.5 are selected.
            (Default: False)

    Returns:
        df (DataFrame): a table containing the CANONICAL_SMILES column and the diversity
            value of each molecule.
    """
    fake = pd.read_table(fake_path)
    fake = fake[fake.SCORE > (0.5 if is_active else 0)]
    fake = fake.drop_duplicates(subset='CANONICAL_SMILES')
    fake_fps, real_fps = [], []
    for i, row in fake.iterrows():
        mol = Chem.MolFromSmiles(row.CANONICAL_SMILES)
        fake_fps.append(AllChem.GetMorganFingerprint(mol, 3))
    if real_path:
        real = pd.read_table(real_path)
        real = real[real.PCHEMBL_VALUE >= (6.5 if is_active else 0)]
        for i, row in real.iterrows():
            mol = Chem.MolFromSmiles(row.CANONICAL_SMILES)
            real_fps.append(AllChem.GetMorganFingerprint(mol, 3))
    else:
        real_fps = fake_fps
    method = np.min if real_path else np.mean
    dist = 1 - np.array([method(DataStructs.BulkTanimotoSimilarity(f, real_fps)) for f in fake_fps])
    fake['DIST'] = dist
    return fake
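As a closing note, several of the examples above (9, 11 and 12) pass returnDistance=True instead of computing 1 - similarity by hand; both routes give the same numbers, as this small self-contained check illustrates (hypothetical molecules):

from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem

fps = [AllChem.GetMorganFingerprintAsBitVect(Chem.MolFromSmiles(s), 2, 2048)
       for s in ('CCO', 'CCN', 'c1ccccc1')]

sims = DataStructs.BulkTanimotoSimilarity(fps[0], fps[1:])
dists = DataStructs.BulkTanimotoSimilarity(fps[0], fps[1:], returnDistance=True)
assert all(abs((1.0 - s) - d) < 1e-9 for s, d in zip(sims, dists))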