This article collects typical usage examples of the rdkit.DataStructs.DiceSimilarity method in Python. If you are asking how DataStructs.DiceSimilarity works, how to call it, or where to find examples of it in use, the curated code examples here may help. You can also look further into usage examples for the module the method belongs to, rdkit.DataStructs.
A total of 2 code examples of the DataStructs.DiceSimilarity method are shown below, sorted by popularity by default.
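Before the full examples, here is a minimal, self-contained sketch of calling DataStructs.DiceSimilarity directly on two count-based Morgan fingerprints. The SMILES strings are arbitrary choices for illustration and do not come from the examples below.

from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem

# Two small molecules chosen purely for illustration
mol_a = Chem.MolFromSmiles('CCO')   # ethanol
mol_b = Chem.MolFromSmiles('CCN')   # ethylamine

# Count-based Morgan fingerprints with radius 2
fp_a = AllChem.GetMorganFingerprint(mol_a, 2)
fp_b = AllChem.GetMorganFingerprint(mol_b, 2)

# Dice similarity lies in [0, 1]; identical fingerprints give 1.0
print(DataStructs.DiceSimilarity(fp_a, fp_b))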
Example 1: morgan_similarity
# Required import: from rdkit import DataStructs [as alias]
# Or: from rdkit.DataStructs import DiceSimilarity [as alias]
# Imports used by this snippet (in addition to the RDKit imports noted above)
from itertools import product
import math
from typing import List

import numpy as np
from tqdm import tqdm

from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem


def morgan_similarity(smiles_1: List[str], smiles_2: List[str], radius: int, sample_rate: float):
    """
    Determines the similarity between the Morgan fingerprints of two lists of SMILES strings.

    :param smiles_1: A list of SMILES strings.
    :param smiles_2: A list of SMILES strings.
    :param radius: The radius of the Morgan fingerprints.
    :param sample_rate: Rate at which to sample pairs of molecules for Morgan similarity (to reduce time).
    """
    # Compute similarities
    similarities = []
    num_pairs = len(smiles_1) * len(smiles_2)

    # Sample to improve speed
    if sample_rate < 1.0:
        sample_num_pairs = sample_rate * num_pairs
        sample_size = math.ceil(math.sqrt(sample_num_pairs))
        sample_smiles_1 = np.random.choice(smiles_1, size=sample_size, replace=True)
        sample_smiles_2 = np.random.choice(smiles_2, size=sample_size, replace=True)
    else:
        sample_smiles_1, sample_smiles_2 = smiles_1, smiles_2
        sample_num_pairs = len(sample_smiles_1) * len(sample_smiles_2)

    # Compare every sampled pair using count-based Morgan fingerprints and Dice similarity
    for smile_1, smile_2 in tqdm(product(sample_smiles_1, sample_smiles_2), total=sample_num_pairs):
        mol_1, mol_2 = Chem.MolFromSmiles(smile_1), Chem.MolFromSmiles(smile_2)
        fp_1, fp_2 = AllChem.GetMorganFingerprint(mol_1, radius), AllChem.GetMorganFingerprint(mol_2, radius)
        similarity = DataStructs.DiceSimilarity(fp_1, fp_2)
        similarities.append(similarity)
    similarities = np.array(similarities)

    # Print results
    print()
    print(f'Average dice similarity = {np.mean(similarities):.4f} +/- {np.std(similarities):.4f}')
    print(f'Minimum dice similarity = {np.min(similarities):.4f}')
    print(f'Maximum dice similarity = {np.max(similarities):.4f}')
    print()
    print('Percentiles for dice similarity')
    print(' | '.join([f'{i}% = {np.percentile(similarities, i):.4f}' for i in range(0, 101, 10)]))
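A hypothetical invocation of morgan_similarity might look like the following; the SMILES lists are placeholder values chosen for illustration, and a sample_rate of 1.0 compares every pair.

# Hypothetical usage; assumes morgan_similarity and its imports are in scope
reference_smiles = ['CCO', 'c1ccccc1', 'CC(=O)O']
query_smiles = ['CCN', 'c1ccncc1']
morgan_similarity(reference_smiles, query_smiles, radius=2, sample_rate=1.0)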
Example 2: split
# Required import: from rdkit import DataStructs [as alias]
# Or: from rdkit.DataStructs import DiceSimilarity [as alias]
# Imports used by this snippet; split takes self, so it is a method of a splitter class
import random

import numpy as np
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from rdkit.SimDivFilters.rdSimDivPickers import MaxMinPicker


def split(self,
          dataset,
          seed=None,
          frac_train=.8,
          frac_valid=.1,
          frac_test=.1,
          log_every_n=None):
    """
    Splits internal compounds into train/validation/test using MaxMin diversity picking.
    """
    np.testing.assert_almost_equal(frac_train + frac_valid + frac_test, 1.)
    if seed is None:
        seed = random.randint(0, 2**30)
    np.random.seed(seed)

    num_datapoints = len(dataset)
    train_cutoff = int(frac_train * num_datapoints)
    valid_cutoff = int((frac_train + frac_valid) * num_datapoints)
    num_train = train_cutoff
    num_valid = valid_cutoff - train_cutoff
    num_test = num_datapoints - valid_cutoff

    # Build 1024-bit Morgan fingerprints (radius 2) for every compound in the dataset
    all_mols = []
    for ind, smiles in enumerate(dataset.ids):
        all_mols.append(Chem.MolFromSmiles(smiles))
    fps = [AllChem.GetMorganFingerprintAsBitVect(x, 2, 1024) for x in all_mols]

    def distance(i, j):
        # MaxMinPicker expects a distance, so use 1 - Dice similarity
        return 1 - DataStructs.DiceSimilarity(fps[i], fps[j])

    # Pick the most diverse molecules for the test set first,
    # then extend the same picking to cover the validation set
    picker = MaxMinPicker()
    testIndices = picker.LazyPick(
        distFunc=distance,
        poolSize=num_datapoints,
        pickSize=num_test,
        seed=seed)
    validTestIndices = picker.LazyPick(
        distFunc=distance,
        poolSize=num_datapoints,
        pickSize=num_valid + num_test,
        firstPicks=testIndices,
        seed=seed)

    allSet = set(range(num_datapoints))
    testSet = set(testIndices)
    validSet = set(validTestIndices) - testSet
    trainSet = allSet - testSet - validSet

    # Sanity checks: the three subsets are disjoint and together cover the whole dataset
    assert len(testSet & validSet) == 0
    assert len(testSet & trainSet) == 0
    assert len(validSet & trainSet) == 0
    assert (validSet | trainSet | testSet) == allSet
    return sorted(list(trainSet)), sorted(list(validSet)), sorted(list(testSet))
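Since split takes self, it belongs to a splitter class (for example a DeepChem-style MaxMin splitter) and is not called standalone. As a rough, self-contained sketch of the same MaxMin diversity-picking idea, the snippet below uses RDKit's MaxMinPicker directly on a handful of illustrative SMILES; the molecules and pick size are arbitrary assumptions, not part of the original example.

from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from rdkit.SimDivFilters.rdSimDivPickers import MaxMinPicker

# Illustrative molecules only; any pool of SMILES would do
smiles = ['CCO', 'CCN', 'c1ccccc1', 'CC(=O)O', 'CCCCCC']
fps = [AllChem.GetMorganFingerprintAsBitVect(Chem.MolFromSmiles(s), 2, 1024) for s in smiles]

def dice_distance(i, j):
    # MaxMinPicker wants a distance, so convert similarity to 1 - similarity
    return 1 - DataStructs.DiceSimilarity(fps[i], fps[j])

picker = MaxMinPicker()
picks = picker.LazyPick(dice_distance, len(fps), 2, seed=23)
print(list(picks))  # indices of the two most mutually dissimilar molecules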