本文整理汇总了Python中rdkit.Chem.rdMolDescriptors.GetMorganFingerprintAsBitVect方法的典型用法代码示例。如果您正苦于以下问题：Python rdMolDescriptors.GetMorganFingerprintAsBitVect方法的具体用法？Python rdMolDescriptors.GetMorganFingerprintAsBitVect怎么用？Python rdMolDescriptors.GetMorganFingerprintAsBitVect使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块rdkit.Chem.rdMolDescriptors的用法示例。
在下文中一共展示了rdMolDescriptors.GetMorganFingerprintAsBitVect方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: reward_target_molecule_similarity
# 需要导入模块: from rdkit.Chem import rdMolDescriptors [as 别名]
# 或者: from rdkit.Chem.rdMolDescriptors import GetMorganFingerprintAsBitVect [as 别名]
def reward_target_molecule_similarity(mol, target, radius=2, nBits=2048,
                                      useChirality=True):
    """
    Score how closely a molecule resembles a target molecule.

    Both molecules are converted to Morgan bit-vector fingerprints using the
    same settings, and the Tanimoto similarity of the two fingerprints is
    returned.
    :param mol: rdkit mol object
    :param target: rdkit mol object
    :param radius: radius forwarded to the fingerprint call
    :param nBits: bit-vector length forwarded to the fingerprint call
    :param useChirality: chirality flag forwarded to the fingerprint call
    :return: float, [0.0, 1.0]
    """
    # One shared option set guarantees both fingerprints are comparable.
    fp_options = dict(radius=radius, nBits=nBits, useChirality=useChirality)
    fingerprints = [
        rdMolDescriptors.GetMorganFingerprintAsBitVect(molecule, **fp_options)
        for molecule in (mol, target)
    ]
    return DataStructs.TanimotoSimilarity(*fingerprints)
### TERMINAL VALUE REWARDS ###
示例2: __init__
# 需要导入模块: from rdkit.Chem import rdMolDescriptors [as 别名]
# 或者: from rdkit.Chem.rdMolDescriptors import GetMorganFingerprintAsBitVect [as 别名]
def __init__(self, mol, radius, nBits=2048, useChirality=False, useBondTypes=True,
             useFeatures=False, smiles=None):
    """Compute and store the Morgan bit-vector fingerprint of ``mol``.

    The fingerprint is kept both as the object returned by RDKit
    (``self.fingerprint``) and as a numpy array (``self.fingerprint_array``).
    ``smiles`` is stored unchanged on ``self.smiles``.
    """
    morgan_options = {
        'nBits': nBits,
        'useChirality': useChirality,
        'useBondTypes': useBondTypes,
        'useFeatures': useFeatures,
    }
    bit_vector = rdMolDescriptors.GetMorganFingerprintAsBitVect(
        mol, radius, **morgan_options)
    self.fingerprint = bit_vector
    self.fingerprint_array = np.asarray(bit_vector)
    self.smiles = smiles
示例3: _featurize
# 需要导入模块: from rdkit.Chem import rdMolDescriptors [as 别名]
# 或者: from rdkit.Chem.rdMolDescriptors import GetMorganFingerprintAsBitVect [as 别名]
def _featurize(self, mol):
    """
    Calculate circular fingerprint.

    When ``self.sparse`` is false, a Morgan bit vector of ``self.size`` bits
    is returned. When it is true, the non-zero elements of the Morgan
    fingerprint are returned as a dict; if ``self.smiles`` is also set, each
    dict value becomes ``{'smiles': ..., 'count': ...}`` for the fragment.

    Parameters
    ----------
    mol : RDKit Mol
        Molecule.
    """
    from rdkit import Chem
    from rdkit.Chem import rdMolDescriptors

    # Dense case: a fixed-size bit vector, nothing else to do.
    if not self.sparse:
        return rdMolDescriptors.GetMorganFingerprintAsBitVect(
            mol,
            self.radius,
            nBits=self.size,
            useChirality=self.chiral,
            useBondTypes=self.bonds,
            useFeatures=self.features)

    bit_info = {}
    sparse_fp = rdMolDescriptors.GetMorganFingerprint(
        mol,
        self.radius,
        useChirality=self.chiral,
        useBondTypes=self.bonds,
        useFeatures=self.features,
        bitInfo=bit_info)
    counts = sparse_fp.GetNonzeroElements()  # convert to a dict
    if not self.smiles:
        return counts

    # generate SMILES for fragments
    annotated = {}
    for fragment_id, count in counts.items():
        # Only the first recorded (atom, radius) occurrence is used.
        center, env_radius = bit_info[fragment_id][0]
        bond_path = Chem.FindAtomEnvironmentOfRadiusN(mol, env_radius, center)
        fragment = Chem.PathToSubmol(mol, bond_path)
        annotated[fragment_id] = {
            'smiles': Chem.MolToSmiles(fragment),
            'count': count,
        }
    return annotated
示例4: get_highest_similarity
# 需要导入模块: from rdkit.Chem import rdMolDescriptors [as 别名]
# 或者: from rdkit.Chem.rdMolDescriptors import GetMorganFingerprintAsBitVect [as 别名]
def get_highest_similarity(input_file, output_file, comparison_file='../full_toxcast/restructured.csv',
                           top_compounds_only=True, num_compounds=1500):
    """Write, for each input compound, its score and best comparison match.

    Reads ``input_file`` (CSV with ``smiles`` and ``avg_score`` columns),
    optionally keeps only the first ``num_compounds`` rows, fingerprints every
    SMILES in ``comparison_file`` and in the input, and writes a CSV to
    ``output_file`` with columns ``smiles``, ``avg_score`` and
    ``max_similarity``. The ``max_similarity`` value is whatever
    ``get_highest_similarity_for_mol`` returns for the compound's fingerprint
    against the list of comparison fingerprints.
    """
    scored = pd.read_csv(input_file, header=0, index_col=False)
    if top_compounds_only:
        scored = scored.head(num_compounds)
    smiles_list = scored['smiles']
    avg_scores = scored['avg_score']

    reference = pd.read_csv(comparison_file, header=0, index_col=False)
    reference_fps = [
        FingerprintMols.FingerprintMol(Chem.MolFromSmiles(ref_smiles))
        for ref_smiles in reference['smiles']
    ]

    best_matches = []
    for position, candidate in enumerate(smiles_list):
        candidate_fp = FingerprintMols.FingerprintMol(
            Chem.MolFromSmiles(candidate))
        best_matches.append(
            get_highest_similarity_for_mol(candidate_fp, reference_fps))
        # Progress indicator on every 500th compound.
        if position % 500 == 0:
            print(position)

    columns = ['smiles', 'avg_score', 'max_similarity']
    with open(output_file, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=columns)
        writer.writeheader()
        for position, candidate in enumerate(smiles_list):
            writer.writerow({
                'smiles': candidate,
                'avg_score': avg_scores[position],
                'max_similarity': best_matches[position],
            })
示例5: get_input_features
# 需要导入模块: from rdkit.Chem import rdMolDescriptors [as 别名]
# 或者: from rdkit.Chem.rdMolDescriptors import GetMorganFingerprintAsBitVect [as 别名]
def get_input_features(self, mol):
    """Return the Morgan fingerprint bit vector of ``mol`` as a float32 array.

    :param mol: molecule handed to
        ``rdMolDescriptors.GetMorganFingerprintAsBitVect`` together with
        ``self.radius``.
    :return: ``numpy`` array of dtype ``float32`` built from the bit vector.
    :raises MolFeatureExtractionError: if the fingerprint call raises any
        exception.
    """
    try:
        fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(mol,
                                                            self.radius)
    except Exception as e:
        logger = getLogger(__name__)
        # The exception must go through a %s placeholder: passing it as an
        # extra argument without one makes logging fail to format the record.
        logger.debug('exception caught at ECFPPreprocessor: %s', e)
        # Extracting feature failed
        raise MolFeatureExtractionError
    # TODO(Nakago): Test it.
    return numpy.asarray(fp, numpy.float32)
示例6: __represent
# 需要导入模块: from rdkit.Chem import rdMolDescriptors [as 别名]
# 或者: from rdkit.Chem.rdMolDescriptors import GetMorganFingerprintAsBitVect [as 别名]
def __represent(self, smiles):
    """Turn a SMILES string into a numpy array of its Morgan bit vector.

    Surrounding whitespace is stripped before parsing. Raises ``ValueError``
    when RDKit cannot parse the string.
    """
    # The descriptor must be a binary Morgan fingerprint with radius 2 and 1024 bits.
    parsed = Chem.MolFromSmiles(smiles.strip())
    if parsed is not None:
        bit_vector = GetMorganFingerprintAsBitVect(parsed, radius=2, nBits=1024)
        return np.array(bit_vector)
    raise ValueError('%s is not a valid SMILES representation' % smiles)
示例7: featurize
# 需要导入模块: from rdkit.Chem import rdMolDescriptors [as 别名]
# 或者: from rdkit.Chem.rdMolDescriptors import GetMorganFingerprintAsBitVect [as 别名]
def featurize(self, x):
    """Return the feature-based Morgan bit vector of ``x`` as a list.

    ``x`` is parsed from SMILES when ``self.input_type`` is ``'smiles'``, or
    when it is ``'any'`` and ``x`` is not already an RDKit ``Mol``. A
    ``ValueError`` is raised if parsing yields ``None``.
    """
    mode = self.input_type
    must_parse = mode == 'smiles' or (
        mode == 'any' and not isinstance(x, Chem.rdchem.Mol))
    if must_parse:
        # Keep the raw input so the error message can show it.
        raw = x
        x = Chem.MolFromSmiles(raw)
        if x is None:
            raise ValueError('can not convert Mol from SMILES %s' % raw)
    bit_vector = rdMol.GetMorganFingerprintAsBitVect(
        x, self.radius, nBits=self.n_bits, useFeatures=True)
    return list(bit_vector)
示例8: split
# 需要导入模块: from rdkit.Chem import rdMolDescriptors [as 别名]
# 或者: from rdkit.Chem.rdMolDescriptors import GetMorganFingerprintAsBitVect [as 别名]
def split(self,
          dataset,
          seed=None,
          frac_train=.8,
          frac_valid=.1,
          frac_test=.1,
          log_every_n=None):
    """
    Splits compounds into train/validation/test index lists.

    Every SMILES in ``dataset.ids`` is fingerprinted (Morgan bit vector,
    radius 2, 1024 bits). Test indices are chosen first with
    ``MaxMinPicker.LazyPick`` using ``1 - DiceSimilarity`` as the distance;
    a second pick seeded with those indices yields the validation indices,
    and everything left over is the training set. The three fractions must
    sum to 1. ``log_every_n`` is accepted but not used in this body.

    Returns three sorted lists of indices: train, valid, test.
    """
    np.testing.assert_almost_equal(frac_train + frac_valid + frac_test, 1.)
    if seed is None:
        seed = random.randint(0, 2**30)
    np.random.seed(seed)

    n_total = len(dataset)
    first_cut = int(frac_train * n_total)
    second_cut = int((frac_train + frac_valid) * n_total)
    n_valid = second_cut - first_cut
    n_test = n_total - second_cut

    molecules = [Chem.MolFromSmiles(smi) for smi in dataset.ids]
    fingerprints = [
        AllChem.GetMorganFingerprintAsBitVect(molecule, 2, 1024)
        for molecule in molecules
    ]

    def distance(a, b):
        return 1 - DataStructs.DiceSimilarity(fingerprints[a], fingerprints[b])

    picker = MaxMinPicker()
    test_picks = picker.LazyPick(
        distFunc=distance,
        poolSize=n_total,
        pickSize=n_test,
        seed=seed)
    # Second pick starts from the test picks, so the extra indices it adds
    # become the validation set.
    valid_and_test_picks = picker.LazyPick(
        distFunc=distance,
        poolSize=n_total,
        pickSize=n_valid + n_test,
        firstPicks=test_picks,
        seed=seed)

    everything = set(range(n_total))
    test_ids = set(test_picks)
    valid_ids = set(valid_and_test_picks) - test_ids
    train_ids = everything - test_ids - valid_ids

    assert test_ids.isdisjoint(valid_ids)
    assert test_ids.isdisjoint(train_ids)
    assert valid_ids.isdisjoint(train_ids)
    assert (valid_ids | train_ids | test_ids) == everything

    return sorted(train_ids), sorted(valid_ids), sorted(test_ids)