This article collects typical usage examples of the Python function msmbuilder.utils.verboseload. If you are wondering what verboseload does, how to call it, or what real-world uses look like, the curated code examples below may help.
The following shows 15 code examples of the verboseload function, sorted by popularity by default.
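Before the examples, here is a minimal round-trip sketch of what verboseload does: it is the counterpart of verbosedump, loading a pickled Python object from disk and printing the file name as it goes. The file name below is just a placeholder.

# Minimal verboseload/verbosedump round-trip (file name is a placeholder)
import numpy as np
from msmbuilder.utils import verbosedump, verboseload

tica_data = {"traj_0": np.random.rand(100, 5)}
verbosedump(tica_data, "tica_data.pkl")    # saves the object and prints the path
restored = verboseload("tica_data.pkl")    # loads it back and prints the path
assert np.allclose(tica_data["traj_0"], restored["traj_0"])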
Example 1: load_current_protein_model
def load_current_protein_model(yaml_file, protein, sanity=True):
"""
:param yaml_file: Path to the project's yaml file
:param protein: Name of the protein whose model should be loaded
:param sanity: Whether or not to run sanity tests
:return: base_dir, mdl_dir,
msm_mdl, tica_mdl,
tica_data, kmeans_mdl,
fixed_assignments for the model currently stored in
mdl_dir and mdl_dir/protein
"""
yaml_file = load_yaml_file(yaml_file)
base_dir = yaml_file["base_dir"]
mdl_dir = yaml_file["mdl_dir"]
prot_mdl_dir = os.path.join(mdl_dir, protein)
# load the project level information first
kmeans_mdl = verboseload(os.path.join(mdl_dir, "kmeans_mdl.pkl"))
tica_mdl = verboseload(os.path.join(mdl_dir, "tica_mdl.pkl"))
# now load the protein level information
tica_data = verboseload(os.path.join(prot_mdl_dir, "tica_data.pkl"))
# need the fixed assignments because otherwise we will have issues
assignments = verboseload(os.path.join(
prot_mdl_dir, "fixed_assignments.pkl"))
msm_mdl = verboseload(os.path.join(prot_mdl_dir, "msm_mdl.pkl"))
# some sanity tests
if sanity:
_sanity_test(base_dir, protein, msm_mdl,
tica_data, kmeans_mdl, assignments)
return base_dir, mdl_dir, msm_mdl, tica_mdl, tica_data, kmeans_mdl, assignments
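A hedged usage sketch for the function above; the yaml path and protein name are hypothetical, and the yaml file is assumed to define base_dir and mdl_dir as the function expects.

# Hypothetical call; "project.yaml" and "kinase_1" are placeholders
(base_dir, mdl_dir, msm_mdl, tica_mdl,
 tica_data, kmeans_mdl, assignments) = load_current_protein_model(
    "project.yaml", "kinase_1", sanity=False)
print(msm_mdl.n_states_)  # quick check that the MSM loaded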
Example 2: featurize_project
def featurize_project(proj_folder,top_folder,featurizer_object,stride,view):
# If the project has already been featurized, return the cached result (a warning should probably be added here)
if os.path.exists(proj_folder+"/featurized_traj.pkl"):
return verboseload(proj_folder+"/featurized_traj.pkl")
if featurizer_object is None:
featurizer = DihedralFeaturizer(types=['phi', 'psi','chi1'])
else:
try:
featurizer = verboseload(featurizer_object)
except:
sys.exit("Cant Load Featurizer using msmbuilder verboseload")
feature_dict={}
traj_list = glob.glob(proj_folder+"/trajectories/*.dcd")
jobs = [(proj_folder,top_folder,featurizer,traj,stride) for traj in traj_list]
results = view.map_sync(featurize_traj,jobs)
for result in results:
feature_dict[result[0]] = result[1]
verbosedump(feature_dict,proj_folder+"/featurized_traj.pkl")
return feature_dict
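The load-or-compute pattern at the top of featurize_project is the caching idiom most of these examples rely on. A stripped-down sketch, where the cache path and the featurization helper are placeholders:

# Generic cache-or-compute idiom built on verboseload/verbosedump
import os
from msmbuilder.utils import verbosedump, verboseload

cache_path = "featurized_traj.pkl"          # placeholder path
if os.path.exists(cache_path):
    feature_dict = verboseload(cache_path)  # reuse the cached result
else:
    feature_dict = run_featurization()      # hypothetical helper that builds the dict
    verbosedump(feature_dict, cache_path)   # cache it for the next run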
Example 3: transform_protein_tica
def transform_protein_tica(yaml_file):
mdl_dir = yaml_file["mdl_dir"]
tica_obj_path = os.path.join(mdl_dir, "tica_mdl.pkl")
protein_tica_mdl = verboseload(tica_obj_path)
for protein in yaml_file["protein_list"]:
with enter_protein_data_dir(yaml_file, protein):
print("Transforming protein %s" % protein)
featurized_traj = sorted(glob.glob("./%s/*.jl" %
yaml_file["feature_dir"]), key=keynat)
tica_data = {}
for f in featurized_traj:
featurized_path = verboseload(f)
try:
tica_data[os.path.basename(f)] = \
protein_tica_mdl.partial_transform(featurized_path)
except:
pass
with enter_protein_mdl_dir(yaml_file, protein):
verbosedump(tica_data, 'tica_data.pkl')
print("Done transforming protein %s" % protein)
# dumping the tica_mdl again since the eigenspectrum might have been calculated.
tica_mdl_path = os.path.join(mdl_dir, "tica_mdl.pkl")
verbosedump(protein_tica_mdl, tica_mdl_path)
return
Example 4: fit_and_transform
def fit_and_transform(features_directory, model_dir, stride=5, lag_time=10, n_components = 5):
if not os.path.exists(model_dir):
os.makedirs(model_dir)
projected_data_filename = "%s/phi_psi_chi2_allprot_projected.h5" %model_dir
fit_model_filename = "%s/phi_psi_chi2_allprot_tica_coords.h5" %model_dir
#active_pdb_file = "/scratch/users/enf/b2ar_analysis/renamed_topologies/A-00.pdb"
tica_model = tICA(n_components = n_components, lag_time = lag_time)
if not os.path.exists(projected_data_filename):
print("loading feature files")
feature_files = get_trajectory_files(features_directory, ext = ".h5")
pool = mp.Pool(mp.cpu_count())
features = pool.map(load_features, feature_files)
pool.terminate()
if not os.path.exists(fit_model_filename):
print("fitting data to tICA model")
fit_model = tica_model.fit(features)
verbosedump(fit_model, fit_model_filename)
transformed_data = fit_model.transform(features)
verbosedump(transformed_data, projected_data_filename)
else:
print("loading tICA model")
fit_model = verboseload(fit_model_filename)
print("transforming")
transformed_data = fit_model.transform(features)
verbosedump(transformed_data, projected_data_filename)
else:
fit_model = verboseload(fit_model_filename)
transformed_data = verboseload(projected_data_filename)
print(fit_model.summarize())
Example 5: cos_to_means
def cos_to_means(clusterer_dir, features_dir):
clusterer = verboseload(clusterer_dir)
clusters_map = make_clusters_map(clusterer)
features = verboseload(features_dir)
feature_distances = {}
for i in range(0, len(clusters_map.keys())):
indices = clusters_map[i]
k_mean = clusterer.cluster_centers_[i]
print(k_mean)
find_cos_partial = partial(find_cos, k_mean=k_mean, features = features)
feature_distances_i = map(find_cos_partial, indices)
feature_distances[i] = feature_distances_i
print(feature_distances[0][0:10])
sorted_map = {}
print(feature_distances.keys())
print(len(feature_distances.keys()))
for i in range(0, len(feature_distances.keys())):
sorted_features = sorted(feature_distances[i], key = lambda x: x[2], reverse = True)
sorted_map[i] = sorted_features
print(sorted_map[0][0:10])
return sorted_map
Example 6: dist_to_means
def dist_to_means(clusterer_dir, features_dir):
clusterer = verboseload(clusterer_dir)
clusters_map = make_clusters_map(clusterer)
features = verboseload(features_dir)
feature_distances = {}
def find_cos(index, k_mean):
traj = index[0]
frame = index[1]
conformation = features[traj][frame]
a = conformation
b = k_mean
return (traj, frame, np.dot(a,b) / (np.linalg.norm(a) * np.linalg.norm(b)))
for i in range(0, len(clusters_map.keys())):
indices = clusters_map[i]
k_mean = clusterer.cluster_centers_[i]
print(k_mean)
find_cos_partial = partial(find_cos, k_mean=k_mean)
feature_distances_i = map(find_cos_partial, indices)
feature_distances[i] = feature_distances_i
print(feature_distances[0][0:10])
sorted_map = {}
print(feature_distances.keys())
print(len(feature_distances.keys()))
for i in range(0, len(feature_distances.keys())):
sorted_features = sorted(feature_distances[i], key = lambda x: x[2], reverse = True)
sorted_map[i] = sorted_features
print(sorted_map[0][0:10])
return sorted_map
Example 7: landmark_ktica
def landmark_ktica(features_dir, combined_features_file=None, feature_ext = ".dataset", use_clusters_as_landmarks=True, clusters_map_file = "",
landmarks_dir = "", nystroem_components=1000, n_components=10, lag_time=5, nystroem_data_filename = "",
fit_model_filename = "", projected_data_filename = "", landmark_subsample=10,
sparse = False, shrinkage = 0.05, wolf = False, rho = 0.01):
'''
features_dir: string, directory where your featurized trajectories are kept.
combined_features_file: if you have a single file containing all featurized trajectories (i.e., a list of np arrays), this is it.
feature_ext: if instead of a combined file of features they are in separate files, what is the extension of your feature files?
use_clusters_as_landmarks: this is if you are doing a composition of tICA --> clustering --> Nystroem --> tICA. this is what I do.
if true, you need to feed it a json file containing a dictionary that maps cluster name --> list of 2-tuples, where each tuple has
(trajectory_id, frame_number pairs). So this way, instead of choosing landmark points at random in the Nystroem approximation, you
are using regular linear tICA-driven clustering to choose your landmark points more efficiently.
landmarks_dir: file where the landmarks will be saved; it should contain a list of 1d np arrays or a 2d array.
nystroem_components: the number of landmarks to use.
n_components: the number of ktICA components to compute.
lag_time: lag time of tICA
nystroem_data_filename: where you will save Nystroem object
fit_model_filename: the filename of the ktICA object to save.
projected_data_filename: where you will save the features projected with kernel tICA
landmark_subsample: how frequently to subsample the landmarks when use_clusters_as_landmarks is True.
sparse: set to False.
shrinkage: same as gamma in older versions of tICA; you might want to tune this.
wolf: keep this as True unless you're using Robert's branch of msmbuilder.
rho: ignore this.
'''
if not os.path.exists(nystroem_data_filename):
if combined_features_file is not None:
features = verboseload(combined_features_file)
else:
features = load_file_list(get_trajectory_files(features_dir, ext = feature_ext))
if os.path.exists(landmarks_dir):
landmarks = verboseload(landmarks_dir)
print(np.shape(landmarks))
else:
if use_clusters_as_landmarks:
with open(clusters_map_file) as f:
clusters_map = json.load(f)
clusters_map = {int(k):v for k,v in clusters_map.items()}
landmarks = []
for cluster_id,sample_list in clusters_map.items():
for sample in sample_list:
traj = sample[0]
frame = sample[1]
landmark = features[traj][frame]
landmarks.append(landmark)
landmarks = [landmarks[i] for i in range(0, np.shape(landmarks)[0]) if i % landmark_subsample == 0]
verbosedump(landmarks, landmarks_dir)
else:
n = np.shape(features)[0]
indices = np.random.choice(n, nystroem_components)
features_concatenated = np.concatenate(features)
landmarks = features_concatenated[indices,:]
verbosedump(landmarks, landmarks_dir)
ktica(features, landmarks, projected_data_filename, nystroem_data_filename, fit_model_filename, sparse, shrinkage, wolf, rho)
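A hedged call sketch for landmark_ktica; every path below is a placeholder, and the clusters-map JSON is assumed to follow the cluster-id to (trajectory_id, frame_number) format described in the docstring.

# All paths are placeholders
landmark_ktica("features/",
               feature_ext=".dataset",
               use_clusters_as_landmarks=True,
               clusters_map_file="clusters_map.json",
               landmarks_dir="landmarks.pkl",
               nystroem_data_filename="nystroem_data.pkl",
               fit_model_filename="ktica_mdl.pkl",
               projected_data_filename="ktica_projected.pkl")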
Example 8: dist_to_means
def dist_to_means(clusterer_dir, features_dir, n_samples = False, n_components = False, tica_coords_csv = False, kmeans_csv = False):
clusterer = verboseload(clusterer_dir)
clusters_map = make_clusters_map(clusterer)
try:
features = verboseload(features_dir)
except:
features = load_dataset(features_dir)
feature_distances = {}
for i in range(0, len(clusters_map.keys())):
indices = clusters_map[i]
k_mean = clusterer.cluster_centers_[i]
print(k_mean)
find_dist_partial = partial(find_dist, k_mean=k_mean, features = features)
feature_distances_i = map(find_dist_partial, indices)
feature_distances[i] = feature_distances_i
print(feature_distances[0][0:10])
sorted_map = {}
print(feature_distances.keys())
print(len(feature_distances.keys()))
for i in range(0, len(feature_distances.keys())):
sorted_features = sorted(feature_distances[i], key = lambda x: x[2], reverse = False)
sorted_map[i] = sorted_features
if n_samples is not False and n_components is not False and tica_coords_csv is not False:
tica_coords_map = {}
for cluster_id in sorted_map.keys():
for j in range(0, n_samples):
sample = "cluster%d_sample%d" %(cluster_id, j)
sample_tuple = sorted_map[cluster_id][j][0:2]
sample_coords = features[sample_tuple[0]][sample_tuple[1]]
tica_coords_map[sample] = sample_coords
titles = ["sample"]
for k in range(0, n_components):
titles.append("component_%d" %k)
print(tica_coords_map.keys()[0])
print(tica_coords_map[tica_coords_map.keys()[0]])
write_map_to_csv(tica_coords_csv, tica_coords_map, titles)
if kmeans_csv is not False:
kmeans_map = {}
for cluster in range(0,clusterer.n_clusters):
k_mean = clusterer.cluster_centers_[cluster]
cluster_id = "cluster%d" %cluster
kmeans_map[cluster_id] = k_mean
titles = ["cluster"]
for k in range(0, n_components):
titles.append("component_%d" %k)
write_map_to_csv(kmeans_csv, kmeans_map, titles)
print(sorted_map[0][0:10])
return sorted_map
Example 9: plot_col
def plot_col(transformed_data_file, figure_directory, colors_file):
transformed_data = verboseload(transformed_data_file)
trajs = np.concatenate(transformed_data)
colors = np.concatenate(verboseload(colors_file))
sc = plt.scatter(trajs[:,0], trajs[:,1], c=colors, s=50, cmap = mpl.cm.RdYlBu_r)
plt.colorbar(sc)
plt.show()
pp = PdfPages(figure_directory)
pp.savefig()
pp.close()
return
Example 10: landmark_ktica_ticaTraj
def landmark_ktica_ticaTraj(tica_dir, clusterer_dir, ktica_dir, clusters_map_file = "", landmarks_dir = "", nystroem_components=1000, n_components=10, lag_time=5, nystroem_data_filename = "", fit_model_filename = "", projected_data_filename = "", landmark_subsample=1, sparse = False, wolf = True, rho = 0.01, shrinkage = None):
if not os.path.exists(ktica_dir): os.makedirs(ktica_dir)
if not sparse:
if shrinkage is None:
tica_model = tICA(n_components = n_components, lag_time = lag_time)
else:
tica_model = tICA(n_components = n_components, lag_time = lag_time, shrinkage = shrinkage)
else:
if shrinkage is None:
tica_model = SparseTICA(n_components = n_components, lag_time = lag_time, rho = rho)
else:
tica_model = SparseTICA(n_components = n_components, lag_time = lag_time, rho = rho, shrinkage = shrinkage)
if not os.path.exists(nystroem_data_filename):
clusterer = verboseload(clusterer_dir)
tica = verboseload(tica_dir)
features = tica
clusters = clusterer.cluster_centers_
landmarks = clusters
print("here's what goes into the combined class:")
#print(np.shape(features))
print(np.shape(landmarks))
print(type(landmarks))
nys = Nystroem(n_components=np.shape(landmarks)[0], basis=landmarks)
nyx = nys.fit_transform(features)
del features
del landmarks
try:
save_dataset(nyx, nystroem_data_filename)
except:
os.system("rm -rf %s" %nystroem_data_filename)
save_dataset(nyx, nystroem_data_filename)
else:
nyx = load_dataset(nystroem_data_filename)
print(np.shape(nyx))
print(dir(nyx))
if not os.path.exists(projected_data_filename):
fit_model = tica_model.fit(nyx)
verbosedump(fit_model, fit_model_filename)
transformed_data = fit_model.transform(nyx)
del(nyx)
try:
save_dataset(transformed_data, projected_data_filename)
except:
os.system("rm -rf %s" %projected_data_filename)
save_dataset(transformed_data, projected_data_filename)
else:
print("Already performed landmark kernel tICA.")
Example 11: __init__
def __init__(self, yaml_file, relative_loc=None):
self.yaml_file = load_yaml_file(yaml_file)
self.base_dir = self.yaml_file["base_dir"]
self.mdl_dir = self.yaml_file["mdl_dir"]
if relative_loc is None:
self.relative_loc = self.mdl_dir
else:
self.relative_loc = os.path.join(relative_loc,
os.path.split(self.mdl_dir)[1])
self.kmeans_mdl = verboseload(
os.path.join(self.relative_loc, "kmeans_mdl.pkl"))
self.tica_mdl = verboseload(os.path.join(self.relative_loc, "tica_mdl.pkl"))
Example 12: _test_protein_with_project
def _test_protein_with_project(prj):
p1 = Protein(prj, "kinase_1")
p2 = Protein(prj, "kinase_2")
assert isinstance(p1, Protein)
assert isinstance(p1.msm, MarkovStateModel)
assert (p1.msm.left_eigenvectors_ ==
verboseload(os.path.join(prj.mdl_dir,"kinase_1","msm_mdl.pkl")).left_eigenvectors_).all()
assert (p1.bootrap_msm.mle_.left_eigenvectors_ ==
verboseload(os.path.join(prj.mdl_dir,"kinase_1","msm_mdl.pkl")).left_eigenvectors_).all()
assert (p2.msm.left_eigenvectors_ ==
verboseload(os.path.join(prj.mdl_dir,"kinase_2","msm_mdl.pkl")).left_eigenvectors_).all()
assert (p2.bootrap_msm.mle_.left_eigenvectors_ ==
verboseload(os.path.join(prj.mdl_dir,"kinase_2","msm_mdl.pkl")).left_eigenvectors_).all()
return True
Example 13: plot_tica_and_clusters
def plot_tica_and_clusters(tica_dir, transformed_data_dir, clusterer_dir, lag_time, component_i = 0, component_j = 1):
transformed_data = verboseload(transformed_data_dir)
clusterer = verboseload(clusterer_dir)
trajs = np.concatenate(transformed_data)
plt.hexbin(trajs[:,component_i], trajs[:,component_j], bins='log', mincnt=1)
centers = clusterer.cluster_centers_
for i in range(0, np.shape(centers)[0]):
center = centers[i,:]
plt.annotate('%d' %i, xy=(center[0],center[1]), xytext=(center[0], center[1]),size=6)
pp = PdfPages("%s/c%d_c%d_clusters%d.pdf" %(tica_dir, component_i, component_j, np.shape(centers)[0]))
pp.savefig()
pp.close()
Example 14: plot_tics_gmm_R
def plot_tics_gmm_R(save_dir, data_file, gmm_dir, titles = None, tICA = False, scale = 1.0, refcoords_file = None):
data = verboseload(data_file)
data = np.concatenate(data)
data[:,0] *= scale
if(refcoords_file is not None):
refcoords = load_file(refcoords_file)
else:
refcoords = None
print(np.shape(refcoords))
print(refcoords)
gmm_means = []
for j in range(0,np.shape(data)[1]):
with gzip.open("%s/tIC%d_gmm.pkl.gz" %(gmm_dir, j)) as f:
gmm = pickle.load(f)
gmm_means.append(gmm.means_)
num_columns = np.shape(data)[1]
plot_column_pair_partial = partial(plot_column_pair, num_columns = num_columns, save_dir = save_dir, titles = titles,
data = data, gmm_means = gmm_means, refcoords = refcoords)
#for i in range(0,num_columns):
# plot_column_pair_partial(i)
pool = mp.Pool(mp.cpu_count())
pool.map(plot_column_pair_partial, range(0,num_columns))
pool.terminate()
print("Done plotting columns")
return
Example 15: fit_protein_kmeans
def fit_protein_kmeans(yaml_file,mini=True):
mdl_dir = yaml_file["mdl_dir"]
mdl_params = yaml_file["mdl_params"]
current_mdl_params={}
for i in mdl_params.keys():
if i.startswith("cluster__"):
current_mdl_params[i.split("cluster__")[1]] = mdl_params[i]
if mini:
current_mdl_params["batch_size"] = 100*current_mdl_params["n_clusters"]
kmeans_mdl = MiniBatchKMeans(**current_mdl_params)
else:
kmeans_mdl = KMeans(**current_mdl_params)
data = []
for protein in yaml_file["protein_list"]:
with enter_protein_mdl_dir(yaml_file, protein):
tica_data = verboseload("tica_data.pkl")
# get all traj
sorted_list = sorted(tica_data.keys(), key=keynat)
data.extend([tica_data[i] for i in sorted_list])
kmeans_mdl.fit(data)
kmeans_mdl_path = os.path.join(mdl_dir, "kmeans_mdl.pkl")
verbosedump(kmeans_mdl, kmeans_mdl_path)
return