本文整理汇总了 Python 中 eden.graph.Vectorizer.annotate 方法的典型用法代码示例。如果您正苦于以下问题:Python Vectorizer.annotate 方法的具体用法?Python Vectorizer.annotate 怎么用?Python Vectorizer.annotate 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 eden.graph.Vectorizer 的用法示例。
在下文中一共展示了 Vectorizer.annotate 方法的 1 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: SequenceMotif
# 需要导入模块: from eden.graph import Vectorizer [as 别名]
# 或者: from eden.graph.Vectorizer import annotate [as 别名]
#.........这里部分代码省略.........
cluster_hits = []
for cluster_id in self.motives_db:
hits = self._cluster_hit(seq, cluster_id)
if len(list(hits)):
cluster_hits.append(cluster_id)
if return_list is False:
if len(cluster_hits):
yield 1
else:
yield 0
else:
yield cluster_hits
def transform(self, seqs, return_match=False):
    """Yield one dense feature list per sequence, indexed by cluster ID.

    Each yielded list has ``len(self.motives_db)`` entries, initialised to 0.
    For every cluster ID in ``self.motives_db`` the hits reported by
    ``self._cluster_hit(seq, cluster_id)`` are collected into a list.

    Args:
        seqs: iterable of ``(header, seq)`` pairs; the header is ignored.
        return_match: when exactly ``False`` (the default), an entry is set
            to 1 if the cluster produced at least one hit. For any other
            value the entry is replaced by the full list of hits (which may
            be empty).

    Yields:
        A list with one entry per cluster for each input sequence.

    Note:
        Cluster IDs are used directly as list indices, so this presumably
        expects the IDs to be the integers ``0 .. len(self.motives_db) - 1``
        -- TODO confirm against where ``motives_db`` is built.
    """
    n_clusters = len(self.motives_db)
    for _header, sequence in seqs:
        row = [0] * n_clusters
        for cluster_id in self.motives_db:
            matches = list(self._cluster_hit(sequence, cluster_id))
            if return_match is not False:
                # Caller asked for the matches themselves, found or not.
                row[cluster_id] = matches
            elif matches:
                row[cluster_id] = 1
        yield row
def _serial_graph_motif(self, seqs, placeholder=None):
    """Extract motif strings from ``seqs`` in the current process.

    The sequences are converted to graphs with ``sequence_to_eden``,
    annotated by ``self.vectorizer.annotate`` using ``self.estimator``, and
    each annotated graph is passed to ``compute_max_subarrays`` bounded by
    ``self.min_subarray_size`` and ``self.max_subarray_size``.

    Args:
        seqs: the sequences to process, in whatever form
            ``sequence_to_eden`` accepts.
        placeholder: unused by this method; the multiprocess caller passes
            an extra positional value in this slot.

    Returns:
        A list with the ``'subarray_string'`` entry of every subarray found,
        in graph order.
    """
    annotated_graphs = self.vectorizer.annotate(sequence_to_eden(seqs), estimator=self.estimator)
    return [
        subarray['subarray_string']
        for graph in annotated_graphs
        for subarray in compute_max_subarrays(
            graph=graph,
            min_subarray_size=self.min_subarray_size,
            max_subarray_size=self.max_subarray_size,
        )
    ]
def _multiprocess_graph_motif(self, seqs):
    """Extract motif strings from ``seqs`` using a pool of worker processes.

    ``seqs`` is split into the ``(start, end)`` intervals returned by
    ``compute_intervals`` (driven by ``self.n_blocks`` and
    ``self.block_size``), and ``self._serial_graph_motif`` is submitted for
    each slice through the ``apply_async`` helper.

    Args:
        seqs: a sized, sliceable collection of sequences (``len()`` and
            ``seqs[start:end]`` are both used).

    Returns:
        A flat list concatenating the per-slice results in interval order.

    The pool is always closed and joined before returning or propagating an
    exception, so worker processes are not left behind after the call.
    """
    intervals = compute_intervals(size=len(seqs), n_blocks=self.n_blocks, block_size=self.block_size)
    # n_jobs == -1 means "let the pool pick its own size": Pool(processes=None)
    # is equivalent to Pool() with no arguments.
    pool = mp.Pool(processes=None if self.n_jobs == -1 else self.n_jobs)
    try:
        # The extra True fills the 'placeholder' positional slot of
        # _serial_graph_motif.
        pending = [
            apply_async(pool, self._serial_graph_motif, args=(seqs[start:end], True))
            for start, end in intervals
        ]
        output = [job.get() for job in pending]
    finally:
        # Release the worker processes even if a job raised.
        pool.close()
        pool.join()
    return list(chain.from_iterable(output))
def _motif_finder(self, seqs):
if self.n_jobs > 1 or self.n_jobs == -1:
return self._multiprocess_graph_motif(seqs)
else:
return self._serial_graph_motif(seqs)
def _fit_predictive_model(self, seqs, neg_seqs=None):
# duplicate iterator
pos_seqs, pos_seqs_ = tee(seqs)
pos_graphs = mp_pre_process(pos_seqs, pre_processor=sequence_to_eden,
n_blocks=self.pre_processor_n_blocks,
block_size=self.pre_processor_block_size,
n_jobs=self.pre_processor_n_jobs)