This page collects and organizes typical code examples of the Python method eden.graph.Vectorizer.transform. If you have been wondering what Vectorizer.transform does, how to use it, or where to find usage examples, the curated code samples here may help. You can also read further about the containing class, eden.graph.Vectorizer.
The following presents 10 code examples of Vectorizer.transform, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
Example 1: IsomorphicClusterer
# Required import: from eden.graph import Vectorizer [as alias]
# Or: from eden.graph.Vectorizer import transform [as alias]
class IsomorphicClusterer(BaseEstimator, ClusterMixin):
"""IsomorphismClusterer.
"""
def __init__(self):
"""Construct."""
self.vectorizer = Vectorizer()
def set_params(self, **params):
"""Set the parameters of this estimator.
        Any keyword argument is stored as an instance attribute.
Returns
-------
self
"""
for param in params:
self.__dict__[param] = params[param]
return self
def fit_predict(self, graphs):
"""fit_predict."""
        def vec_to_hash(vec):
            # note: '+' on NumPy arrays is element-wise addition (data and
            # indices of a CSR row have equal length), not concatenation
            return hash(tuple(vec.data + vec.indices))
try:
for graph in graphs:
prediction = vec_to_hash(self.vectorizer.transform([graph]))
yield prediction
except Exception as e:
logger.debug('Failed iteration. Reason: %s' % e)
logger.debug('Exception', exc_info=True)
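A minimal usage sketch, assuming networkx 2.x and that eden's Vectorizer reads the node/edge attribute 'label' (its default). Equal hashes mean equal feature vectors, a practical proxy for isomorphism rather than a strict test:
import networkx as nx

g1, g2 = nx.path_graph(3), nx.path_graph(3)  # two isomorphic graphs
for g in (g1, g2):
    nx.set_node_attributes(g, 'C', 'label')  # discrete labels for eden
    nx.set_edge_attributes(g, '-', 'label')
clusterer = IsomorphicClusterer()
ids = list(clusterer.fit_predict([g1, g2]))  # fit_predict is a generator
assert ids[0] == ids[1]  # identical feature vectors hash identically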
Example 2: EdenRegressor
# Required import: from eden.graph import Vectorizer [as alias]
# Or: from eden.graph.Vectorizer import transform [as alias]
class EdenRegressor(BaseEstimator, RegressorMixin):
"""Build a regressor for graphs."""
def __init__(self, r=3, d=8, nbits=16, discrete=True,
normalization=True, inner_normalization=True,
penalty='elasticnet', loss='squared_loss'):
"""construct."""
self.set_params(r, d, nbits, discrete,
normalization, inner_normalization,
penalty, loss)
def set_params(self, r=3, d=8, nbits=16, discrete=True,
normalization=True, inner_normalization=True,
penalty='elasticnet', loss='squared_loss'):
"""setter."""
self.r = r
self.d = d
self.nbits = nbits
self.normalization = normalization
self.inner_normalization = inner_normalization
self.discrete = discrete
self.model = SGDRegressor(
loss=loss, penalty=penalty,
average=True, shuffle=True,
max_iter=5, tol=None)
self.vectorizer = Vectorizer(
r=self.r, d=self.d,
normalization=self.normalization,
inner_normalization=self.inner_normalization,
discrete=self.discrete,
nbits=self.nbits)
return self
def transform(self, graphs):
"""transform."""
x = self.vectorizer.transform(graphs)
return x
@timeit
def kernel_matrix(self, graphs):
"""kernel_matrix."""
x = self.transform(graphs)
return metrics.pairwise.pairwise_kernels(x, metric='linear')
def fit(self, graphs, targets, randomize=True):
"""fit."""
x = self.transform(graphs)
self.model = self.model.fit(x, targets)
return self
def predict(self, graphs):
"""predict."""
x = self.transform(graphs)
preds = self.model.predict(x)
return preds
def decision_function(self, graphs):
"""decision_function."""
return self.predict(graphs)
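A hedged usage sketch for the regressor; labelled_path is a helper invented here for the example (networkx 2.x API), and the target (graph size) is a toy choice:
import networkx as nx

def labelled_path(n):
    # hypothetical helper: a path graph with the discrete 'label'
    # attributes eden expects
    g = nx.path_graph(n)
    nx.set_node_attributes(g, 'C', 'label')
    nx.set_edge_attributes(g, '-', 'label')
    return g

graphs = [labelled_path(n) for n in range(3, 13)]
targets = [float(len(g)) for g in graphs]  # toy target: number of nodes
reg = EdenRegressor(r=2, d=4, nbits=14)
reg.fit(graphs, targets)
preds = reg.predict(graphs)
K = reg.kernel_matrix(graphs)  # linear kernel in the hashed feature space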
Example 3: prep
# Required import: from eden.graph import Vectorizer [as alias]
# Or: from eden.graph.Vectorizer import transform [as alias]
def prep(graphlist, id=0):
    if not graphlist:
        return {}
    v = Vectorizer()
    # drop 'weight' attributes before vectorizing; map() is lazy in
    # Python 3, so loop explicitly for the side effect
    for graph in graphlist:
        node_operation(graph, lambda n, d: d.pop('weight', None))
    csr = v.transform(graphlist)
    hash_function = lambda vec: hash(tuple(vec.data + vec.indices))
    return {hash_function(row): (id, ith) for ith, row in enumerate(csr)}
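One plausible use of prep, sketched under the assumption that list_a and list_b hold labelled networkx graphs: fingerprint both lists, then intersect the hashes to spot graphs with identical feature vectors:
fp_a = prep(list_a, id=0)  # list_a, list_b: assumed inputs
fp_b = prep(list_b, id=1)
shared = set(fp_a) & set(fp_b)  # hashes occurring in both lists
for h in shared:
    _, idx = fp_b[h]
    print('graph %d of list_b has a twin in list_a' % idx)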
Example 4: Annotator
# Required import: from eden.graph import Vectorizer [as alias]
# Or: from eden.graph.Vectorizer import transform [as alias]
class Annotator():
    def __init__(self, multiprocess=True, score_attribute='importance'):
        self.score_attribute = score_attribute
        self.vectorizer = Vectorizer()
        self.multi_process = multiprocess
        self.trained = False
    def fit(self, graphs_pos, graphs_neg=[]):
        if self.trained:
            return self
        self.trained = True
        # map() is lazy in Python 3; loop explicitly so the clean-up
        # side effects actually run
        for graph in graphs_pos + graphs_neg:
            utils.remove_eden_annotation(graph)
            utils.node_operation(graph, lambda n, d: d.pop('importance', None))
            graph.graph.pop('mass_annotate_mp_was_here', None)
        if graphs_neg:
            # negatives available: train a binary classifier
            self.estimator = SGDClassifier()
            classes = [1] * len(graphs_pos) + [-1] * len(graphs_neg)
            self.estimator.fit(self.vectorizer.transform(graphs_pos + graphs_neg), classes)
else:
self.estimator = ExperimentalOneClassEstimator()
self.estimator.fit(self.vectorizer.transform(graphs_pos))
return self
    def fit_transform(self, graphs_p, graphs_n=[]):
        self.fit(graphs_p, graphs_n)
        return self.transform(graphs_p), self.transform(graphs_n)
    def transform(self, graphs):
        return self.annotate(graphs)
    def annotate(self, graphs, neg=False):
        if not graphs:
            return []
        return mass_annotate_mp(graphs, self.vectorizer,
                                score_attribute=self.score_attribute,
                                estimator=self.estimator,
                                multi_process=self.multi_process,
                                invert_score=neg)
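A usage sketch under the same assumptions as above; pos_graphs and neg_graphs are assumed inputs, and utils, mass_annotate_mp and ExperimentalOneClassEstimator come from the surrounding module:
ann = Annotator(multiprocess=False, score_attribute='importance')
pos_annotated, neg_annotated = ann.fit_transform(pos_graphs, neg_graphs)
# each node of the returned graphs now carries an 'importance' score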
Example 5: compute_NSPDK_features
# Required import: from eden.graph import Vectorizer [as alias]
# Or: from eden.graph.Vectorizer import transform [as alias]
def compute_NSPDK_features():
import eden
from eden.graph import Vectorizer
from eden.converter.molecule.obabel import mol_file_to_iterable, obabel_to_eden
mol_path = olfaction_prediction_path + '/data/sdf/'
iter_mols = mol_file_to_iterable(mol_path + '/all_mol.sdf', 'sdf')
iter_graphs = obabel_to_eden(iter_mols)
    vectorizer = Vectorizer(r=3, d=4)
    X = vectorizer.transform(iter_graphs)
return X
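For orientation: r bounds the neighbourhood radius and d the distance between neighbourhood pairs in the NSPDK features. A quick sketch of inspecting the result, assuming the SDF path above exists and that eden's default nbits of 20 is in effect:
X = compute_NSPDK_features()
print(X.shape)  # (n_molecules, 2**20) sparse CSR matrix
print(X.nnz)    # total number of non-zero feature counts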
Example 6: DiscSampler
# Required import: from eden.graph import Vectorizer [as alias]
# Or: from eden.graph.Vectorizer import transform [as alias]
class DiscSampler():
    '''Discriminative sampler: seed a graph sampler with the instances
    closest to the decision boundary.'''
def __init__(self):
# this is mainly for the forest. the sampler uses a different vectorizer
self.vectorizer = Vectorizer(nbits=14)
def get_heap_and_forest(self, griter, k):
'''
        Create the heap and the forest.
        Heap entries are (dist to hyperplane, count, graph);
        the forest is just a nearest-neighbour index from sklearn.
'''
graphs = list(griter)
graphs2 = copy.deepcopy(graphs)
        # transform does mess up the graph objects, hence the deepcopy above
X = self.vectorizer.transform(graphs)
forest = LSHForest()
forest.fit(X)
        print('got forest')
heap = []
for vector, graph in zip(X, graphs2):
graph2 = nx.Graph(graph)
heapq.heappush(heap, (
self.sampler.estimator.predict_proba(self.sampler.vectorizer.transform_single(graph2))[0][1],
# score ~ dist from hyperplane
                k + 1,  # start the counter high so the seed graphs are not emitted at the end
graph)) # at last the actual graph
        print('got heap')
        distances, unused = forest.kneighbors(X, n_neighbors=2)
        distances = [a[1] for a in distances]  # second neighbour = nearest other point
        avg_dist = distances[len(distances) // 2]  # middle element as a cheap stand-in for the average distance
        print('got dist')
return heap, forest, avg_dist
'''
def sample_simple(self,graphiter,iterneg):
graphiter,grait,griter2 = itertools.tee(graphiter,3)
self.fit_sampler(graphiter,iterneg)
a,b,c=self.get_heap_and_forest( griter2, 30)
grait= itertools.islice(grait,5)
rez=self.sampler.sample(grait,n_samples=5,
batch_size=1,
n_jobs=0,
n_steps=1,
select_cip_max_tries=100,
accept_annealing_factor=.5,
generatormode=False,
same_core_size=False )
return rez
'''
def sample_graphs(self, graphiter, iter_neg, radius, how_many, check_k, heap_chunk_size=10):
        # some initialisation:
        # create the sampler, then set up the heap and the forest
graphiter, iter2 = itertools.tee(graphiter)
self.fit_sampler(iter2, iter_neg)
heap, forest, avg_dist = self.get_heap_and_forest(graphiter, check_k)
# heap should be like (hpdist, count, graph)
radius = radius * avg_dist
        # so let's start the looping
result = []
while heap and len(result) < how_many:
# pop all the graphs we want
todo = []
for i in range(heap_chunk_size):
if heap:
todo.append(heapq.heappop(heap))
# let the sampler do the sampling
graphz = [e[2] for e in todo]
# draw.draw_graph_set_graphlearn(graphz)
work = self.sampler.sample(graphz,
batch_size=1,
n_jobs=0,
n_steps=30,
select_cip_max_tries=100,
improving_threshold=.5,
generatormode=False,
max_core_size_diff=False,
n_samples=3
)
            # let's see, we need to take care of:
            # - the initially popped graphs:
            #   increase and check the counter, reinsert into the heap
            # - the new graphs:
            #   put them in the heap and the forest
#......... part of the code omitted here .........
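The heap discipline above is plain heapq with (score, counter, payload) tuples; a self-contained sketch of that pattern, with toy scores and strings standing in for graphs:
import heapq
import itertools

counter = itertools.count()  # tie-breaker so payloads are never compared
heap = []
for score, graph in [(0.9, 'g1'), (0.1, 'g2'), (0.5, 'g3')]:
    heapq.heappush(heap, (score, next(counter), graph))
while heap:
    score, _, graph = heapq.heappop(heap)  # lowest score pops first
    print(graph, score)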
Example 7: Vectorizer
# Required import: from eden.graph import Vectorizer [as alias]
# Or: from eden.graph.Vectorizer import transform [as alias]
class Vectorizer(object):
def __init__(self,
complexity=None,
nbits=20,
sequence_vectorizer_complexity=3,
graph_vectorizer_complexity=2,
n_neighbors=5,
sampling_prob=.5,
n_iter=5,
min_energy=-5,
random_state=1):
random.seed(random_state)
if complexity is not None:
sequence_vectorizer_complexity = complexity
graph_vectorizer_complexity = complexity
self.sequence_vectorizer = SeqVectorizer(complexity=sequence_vectorizer_complexity,
nbits=nbits,
normalization=False,
inner_normalization=False)
self.graph_vectorizer = GraphVectorizer(complexity=graph_vectorizer_complexity, nbits=nbits)
self.n_neighbors = n_neighbors
self.sampling_prob = sampling_prob
self.n_iter = n_iter
self.min_energy = min_energy
self.nearest_neighbors = NearestNeighbors(n_neighbors=n_neighbors)
def fit(self, seqs):
# store seqs
self.seqs = list(normalize_seqs(seqs))
data_matrix = self.sequence_vectorizer.transform(self.seqs)
# fit nearest_neighbors model
self.nearest_neighbors.fit(data_matrix)
return self
def fit_transform(self, seqs, sampling_prob=None, n_iter=None):
seqs, seqs_ = tee(seqs)
return self.fit(seqs_).transform(seqs, sampling_prob=sampling_prob, n_iter=n_iter)
    def transform(self, seqs, sampling_prob=None, n_iter=None):
        seqs = list(normalize_seqs(seqs))
        # forward the overrides; graphs() keeps the stored values when None
        graphs_ = self.graphs(seqs, sampling_prob=sampling_prob, n_iter=n_iter)
        data_matrix = self.graph_vectorizer.transform(graphs_)
        return data_matrix
def graphs(self, seqs, sampling_prob=None, n_iter=None):
seqs = list(normalize_seqs(seqs))
if n_iter is not None:
self.n_iter = n_iter
if sampling_prob is not None:
self.sampling_prob = sampling_prob
for seq, neighs in self._compute_neighbors(seqs):
if self.n_iter > 1:
header, sequence, struct, energy = self._optimize_struct(seq, neighs)
else:
header, sequence, struct, energy = self._align_sequence_structure(seq, neighs)
graph = self._seq_to_eden(header, sequence, struct, energy)
yield graph
def _optimize_struct(self, seq, neighs):
structs = []
results = []
for i in range(self.n_iter):
new_neighs = self._sample_neighbors(neighs)
header, sequence, struct, energy = self._align_sequence_structure(seq, new_neighs)
results.append((header, sequence, struct, energy))
structs.append(struct)
instance_id = self._most_representative(structs)
selected = results[instance_id]
return selected
def _most_representative(self, structs):
# compute kernel matrix with sequence_vectorizer
data_matrix = self.sequence_vectorizer.transform(structs)
kernel_matrix = pairwise_kernels(data_matrix, metric='rbf', gamma=1)
        # instance density: average kernel similarity to all instances
        density = np.sum(kernel_matrix, 0) / data_matrix.shape[0]
        # pick the instance with the highest density
        max_id = np.argsort(-density)[0]
return max_id
def _sample_neighbors(self, neighs):
out_neighs = []
# insert one element at random
out_neighs.append(random.choice(neighs))
        # add further elements, each kept with probability sampling_prob
for neigh in neighs:
if random.random() < self.sampling_prob:
out_neighs.append(neigh)
return out_neighs
def _align_sequence_structure(self, seq, neighs, structure_deletions=False):
header = seq[0]
if len(neighs) < 1:
clean_seq, clean_struct = rnafold.RNAfold_wrapper(seq[1])
energy = 0
logger.debug('Warning: no alignment for: %s' % seq)
else:
str_out = convert_seq_to_fasta_str(seq)
#......... part of the code omitted here .........
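_most_representative above boils down to a density argmax over an RBF kernel matrix; the same computation in isolation, with random vectors standing in for the vectorized structures:
import numpy as np
from sklearn.metrics.pairwise import pairwise_kernels

data_matrix = np.random.rand(10, 5)  # stand-in for sequence_vectorizer output
kernel_matrix = pairwise_kernels(data_matrix, metric='rbf', gamma=1)
density = np.sum(kernel_matrix, 0) / data_matrix.shape[0]  # mean similarity
most_representative = int(np.argsort(-density)[0])  # densest instance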
Example 8: range
# Required import: from eden.graph import Vectorizer [as alias]
# Or: from eden.graph.Vectorizer import transform [as alias]
improved_graphs = sampler.transform(graphs_pos_,
same_radius=False,
size_constrained_core_choice=True,
sampling_interval=9999,
select_cip_max_tries=100,
batch_size=int(count/4)+1,
n_steps=100,
n_jobs=-1,
improving_threshold=0.9)
# calculate the average score of the improved versions
# and of the originals
avg_imp = sum([estimator.decision_function(e) for e in vectorizer.transform(unpack(improved_graphs))]) / count
avg_ori = sum([estimator.decision_function(e) for e in vectorizer.transform(graphs_pos___)]) / count
improved.append(avg_imp)
originals.append(avg_ori)
t = range(len(percentages))
# originals are blue
# improved ones are green
print(originals)
print(improved)
plt.plot(t, originals, 'bs')
plt.plot(t, improved, 'g^')
plt.savefig('zomg.png')
Example 9: ListVectorizer
# Required import: from eden.graph import Vectorizer [as alias]
# Or: from eden.graph.Vectorizer import transform [as alias]
class ListVectorizer(Vectorizer):
"""Transform vector labeled, weighted, nested graphs in sparse vectors.
A list of iterators over graphs and a list of weights are taken in input.
The returned vector is the linear combination of sparse vectors obtained on each
corresponding graph.
"""
def __init__(self,
complexity=3,
r=None,
d=None,
min_r=0,
min_d=0,
nbits=20,
normalization=True,
inner_normalization=True,
n=1,
min_n=2):
"""
Arguments:
complexity : int
The complexity of the features extracted.
r : int
The maximal radius size.
d : int
The maximal distance size.
min_r : int
The minimal radius size.
min_d : int
The minimal distance size.
nbits : int
The number of bits that defines the feature space size: |feature space|=2^nbits.
normalization : bool
If set the resulting feature vector will have unit euclidean norm.
inner_normalization : bool
If set the feature vector for a specific combination of the radius and
distance size will have unit euclidean norm.
When used together with the 'normalization' flag it will be applied first and
then the resulting feature vector will be normalized.
            n : int
                The maximal number of clusters used to discretize label vectors.
            min_n : int
                The minimal number of clusters used to discretize label vectors.
"""
self.vectorizer = Vectorizer(complexity=complexity,
r=r,
d=d,
min_r=min_r,
min_d=min_d,
nbits=nbits,
normalization=normalization,
inner_normalization=inner_normalization,
n=n,
min_n=min_n)
self.vectorizers = list()
def fit(self, graphs_iterators_list):
"""
Constructs an approximate explicit mapping of a kernel function on the data
stored in the nodes of the graphs.
Arguments:
graphs_iterators_list : list of iterators over networkx graphs.
The data.
"""
for i, graphs in enumerate(graphs_iterators_list):
self.vectorizers.append(copy.copy(self.vectorizer))
self.vectorizers[i].fit(graphs)
def fit_transform(self, graphs_iterators_list, weights=list()):
"""
Arguments:
graphs_iterators_list : list of iterators over networkx graphs.
The data.
weights : list of positive real values.
Weights for the linear combination of sparse vectors obtained on each iterated tuple of graphs.
"""
graphs_iterators_list_fit, graphs_iterators_list_transf = itertools.tee(graphs_iterators_list)
self.fit(graphs_iterators_list_fit)
return self.transform(graphs_iterators_list_transf)
def transform(self, graphs_iterators_list, weights=list()):
"""
        Transforms a list of networkx graphs into a SciPy CSR sparse matrix
        (Compressed Sparse Row matrix).
#......... part of the code omitted here .........
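The transform body is truncated here; based on the class docstring, the result is presumably a weighted linear combination of the per-vectorizer sparse matrices, along these lines:
import numpy as np
from scipy.sparse import csr_matrix

# stand-ins for the matrices each inner vectorizer would produce
matrices = [csr_matrix(np.random.rand(4, 8)) for _ in range(3)]
weights = [0.5, 0.3, 0.2]
combined = weights[0] * matrices[0]
for w, m in zip(weights[1:], matrices[1:]):
    combined = combined + w * m  # weighted sum, still sparse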
Example 10: EdenEstimator
# Required import: from eden.graph import Vectorizer [as alias]
# Or: from eden.graph.Vectorizer import transform [as alias]
class EdenEstimator(BaseEstimator, ClassifierMixin):
"""Build an estimator for graphs."""
def __init__(self, r=3, d=8, nbits=16, discrete=True,
balance=False, subsample_size=200, ratio=2,
normalization=False, inner_normalization=False,
penalty='elasticnet'):
"""construct."""
self.set_params(r, d, nbits, discrete, balance, subsample_size,
ratio, normalization, inner_normalization,
penalty)
def set_params(self, r=3, d=8, nbits=16, discrete=True,
balance=False, subsample_size=200, ratio=2,
normalization=False, inner_normalization=False,
penalty='elasticnet'):
"""setter."""
self.r = r
self.d = d
self.nbits = nbits
self.normalization = normalization
self.inner_normalization = inner_normalization
self.discrete = discrete
self.balance = balance
self.subsample_size = subsample_size
self.ratio = ratio
if penalty == 'perceptron':
self.model = Perceptron(max_iter=5, tol=None)
else:
self.model = SGDClassifier(
average=True, class_weight='balanced', shuffle=True,
penalty=penalty, max_iter=5, tol=None)
self.vectorizer = Vectorizer(
r=self.r, d=self.d,
normalization=self.normalization,
inner_normalization=self.inner_normalization,
discrete=self.discrete,
nbits=self.nbits)
return self
def transform(self, graphs):
"""transform."""
x = self.vectorizer.transform(graphs)
return x
@timeit
def kernel_matrix(self, graphs):
"""kernel_matrix."""
x = self.transform(graphs)
return metrics.pairwise.pairwise_kernels(x, metric='linear')
def fit(self, graphs, targets, randomize=True):
"""fit."""
if self.balance:
if randomize:
bal_graphs, bal_targets = balance(
graphs, targets, None, ratio=self.ratio)
else:
samp_graphs, samp_targets = subsample(
graphs, targets, subsample_size=self.subsample_size)
x = self.transform(samp_graphs)
self.model.fit(x, samp_targets)
bal_graphs, bal_targets = balance(
graphs, targets, self, ratio=self.ratio)
size = len(bal_targets)
logger.debug('Dataset size=%d' % (size))
x = self.transform(bal_graphs)
self.model = self.model.fit(x, bal_targets)
else:
x = self.transform(graphs)
self.model = self.model.fit(x, targets)
return self
def predict(self, graphs):
"""predict."""
x = self.transform(graphs)
preds = self.model.predict(x)
return preds
def decision_function(self, graphs):
"""decision_function."""
x = self.transform(graphs)
preds = self.model.decision_function(x)
return preds
@timeit
def cross_val_score(self, graphs, targets,
scoring='roc_auc', cv=5):
"""cross_val_score."""
x = self.transform(graphs)
scores = cross_val_score(
self.model, x, targets, cv=cv, scoring=scoring)
return scores
@timeit
def cross_val_predict(self, graphs, targets, cv=5):
"""cross_val_score."""
x = self.transform(graphs)
scores = cross_val_predict(
self.model, x, targets, cv=cv, method='decision_function')
#......... part of the code omitted here .........
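A closing usage sketch mirroring the regressor example above; graphs (labelled networkx graphs) and binary targets are assumed inputs:
est = EdenEstimator(r=2, d=4, nbits=14)
est.fit(graphs, targets)  # targets: binary class labels
auc_scores = est.cross_val_score(graphs, targets, scoring='roc_auc', cv=3)
print(auc_scores.mean())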