本文整理汇总了 Python 中 eden.graph.Vectorizer 类的典型用法代码示例。如果您正苦于以下问题:Python Vectorizer 类的具体用法?Python Vectorizer 怎么用?Python Vectorizer 使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 Vectorizer 类的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: prep
def prep(graphlist, id=0):
    """Index graphs by a hash of their vectorized representation.

    Parameters
    ----------
    graphlist : list
        Graphs to vectorize. The ``'weight'`` entry is removed from every
        node attribute dict in place before vectorization.
    id : int
        Identifier stored alongside each row index in the result.

    Returns
    -------
    dict
        Maps ``hash(row)`` to ``(id, row_index)``. Empty when ``graphlist``
        is empty or ``None``.
    """
    if not graphlist:
        return {}

    v = Vectorizer()

    # Explicit loop instead of map(): on Python 3 map() is lazy, so using it
    # only for its side effects would silently skip the weight removal.
    for graph in graphlist:
        node_operation(graph, lambda n, d: d.pop('weight', None))

    csr = v.transform(graphlist)

    def hash_function(vec):
        # NOTE(review): if ``data`` and ``indices`` are numpy arrays this is
        # an element-wise sum, not a concatenation -- confirm that is the
        # intended fingerprint before relying on it for uniqueness.
        return hash(tuple(vec.data + vec.indices))

    return {hash_function(row): (id, ith) for ith, row in enumerate(csr)}
示例2: TransformerWrapper
class TransformerWrapper(BaseEstimator, ClassifierMixin):
    """Expose a wrapped program through a fit/transform interface on graphs."""

    def __init__(self, program=None):
        """Store the program and create a default ``Vectorizer``."""
        self.program = program
        self.vectorizer = Vectorizer()
        self.params_vectorize = dict()

    def set_params(self, **params):
        """Dispatch parameters to the vectorizer, the vectorize call or the program.

        Names containing ``"vectorizer__"`` are forwarded to the vectorizer,
        names containing ``"vectorize__"`` are remembered in
        ``self.params_vectorize``, everything else goes to the program.
        In the first two cases only the token right after the first ``"__"``
        is used as the key.

        Returns
        -------
        self
        """
        for_vectorizer = {}
        for_program = {}
        for name, value in params.items():
            if "vectorizer__" in name:
                for_vectorizer[name.split('__')[1]] = value
            elif "vectorize__" in name:
                self.params_vectorize[name.split('__')[1]] = value
            else:
                for_program[name] = value
        self.program.set_params(**for_program)
        self.vectorizer.set_params(**for_vectorizer)
        return self

    def fit(self, graphs):
        """Fit the wrapped program on ``graphs``.

        Returns ``self`` on success. Any exception is logged at debug level
        and ``None`` is returned instead.
        """
        try:
            self.program.fit(graphs)
        except Exception as e:
            logger.debug('Failed iteration. Reason: %s', e)
            logger.debug('Exception', exc_info=True)
        else:
            return self

    def transform(self, graphs):
        """Lazily yield the transformed version of every graph.

        An exception raised while iterating is logged at debug level and
        ends the generator.
        """
        try:
            for graph in graphs:
                yield self._transform(graph)
        except Exception as e:
            logger.debug('Failed iteration. Reason: %s', e)
            logger.debug('Exception', exc_info=True)

    def _transform(self, graph):
        """Return ``graph`` unchanged (identity hook)."""
        return graph
示例3: compute_NSPDK_features
def compute_NSPDK_features():
    """Vectorize the molecules of ``all_mol.sdf`` with ``Vectorizer(r=3, d=4)``.

    The SDF file is located below ``olfaction_prediction_path``; the
    molecules are converted to graphs with ``obabel_to_eden`` and the
    result of ``Vectorizer.transform`` is returned.
    """
    import eden
    from eden.graph import Vectorizer
    from eden.converter.molecule.obabel import mol_file_to_iterable, obabel_to_eden

    sdf_dir = olfaction_prediction_path + '/data/sdf/'
    molecules = mol_file_to_iterable(sdf_dir + '/all_mol.sdf', 'sdf')
    graphs = obabel_to_eden(molecules)
    return Vectorizer(r=3, d=4).transform(graphs)
示例4: OrdererWrapper
class OrdererWrapper(BaseEstimator, ClassifierMixin):
    """Score graphs with a wrapped program after vectorizing them."""

    def __init__(self, program=None):
        """Store the program and create a default ``Vectorizer``."""
        self.program = program
        self.vectorizer = Vectorizer()
        self.params_vectorize = dict()

    def set_params(self, **params):
        """Set the parameters of this estimator.

        Names containing ``"vectorizer__"`` are forwarded to the vectorizer,
        names containing ``"vectorize__"`` are stored in
        ``self.params_vectorize`` (and later passed to ``vectorize``), and
        all remaining names are forwarded to the wrapped program.
        In the first two cases only the token right after the first ``"__"``
        is used as the key.

        Returns
        -------
        self
        """
        params_vectorizer = dict()
        params_orderer = dict()
        for param, val in params.items():
            if "vectorizer__" in param:
                params_vectorizer[param.split('__')[1]] = val
            elif "vectorize__" in param:
                self.params_vectorize[param.split('__')[1]] = val
            else:
                params_orderer[param] = val
        self.program.set_params(**params_orderer)
        self.vectorizer.set_params(**params_vectorizer)
        return self

    def decision_function(self, graphs):
        """Return the program's decision scores for ``graphs``.

        Any exception is logged at debug level and ``None`` is returned.
        """
        try:
            # The graphs are consumed exactly once here, so no tee() is
            # needed; an unread tee branch would only keep every graph
            # buffered in memory.
            data_matrix = vectorize(graphs,
                                    vectorizer=self.vectorizer,
                                    **self.params_vectorize)
            return self.program.decision_function(data_matrix)
        except Exception as e:
            logger.debug('Failed iteration. Reason: %s', e)
            logger.debug('Exception', exc_info=True)
示例5: __init__
def __init__(self,
             complexity=None,
             nbits=20,
             sequence_vectorizer_complexity=3,
             graph_vectorizer_complexity=2,
             n_neighbors=5,
             sampling_prob=.5,
             n_iter=5,
             min_energy=-5,
             random_state=1):
    """Seed ``random`` and build the vectorizers and neighbor index.

    When ``complexity`` is given it overrides both
    ``sequence_vectorizer_complexity`` and ``graph_vectorizer_complexity``.
    """
    random.seed(random_state)

    if complexity is None:
        seq_complexity = sequence_vectorizer_complexity
        graph_complexity = graph_vectorizer_complexity
    else:
        seq_complexity = graph_complexity = complexity

    self.sequence_vectorizer = SeqVectorizer(
        complexity=seq_complexity,
        nbits=nbits,
        normalization=False,
        inner_normalization=False)
    self.graph_vectorizer = GraphVectorizer(
        complexity=graph_complexity,
        nbits=nbits)

    self.n_neighbors = n_neighbors
    self.sampling_prob = sampling_prob
    self.n_iter = n_iter
    self.min_energy = min_energy
    self.nearest_neighbors = NearestNeighbors(n_neighbors=n_neighbors)
示例6: set_params
def set_params(self, r=3, d=8, nbits=16, discrete=True,
               balance=False, subsample_size=200, ratio=2,
               normalization=False, inner_normalization=False,
               penalty='elasticnet'):
    """Store the settings and rebuild the model and the vectorizer.

    Every call replaces ``self.model`` and ``self.vectorizer`` with fresh
    objects; arguments that are not passed fall back to the defaults in
    the signature. ``penalty='perceptron'`` selects a ``Perceptron``,
    any other value is passed to ``SGDClassifier`` as its penalty.

    Returns
    -------
    self
    """
    self.r, self.d, self.nbits = r, d, nbits
    self.normalization = normalization
    self.inner_normalization = inner_normalization
    self.discrete = discrete
    self.balance = balance
    self.subsample_size = subsample_size
    self.ratio = ratio

    if penalty != 'perceptron':
        self.model = SGDClassifier(
            average=True, class_weight='balanced', shuffle=True,
            penalty=penalty, max_iter=5, tol=None)
    else:
        self.model = Perceptron(max_iter=5, tol=None)

    vectorizer_settings = dict(
        r=self.r,
        d=self.d,
        normalization=self.normalization,
        inner_normalization=self.inner_normalization,
        discrete=self.discrete,
        nbits=self.nbits)
    self.vectorizer = Vectorizer(**vectorizer_settings)
    return self
示例7: IsomorphicClusterer
class IsomorphicClusterer(BaseEstimator, ClusterMixin):
    """Assign each graph a label derived from hashing its feature vector."""

    def __init__(self):
        """Create the default ``Vectorizer``."""
        self.vectorizer = Vectorizer()

    def set_params(self, **params):
        """Write every given parameter straight into the instance ``__dict__``.

        Returns
        -------
        self
        """
        self.__dict__.update(params)
        return self

    def fit_predict(self, graphs):
        """Lazily yield one hash value per graph.

        Each graph is vectorized on its own and the hash of
        ``tuple(vec.data + vec.indices)`` is yielded. An exception is logged
        at debug level and ends the generator.
        """
        try:
            for graph in graphs:
                vec = self.vectorizer.transform([graph])
                yield hash(tuple(vec.data + vec.indices))
        except Exception as e:
            logger.debug('Failed iteration. Reason: %s', e)
            logger.debug('Exception', exc_info=True)
示例8: __init__
def __init__(self,
             complexity=3,
             r=None,
             d=None,
             min_r=0,
             min_d=0,
             nbits=20,
             normalization=True,
             inner_normalization=True,
             n=1,
             min_n=2):
    """Create the underlying ``Vectorizer`` and an empty vectorizer list.

    All arguments are forwarded unchanged to ``Vectorizer``.

    Arguments:
    complexity : int
        The complexity of the features extracted.
    r : int
        The maximal radius size.
    d : int
        The maximal distance size.
    min_r : int
        The minimal radius size.
    min_d : int
        The minimal distance size.
    nbits : int
        The number of bits that defines the feature space size:
        |feature space|=2^nbits.
    normalization : bool
        If set the resulting feature vector will have unit euclidean norm.
    inner_normalization : bool
        If set the feature vector for a specific combination of the radius
        and distance size will have unit euclidean norm. When used together
        with the 'normalization' flag it is applied first and the resulting
        feature vector is normalized afterwards.
    n : int
        The maximal number of clusters used to discretize label vectors.
    min_n : int
        The minimal number of clusters used to discretize label vectors.
    """
    vectorizer_settings = dict(
        complexity=complexity,
        r=r,
        d=d,
        min_r=min_r,
        min_d=min_d,
        nbits=nbits,
        normalization=normalization,
        inner_normalization=inner_normalization,
        n=n,
        min_n=min_n)
    self.vectorizer = Vectorizer(**vectorizer_settings)
    self.vectorizers = []
示例9: EdenRegressor
class EdenRegressor(BaseEstimator, RegressorMixin):
    """Regress on graphs: vectorize them, then fit an ``SGDRegressor``."""

    def __init__(self, r=3, d=8, nbits=16, discrete=True,
                 normalization=True, inner_normalization=True,
                 penalty='elasticnet', loss='squared_loss'):
        """Delegate all configuration to ``set_params``."""
        self.set_params(r, d, nbits, discrete,
                        normalization, inner_normalization,
                        penalty, loss)

    def set_params(self, r=3, d=8, nbits=16, discrete=True,
                   normalization=True, inner_normalization=True,
                   penalty='elasticnet', loss='squared_loss'):
        """Store the settings and rebuild the model and the vectorizer.

        Every call replaces ``self.model`` and ``self.vectorizer``;
        arguments that are not passed fall back to the signature defaults.

        Returns
        -------
        self
        """
        self.r, self.d, self.nbits = r, d, nbits
        self.normalization = normalization
        self.inner_normalization = inner_normalization
        self.discrete = discrete

        self.model = SGDRegressor(
            loss=loss, penalty=penalty,
            average=True, shuffle=True,
            max_iter=5, tol=None)

        vectorizer_settings = dict(
            r=self.r,
            d=self.d,
            normalization=self.normalization,
            inner_normalization=self.inner_normalization,
            discrete=self.discrete,
            nbits=self.nbits)
        self.vectorizer = Vectorizer(**vectorizer_settings)
        return self

    def transform(self, graphs):
        """Return the vectorizer's representation of ``graphs``."""
        return self.vectorizer.transform(graphs)

    @timeit
    def kernel_matrix(self, graphs):
        """Return the linear pairwise kernel of the vectorized graphs."""
        features = self.transform(graphs)
        return metrics.pairwise.pairwise_kernels(features, metric='linear')

    def fit(self, graphs, targets, randomize=True):
        """Fit the model on the vectorized graphs; ``randomize`` is unused.

        Returns
        -------
        self
        """
        features = self.transform(graphs)
        self.model = self.model.fit(features, targets)
        return self

    def predict(self, graphs):
        """Return the model's predictions for ``graphs``."""
        return self.model.predict(self.transform(graphs))

    def decision_function(self, graphs):
        """Alias of ``predict``."""
        return self.predict(graphs)
示例10: __init__
def __init__(self, program=None, relabel=False, reweight=1.0):
    """Record the arguments and create a default ``Vectorizer``.

    ``params_vectorize`` starts out as an empty dict.
    """
    self.program, self.relabel, self.reweight = program, relabel, reweight
    self.vectorizer = Vectorizer()
    self.params_vectorize = {}
示例11: __init__
def __init__(self,
             min_subarray_size=7,
             max_subarray_size=10,
             min_motif_count=1,
             min_cluster_size=1,
             training_size=None,
             negative_ratio=1,
             shuffle_order=2,
             n_iter_search=1,
             complexity=4,
             radius=None,
             distance=None,
             nbits=20,
             clustering_algorithm=None,
             n_jobs=4,
             n_blocks=8,
             block_size=None,
             pre_processor_n_jobs=4,
             pre_processor_n_blocks=8,
             pre_processor_block_size=None,
             random_state=1):
    """Record the configuration, build both vectorizers and reset the state.

    ``radius`` and ``distance`` are only forwarded to the vectorizers (as
    ``r`` and ``d``) and are not stored on the instance. ``random`` is
    seeded with ``random_state``.
    """
    # Parallel execution settings.
    self.n_jobs = n_jobs
    self.n_blocks = n_blocks
    self.block_size = block_size
    self.pre_processor_n_jobs = pre_processor_n_jobs
    self.pre_processor_n_blocks = pre_processor_n_blocks
    self.pre_processor_block_size = pre_processor_block_size

    # Training and search settings.
    self.training_size = training_size
    self.n_iter_search = n_iter_search
    self.negative_ratio = negative_ratio
    self.shuffle_order = shuffle_order
    self.clustering_algorithm = clustering_algorithm

    # Motif extraction thresholds.
    self.min_subarray_size = min_subarray_size
    self.max_subarray_size = max_subarray_size
    self.min_motif_count = min_motif_count
    self.min_cluster_size = min_cluster_size

    # Graph and sequence vectorizers share the same settings.
    self.complexity = complexity
    self.nbits = nbits
    shared_settings = dict(complexity=self.complexity,
                           r=radius, d=distance,
                           nbits=self.nbits)
    self.vectorizer = Vectorizer(**shared_settings)
    self.seq_vectorizer = SeqVectorizer(**shared_settings)

    self.random_state = random_state
    random.seed(random_state)

    # Containers that start out empty.
    self.motives_db = defaultdict(list)
    self.motives = []
    self.clusters = defaultdict(list)
    self.cluster_models = []
    self.importances = []
示例12: Annotator
class Annotator():
    """Train an estimator on graphs and use it to annotate graphs with scores."""

    def __init__(self, multiprocess=True, score_attribute='importance'):
        """Store the options and create a default ``Vectorizer``.

        ``multiprocess`` and ``score_attribute`` are later passed on to
        ``mass_annotate_mp``.
        """
        self.score_attribute = score_attribute
        self.vectorizer = Vectorizer()
        self.multi_process = multiprocess
        self.trained = False

    def fit(self, graphs_pos, graphs_neg=None):
        """Train the estimator once; later calls are no-ops.

        Previous annotations are stripped from all graphs in place. With
        negative graphs an ``SGDClassifier`` is trained on labels ``1`` /
        ``-1``; otherwise an ``ExperimentalOneClassEstimator`` is trained
        on the positive graphs only.

        Parameters
        ----------
        graphs_pos : list
            Positive example graphs.
        graphs_neg : list, optional
            Negative example graphs; ``None`` is treated as empty.

        Returns
        -------
        self
        """
        if self.trained:
            return self
        self.trained = True

        # None instead of a mutable [] default in the signature.
        graphs_neg = [] if graphs_neg is None else graphs_neg
        all_graphs = graphs_pos + graphs_neg

        # Explicit loop instead of map(): on Python 3 map() is lazy, so the
        # clean-up side effects would never be executed.
        for graph in all_graphs:
            utils.remove_eden_annotation(graph)
            utils.node_operation(graph, lambda n, d: d.pop('importance', None))
            graph.graph.pop('mass_annotate_mp_was_here', None)

        if graphs_neg:
            self.estimator = SGDClassifier()
            classes = [1] * len(graphs_pos) + [-1] * len(graphs_neg)
            self.estimator.fit(self.vectorizer.transform(all_graphs), classes)
        else:
            self.estimator = ExperimentalOneClassEstimator()
            self.estimator.fit(self.vectorizer.transform(graphs_pos))
        return self

    def fit_transform(self, graphs_p, graphs_n=None):
        """Fit on both sets, then return ``(annotated_pos, annotated_neg)``."""
        self.fit(graphs_p, graphs_n)
        return self.transform(graphs_p), self.transform(graphs_n)

    def transform(self, graphs):
        """Alias of ``annotate`` with default arguments."""
        return self.annotate(graphs)

    def annotate(self, graphs, neg=False):
        """Annotate ``graphs`` via ``mass_annotate_mp``.

        Returns an empty list for empty or ``None`` input; ``neg`` is passed
        on as ``invert_score``.
        """
        if not graphs:
            return []
        return mass_annotate_mp(graphs,
                                self.vectorizer,
                                score_attribute=self.score_attribute,
                                estimator=self.estimator,
                                multi_process=self.multi_process,
                                invert_score=neg)
示例13: __init__
def __init__(self, program=None):
    """Store the program and create a default ``Vectorizer``.

    Parameters
    ----------
    program : estimator, optional
        Wrapped estimator. When omitted, a fresh
        ``NearestNeighbors(n_neighbors=2)`` is created for this instance.
    """
    # Default arguments are evaluated once at definition time; building the
    # estimator here avoids sharing one instance between all wrappers.
    if program is None:
        program = NearestNeighbors(n_neighbors=2)
    self.program = program
    self.vectorizer = Vectorizer()
    self.params_vectorize = dict()
示例14: RegressorWrapper
class RegressorWrapper(BaseEstimator, RegressorMixin):
    """Fit and apply a wrapped regressor on vectorized graphs."""

    def __init__(self, program=None):
        """Store the program and create a default ``Vectorizer``.

        Parameters
        ----------
        program : estimator, optional
            Wrapped regressor. When omitted, a fresh
            ``SGDRegressor(average=True, shuffle=True)`` is created for
            this instance.
        """
        # Default arguments are evaluated once at definition time; building
        # the estimator here prevents every wrapper from sharing (and
        # re-fitting) one and the same regressor.
        if program is None:
            program = SGDRegressor(average=True, shuffle=True)
        self.program = program
        self.vectorizer = Vectorizer()
        self.params_vectorize = dict()

    def set_params(self, **params):
        """Set the parameters of this estimator.

        Names containing ``"vectorizer__"`` are forwarded to the vectorizer,
        names containing ``"vectorize__"`` are stored in
        ``self.params_vectorize`` (and later passed to ``vectorize``), and
        all remaining names are forwarded to the wrapped program.
        In the first two cases only the token right after the first ``"__"``
        is used as the key.

        Returns
        -------
        self
        """
        params_vectorizer = dict()
        params_program = dict()
        for param, val in params.items():
            if "vectorizer__" in param:
                params_vectorizer[param.split('__')[1]] = val
            elif "vectorize__" in param:
                self.params_vectorize[param.split('__')[1]] = val
            else:
                params_program[param] = val
        self.program.set_params(**params_program)
        self.vectorizer.set_params(**params_vectorizer)
        return self

    def fit(self, graphs):
        """Fit the program on the vectorized graphs and their targets.

        Returns ``self`` on success. Any exception (including a missing
        target) is logged at debug level and ``None`` is returned.
        """
        try:
            # The graphs are read twice: once for features, once for targets.
            graphs, graphs_ = tee(graphs)
            data_matrix = vectorize(graphs_,
                                    vectorizer=self.vectorizer,
                                    **self.params_vectorize)
            y = self._extract_targets(graphs)
            self.program = self.program.fit(data_matrix, y)
            return self
        except Exception as e:
            logger.debug('Failed iteration. Reason: %s', e)
            logger.debug('Exception', exc_info=True)

    def predict(self, graphs):
        """Lazily yield each graph annotated with its prediction.

        The prediction is stored in ``graph.graph`` under both
        ``'prediction'`` and ``'score'``. An exception is logged at debug
        level and ends the generator.
        """
        try:
            # The graphs are read twice: once for features, once to annotate.
            graphs, graphs_ = tee(graphs)
            data_matrix = vectorize(graphs_,
                                    vectorizer=self.vectorizer,
                                    **self.params_vectorize)
            predictions = self.program.predict(data_matrix)
            for prediction, graph in izip(predictions, graphs):
                graph.graph['prediction'] = prediction
                graph.graph['score'] = prediction
                yield graph
        except Exception as e:
            logger.debug('Failed iteration. Reason: %s', e)
            logger.debug('Exception', exc_info=True)

    def _extract_targets(self, graphs):
        """Return the flattened ``'target'`` values of all graphs.

        Raises
        ------
        Exception
            If a graph has no ``'target'`` entry or it is ``None``.
        """
        y = []
        for graph in graphs:
            target = graph.graph.get('target', None)
            if target is None:
                # Single literal: a backslash continuation inside the string
                # would leak source indentation into the message.
                raise Exception(
                    'Missing the attribute "target" in graph dictionary!')
            y.append(target)
        return np.ravel(y)
示例15: ClassifierWrapper
class ClassifierWrapper(BaseEstimator, ClassifierMixin):
    """Fit and apply a wrapped classifier on vectorized graphs."""

    def __init__(self, program=None):
        """Store the program and create a default ``Vectorizer``.

        Parameters
        ----------
        program : estimator, optional
            Wrapped classifier. When omitted, a fresh
            ``SGDClassifier(average=True, class_weight='balanced',
            shuffle=True)`` is created for this instance.
        """
        # Default arguments are evaluated once at definition time; building
        # the estimator here prevents every wrapper from sharing (and
        # re-fitting) one and the same classifier.
        if program is None:
            program = SGDClassifier(average=True,
                                    class_weight='balanced',
                                    shuffle=True)
        self.program = program
        self.vectorizer = Vectorizer()
        self.params_vectorize = dict()

    def set_params(self, **params):
        """Set the parameters of this estimator.

        Names containing ``"vectorizer__"`` are forwarded to the vectorizer,
        names containing ``"vectorize__"`` are stored in
        ``self.params_vectorize`` (and later passed to ``vectorize``), and
        all remaining names are forwarded to the wrapped program.
        In the first two cases only the token right after the first ``"__"``
        is used as the key.

        Returns
        -------
        self
        """
        params_vectorizer = dict()
        params_program = dict()
        for param, val in params.items():
            if "vectorizer__" in param:
                params_vectorizer[param.split('__')[1]] = val
            elif "vectorize__" in param:
                self.params_vectorize[param.split('__')[1]] = val
            else:
                params_program[param] = val
        self.program.set_params(**params_program)
        self.vectorizer.set_params(**params_vectorizer)
        return self

    def fit(self, graphs):
        """Fit the program on the vectorized graphs and their targets.

        When all targets are identical, the negated data matrix is appended
        as artificial examples labelled ``-1`` before fitting.

        Returns ``self`` on success. Any exception (including a missing
        target) is logged at debug level and ``None`` is returned.
        """
        try:
            # The graphs are read twice: once for features, once for targets.
            graphs, graphs_ = tee(graphs)
            data_matrix = vectorize(graphs_,
                                    vectorizer=self.vectorizer,
                                    **self.params_vectorize)
            y = self._extract_targets(graphs)
            # manage case for single class learning
            if len(set(y)) == 1:
                # make negative data matrix
                negative_data_matrix = data_matrix.multiply(-1)
                # make targets
                # NOTE(review): if the only class present is already -1 the
                # result still has a single class -- confirm callers never
                # pass that.
                y = list(y)
                y_neg = [-1] * len(y)
                # concatenate elements
                data_matrix = vstack(
                    [data_matrix, negative_data_matrix], format="csr")
                y = np.ravel(y + y_neg)
            self.program = self.program.fit(data_matrix, y)
            return self
        except Exception as e:
            logger.debug('Failed iteration. Reason: %s', e)
            logger.debug('Exception', exc_info=True)

    def predict(self, graphs):
        """Lazily yield each graph annotated with prediction and score.

        ``graph.graph['prediction']`` receives the program's prediction and
        ``graph.graph['score']`` its decision-function value. An exception
        is logged at debug level and ends the generator.
        """
        try:
            # The graphs are read twice: once for features, once to annotate.
            graphs, graphs_ = tee(graphs)
            data_matrix = vectorize(graphs_,
                                    vectorizer=self.vectorizer,
                                    **self.params_vectorize)
            predictions = self.program.predict(data_matrix)
            scores = self.program.decision_function(data_matrix)
            for score, prediction, graph in izip(scores, predictions, graphs):
                graph.graph['prediction'] = prediction
                graph.graph['score'] = score
                yield graph
        except Exception as e:
            logger.debug('Failed iteration. Reason: %s', e)
            logger.debug('Exception', exc_info=True)

    def _extract_targets(self, graphs):
        """Return the flattened ``'target'`` values of all graphs.

        Raises
        ------
        Exception
            If a graph has no ``'target'`` entry or it is ``None``.
        """
        y = []
        for graph in graphs:
            target = graph.graph.get('target', None)
            if target is None:
                # Single literal: a backslash continuation inside the string
                # would leak source indentation into the message.
                raise Exception(
                    'Missing the attribute "target" in graph dictionary!')
            y.append(target)
        return np.ravel(y)