

Python Vectorizer.transform Method Code Examples

This article collects typical usage examples of the Python method eden.graph.Vectorizer.transform. If you are wondering what Vectorizer.transform does, how to call it, or where to find working examples, the hand-picked code examples below should help. You can also explore further usage examples of the containing class, eden.graph.Vectorizer.


Below are 10 code examples of the Vectorizer.transform method, sorted by popularity by default.
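
Before the examples, here is a minimal, self-contained sketch of what Vectorizer.transform does: it turns labeled networkx graphs into rows of a sparse feature matrix. The helper and the toy labels below are invented for illustration; EDeN and networkx are assumed to be installed, and EDeN expects a 'label' attribute on every node (and optionally on every edge).

import networkx as nx
from eden.graph import Vectorizer

def make_path_graph(labels):
    # hypothetical helper: build a small labeled path graph
    g = nx.Graph()
    for i, lab in enumerate(labels):
        g.add_node(i, label=lab)
        if i > 0:
            g.add_edge(i - 1, i, label='-')
    return g

graphs = [make_path_graph('ABCA'), make_path_graph('ABBA')]
vectorizer = Vectorizer(r=3, d=4)
X = vectorizer.transform(graphs)  # scipy.sparse CSR matrix, one row per graph
print(X.shape)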

Example 1: IsomorphicClusterer

# Required module: from eden.graph import Vectorizer [as alias]
# Or alternatively: from eden.graph.Vectorizer import transform [as alias]
# Imports needed to make this excerpt self-contained:
import logging
from sklearn.base import BaseEstimator, ClusterMixin
from eden.graph import Vectorizer

logger = logging.getLogger(__name__)
class IsomorphicClusterer(BaseEstimator, ClusterMixin):
    """Cluster graphs by identity of their EDeN feature vectors."""

    def __init__(self):
        """Construct."""
        self.vectorizer = Vectorizer()

    def set_params(self, **params):
        """Set the parameters of this estimator.

        Returns
        -------
        self
        """
        for param in params:
            self.__dict__[param] = params[param]
        return self

    def fit_predict(self, graphs):
        """Yield a hash-valued cluster id for each input graph."""
        def vec_to_hash(vec):
            # fingerprint a sparse row: hash the element-wise sum of values and indices
            return hash(tuple(vec.data + vec.indices))
        try:
            for graph in graphs:
                prediction = vec_to_hash(self.vectorizer.transform([graph]))
                yield prediction
        except Exception as e:
            logger.debug('Failed iteration. Reason: %s' % e)
            logger.debug('Exception', exc_info=True)
Author: smautner, Project: EDeN, Lines: 34, Source: __init__.py
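
A hedged usage sketch for this clusterer, reusing the toy graphs from the intro sketch: graphs whose EDeN feature vectors coincide receive the same hash-valued cluster id.

clusterer = IsomorphicClusterer()
cluster_ids = list(clusterer.fit_predict(graphs))  # one hash per graph
n_clusters = len(set(cluster_ids))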

Example 2: EdenRegressor

# Required module: from eden.graph import Vectorizer [as alias]
# Or alternatively: from eden.graph.Vectorizer import transform [as alias]
# (SGDRegressor comes from sklearn.linear_model and metrics from sklearn;
# timeit is a decorator defined elsewhere in the source project)
class EdenRegressor(BaseEstimator, RegressorMixin):
    """Build a regressor for graphs."""

    def __init__(self, r=3, d=8, nbits=16, discrete=True,
                 normalization=True, inner_normalization=True,
                 penalty='elasticnet', loss='squared_loss'):
        """construct."""
        self.set_params(r, d, nbits, discrete,
                        normalization, inner_normalization,
                        penalty, loss)

    def set_params(self, r=3, d=8, nbits=16, discrete=True,
                   normalization=True, inner_normalization=True,
                   penalty='elasticnet', loss='squared_loss'):
        """setter."""
        self.r = r
        self.d = d
        self.nbits = nbits
        self.normalization = normalization
        self.inner_normalization = inner_normalization
        self.discrete = discrete
        self.model = SGDRegressor(
            loss=loss, penalty=penalty,
            average=True, shuffle=True,
            max_iter=5, tol=None)
        self.vectorizer = Vectorizer(
            r=self.r, d=self.d,
            normalization=self.normalization,
            inner_normalization=self.inner_normalization,
            discrete=self.discrete,
            nbits=self.nbits)
        return self

    def transform(self, graphs):
        """transform."""
        x = self.vectorizer.transform(graphs)
        return x

    @timeit
    def kernel_matrix(self, graphs):
        """kernel_matrix."""
        x = self.transform(graphs)
        return metrics.pairwise.pairwise_kernels(x, metric='linear')

    def fit(self, graphs, targets, randomize=True):
        """fit."""
        x = self.transform(graphs)
        self.model = self.model.fit(x, targets)
        return self

    def predict(self, graphs):
        """predict."""
        x = self.transform(graphs)
        preds = self.model.predict(x)
        return preds

    def decision_function(self, graphs):
        """decision_function."""
        return self.predict(graphs)
Author: fabriziocosta, Project: EDeN, Lines: 61, Source: estimator.py
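
A hedged usage sketch; the graphs and the numeric targets are assumptions (for instance, the toy path graphs from the intro sketch paired with arbitrary regression targets):

reg = EdenRegressor(r=2, d=4)
reg.fit(graphs, [0.5, 1.5])    # assumed graphs and targets
preds = reg.predict(graphs)
K = reg.kernel_matrix(graphs)  # linear kernel on the vectorized graphs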

Example 3: prep

# Required module: from eden.graph import Vectorizer [as alias]
# Or alternatively: from eden.graph.Vectorizer import transform [as alias]
def prep(graphlist, id=0):
    if not graphlist:
        return {}
    v = Vectorizer()
    # strip 'weight' node attributes before vectorizing; an explicit loop is
    # used because map() is lazy in Python 3 (node_operation is a project helper)
    for g in graphlist:
        node_operation(g, lambda n, d: d.pop('weight', None))
    csr = v.transform(graphlist)
    # fingerprint each row: hash the element-wise sum of values and indices
    hash_function = lambda vec: hash(tuple(vec.data + vec.indices))
    return {hash_function(row): (id, ith) for ith, row in enumerate(csr)}
Author: fabriziocosta, Project: GraphLearn, Lines: 10, Source: graph_set_operations.py
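
A hypothetical usage sketch: fingerprint two graph sets (graphs_a and graphs_b are assumed inputs) and intersect the hash keys to find graphs whose feature vectors coincide across the sets.

index_a = prep(graphs_a, id=0)
index_b = prep(graphs_b, id=1)
shared = set(index_a) & set(index_b)  # fingerprints occurring in both sets
matches = [(index_a[h], index_b[h]) for h in shared]  # (set id, position) pairs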

Example 4: Annotator

# Required module: from eden.graph import Vectorizer [as alias]
# Or alternatively: from eden.graph.Vectorizer import transform [as alias]
# (SGDClassifier comes from sklearn.linear_model; utils, mass_annotate_mp and
# ExperimentalOneClassEstimator are helpers from the source project)
class Annotator:

    def __init__(self, multiprocess=True, score_attribute='importance'):
        self.score_attribute = score_attribute
        self.vectorizer = Vectorizer()
        self.multi_process = multiprocess
        self.trained = False

    def fit(self, graphs_pos, graphs_neg=[]):
        if self.trained:
            return self
        self.trained = True
        # strip any previous EDeN annotations; explicit loops are used because
        # map() is lazy in Python 3 and would never run these side effects
        for g in graphs_pos + graphs_neg:
            utils.remove_eden_annotation(g)
            utils.node_operation(g, lambda n, d: d.pop('importance', None))
            g.graph.pop('mass_annotate_mp_was_here', None)

        if graphs_neg:
            # binary case: train on positives vs. negatives
            self.estimator = SGDClassifier()
            classes = [1] * len(graphs_pos) + [-1] * len(graphs_neg)
            self.estimator.fit(self.vectorizer.transform(graphs_pos + graphs_neg), classes)
        else:
            self.estimator = ExperimentalOneClassEstimator()
            self.estimator.fit(self.vectorizer.transform(graphs_pos))
        return self


    def fit_transform(self, graphs_p, graphs_n=[]):
        self.fit(graphs_p, graphs_n)
        return self.transform(graphs_p), self.transform(graphs_n)

    def transform(self, graphs):
        return self.annotate(graphs)

    def annotate(self, graphs, neg=False):
        if not graphs:
            return []
        return mass_annotate_mp(graphs, self.vectorizer,
                                score_attribute=self.score_attribute,
                                estimator=self.estimator,
                                multi_process=self.multi_process,
                                invert_score=neg)
Author: smautner, Project: GraphLearn, Lines: 42, Source: annotate.py
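
A hedged usage sketch; graphs_pos and graphs_neg are assumed lists of labeled networkx graphs, and the heavy lifting happens in the project helper mass_annotate_mp, which is not shown in this excerpt.

ann = Annotator(multiprocess=False)
pos_annotated, neg_annotated = ann.fit_transform(graphs_pos, graphs_neg)
# each returned graph should now carry an 'importance' score on its nodes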

Example 5: compute_NSPDK_features

# Required module: from eden.graph import Vectorizer [as alias]
# Or alternatively: from eden.graph.Vectorizer import transform [as alias]
def compute_NSPDK_features():
    import eden
    from eden.graph import Vectorizer
    from eden.converter.molecule.obabel import mol_file_to_iterable, obabel_to_eden
    # olfaction_prediction_path is a module-level constant of the source project
    mol_path = olfaction_prediction_path + '/data/sdf/'
    iter_mols = mol_file_to_iterable(mol_path + '/all_mol.sdf', 'sdf')
    iter_graphs = obabel_to_eden(iter_mols)

    vectorizer = Vectorizer(r=3, d=4)
    X = vectorizer.transform(iter_graphs)
    return X
Author: joelmainland, Project: olfaction-prediction, Lines: 13, Source: gramian.py
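
Given that the source file is called gramian.py, a plausible follow-up (an assumption, not shown in the excerpt) is to turn the NSPDK feature matrix into a Gram matrix with scikit-learn:

from sklearn.metrics.pairwise import pairwise_kernels

X = compute_NSPDK_features()                 # sparse NSPDK feature matrix
gram = pairwise_kernels(X, metric='linear')  # Gram matrix of the molecules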

Example 6: DiscSampler

# Required module: from eden.graph import Vectorizer [as alias]
# Or alternatively: from eden.graph.Vectorizer import transform [as alias]
# (copy, heapq, itertools and networkx as nx are assumed standard imports;
# LSHForest came from sklearn.neighbors in older scikit-learn releases)
class DiscSampler:
    '''Sampler that prioritizes graphs by their distance to the decision hyperplane.'''

    def __init__(self):
        # this is mainly for the forest. the sampler uses a different vectorizer
        self.vectorizer = Vectorizer(nbits=14)

    def get_heap_and_forest(self, griter, k):
        '''
        Create the heap and the forest:
        heap entries are (dist to hyperplane, count, graph),
        and the forest is just a nearest-neighbor index from sklearn.
        '''
        graphs = list(griter)
        graphs2 = copy.deepcopy(graphs)
        # transform does mess up the graph objects, hence the deep copy
        X = self.vectorizer.transform(graphs)

        forest = LSHForest()
        forest.fit(X)
        print('got forest')

        heap = []
        for vector, graph in zip(X, graphs2):
            graph2 = nx.Graph(graph)
            heapq.heappush(heap, (
                self.sampler.estimator.predict_proba(self.sampler.vectorizer.transform_single(graph2))[0][1],
                # score ~ dist from hyperplane
                k + 1,  # keep the counter high so the start graphs are not output at the end
                graph))  # and finally the actual graph

        print('got heap')
        distances, unused = forest.kneighbors(X, n_neighbors=2)
        distances = [a[1] for a in distances]  # the second element is the distance we want
        avg_dist = distances[len(distances) // 2]  # median distance (// for Python 3)
        print('got dist')

        return heap, forest, avg_dist

    '''
    def sample_simple(self,graphiter,iterneg):
        graphiter,grait,griter2 = itertools.tee(graphiter,3)
        
        self.fit_sampler(graphiter,iterneg)
        a,b,c=self.get_heap_and_forest( griter2, 30)


        grait= itertools.islice(grait,5)
        rez=self.sampler.sample(grait,n_samples=5,
                                       batch_size=1,
                                       n_jobs=0,
                                       n_steps=1,
                                       select_cip_max_tries=100,
                                       accept_annealing_factor=.5,
                                       generatormode=False,
                                       same_core_size=False )
        return rez
    '''

    def sample_graphs(self, graphiter, iter_neg, radius, how_many, check_k, heap_chunk_size=10):

        # some initialisation:
        # create the sampler, set up heap and forest
        graphiter, iter2 = itertools.tee(graphiter)
        self.fit_sampler(iter2, iter_neg)

        heap, forest, avg_dist = self.get_heap_and_forest(graphiter, check_k)
        # heap should be like   (hpdist, count, graph)
        radius = radius * avg_dist
        # now start the sampling loop
        result = []
        while heap and len(result) < how_many:

            # pop all the graphs we want
            todo = []
            for i in range(heap_chunk_size):
                if heap:
                    todo.append(heapq.heappop(heap))

            # let the sampler do the sampling
            graphz = [e[2] for e in todo]
            # draw.draw_graph_set_graphlearn(graphz)
            work = self.sampler.sample(graphz,
                                       batch_size=1,
                                       n_jobs=0,
                                       n_steps=30,
                                       select_cip_max_tries=100,
                                       improving_threshold=.5,
                                       generatormode=False,
                                       max_core_size_diff=False,
                                       n_samples=3
                                       )
            # now take care of:
            # - the initially popped graphs: increase and check the counter, then reinsert into the heap
            # - the new graphs: put them into the heap and the forest
#......... rest of the code omitted .........
Author: smautner, Project: GraphLearn, Lines: 103, Source: discsampler.py
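
The heap discipline used above is worth isolating. In the following self-contained, illustrative sketch (toy scores and payloads, not project code), entries are (score, counter, payload) tuples, so heapq orders by score first and the counter breaks ties without ever comparing the payloads:

import heapq

heap = []
for score, graph_id in [(0.9, 'g1'), (0.2, 'g2'), (0.5, 'g3')]:
    heapq.heappush(heap, (score, 0, graph_id))
score, _, payload = heapq.heappop(heap)  # 'g2': the lowest score pops first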

Example 7: Vectorizer

# Required module: from eden.graph import Vectorizer [as alias]
# Or alternatively: from eden.graph.Vectorizer import transform [as alias]
# (SeqVectorizer and GraphVectorizer are presumably aliased EDeN sequence and
# graph vectorizers; NearestNeighbors comes from sklearn.neighbors)
class Vectorizer(object):

    def __init__(self,
                 complexity=None,
                 nbits=20,
                 sequence_vectorizer_complexity=3,
                 graph_vectorizer_complexity=2,
                 n_neighbors=5,
                 sampling_prob=.5,
                 n_iter=5,
                 min_energy=-5,
                 random_state=1):
        random.seed(random_state)
        if complexity is not None:
            sequence_vectorizer_complexity = complexity
            graph_vectorizer_complexity = complexity

        self.sequence_vectorizer = SeqVectorizer(complexity=sequence_vectorizer_complexity,
                                                 nbits=nbits,
                                                 normalization=False,
                                                 inner_normalization=False)
        self.graph_vectorizer = GraphVectorizer(complexity=graph_vectorizer_complexity, nbits=nbits)
        self.n_neighbors = n_neighbors
        self.sampling_prob = sampling_prob
        self.n_iter = n_iter
        self.min_energy = min_energy
        self.nearest_neighbors = NearestNeighbors(n_neighbors=n_neighbors)

    def fit(self, seqs):
        # store seqs
        self.seqs = list(normalize_seqs(seqs))
        data_matrix = self.sequence_vectorizer.transform(self.seqs)
        # fit nearest_neighbors model
        self.nearest_neighbors.fit(data_matrix)
        return self

    def fit_transform(self, seqs, sampling_prob=None, n_iter=None):
        seqs, seqs_ = tee(seqs)
        return self.fit(seqs_).transform(seqs, sampling_prob=sampling_prob, n_iter=n_iter)

    def transform(self, seqs, sampling_prob=None, n_iter=None):
        seqs = list(normalize_seqs(seqs))
        # forward the optional sampling parameters to graph construction
        graphs_ = self.graphs(seqs, sampling_prob=sampling_prob, n_iter=n_iter)
        data_matrix = self.graph_vectorizer.transform(graphs_)
        return data_matrix

    def graphs(self, seqs, sampling_prob=None, n_iter=None):
        seqs = list(normalize_seqs(seqs))
        if n_iter is not None:
            self.n_iter = n_iter
        if sampling_prob is not None:
            self.sampling_prob = sampling_prob
        for seq, neighs in self._compute_neighbors(seqs):
            if self.n_iter > 1:
                header, sequence, struct, energy = self._optimize_struct(seq, neighs)
            else:
                header, sequence, struct, energy = self._align_sequence_structure(seq, neighs)
            graph = self._seq_to_eden(header, sequence, struct, energy)
            yield graph

    def _optimize_struct(self, seq, neighs):
        structs = []
        results = []
        for i in range(self.n_iter):
            new_neighs = self._sample_neighbors(neighs)
            header, sequence, struct, energy = self._align_sequence_structure(seq, new_neighs)
            results.append((header, sequence, struct, energy))
            structs.append(struct)
        instance_id = self._most_representative(structs)
        selected = results[instance_id]
        return selected

    def _most_representative(self, structs):
        # compute kernel matrix with sequence_vectorizer
        data_matrix = self.sequence_vectorizer.transform(structs)
        kernel_matrix = pairwise_kernels(data_matrix, metric='rbf', gamma=1)
        # compute instance density as the average pairwise similarity
        density = np.sum(kernel_matrix, 0) / data_matrix.shape[0]
        # return the index of the densest (most representative) instance
        max_id = np.argsort(-density)[0]
        return max_id

    def _sample_neighbors(self, neighs):
        out_neighs = []
        # insert one element at random
        out_neighs.append(random.choice(neighs))
        # add other elements sampling without replacement
        for neigh in neighs:
            if random.random() < self.sampling_prob:
                out_neighs.append(neigh)
        return out_neighs

    def _align_sequence_structure(self, seq, neighs, structure_deletions=False):
        header = seq[0]
        if len(neighs) < 1:
            clean_seq, clean_struct = rnafold.RNAfold_wrapper(seq[1])
            energy = 0
            logger.debug('Warning: no alignment for: %s' % seq)
        else:
            str_out = convert_seq_to_fasta_str(seq)
#......... rest of the code omitted .........
Author: gianlucacorrado, Project: EDeN, Lines: 103, Source: RNA.py
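
A hedged usage sketch for this RNA vectorizer: judging from _align_sequence_structure, which reads seq[0] as a header and seq[1] as the sequence, the expected input is (header, sequence) tuples; the sample sequences below are invented.

seqs = [('id1', 'GGGAAACCCAAAGGG'), ('id2', 'GGGAAAUCCAAAGGG')]
vec = Vectorizer(n_iter=1)
vec.fit(seqs)
X = vec.transform(seqs)  # graph features of the predicted structures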

Example 8: range

# Required module: from eden.graph import Vectorizer [as alias]
# Or alternatively: from eden.graph.Vectorizer import transform [as alias]
# (this fragment comes from a longer script: sampler, estimator, vectorizer,
# unpack, percentages and count are defined earlier in BFG1.py;
# plt is matplotlib.pyplot)
    improved_graphs = sampler.transform(graphs_pos_,
                                        same_radius=False,
                                        size_constrained_core_choice=True,
                                        sampling_interval=9999,
                                        select_cip_max_tries=100,
                                        batch_size=int(count/4)+1,
                                        n_steps=100,
                                        n_jobs=-1,
                                        improving_threshold=0.9)



    # score the improved versions and the originals
    avg_imp = sum([estimator.decision_function(e) for e in vectorizer.transform(unpack(improved_graphs))]) / count
    avg_ori = sum([estimator.decision_function(e) for e in vectorizer.transform(graphs_pos___)]) / count
    improved.append(avg_imp)
    originals.append(avg_ori)


t = range(len(percentages))
# originals are blue, improved ones are green
print(originals)
print(improved)
plt.plot(t, originals, 'bs')
plt.plot(t, improved, 'g^')
plt.savefig('zomg.png')
Author: smautner, Project: GraphLearn, Lines: 31, Source: BFG1.py

Example 9: ListVectorizer

# Required module: from eden.graph import Vectorizer [as alias]
# Or alternatively: from eden.graph.Vectorizer import transform [as alias]
class ListVectorizer(Vectorizer):
    """Transform vector labeled, weighted, nested graphs in sparse vectors.

    A list of iterators over graphs and a list of weights are taken in input.
    The returned vector is the linear combination of sparse vectors obtained on each
    corresponding graph.
    """

    def __init__(self,
                 complexity=3,
                 r=None,
                 d=None,
                 min_r=0,
                 min_d=0,
                 nbits=20,
                 normalization=True,
                 inner_normalization=True,
                 n=1,
                 min_n=2):
        """
        Arguments:


        complexity : int
          The complexity of the features extracted.

        r : int
          The maximal radius size.

        d : int
          The maximal distance size.

        min_r : int
          The minimal radius size.

        min_d : int
          The minimal distance size.

        nbits : int
          The number of bits that defines the feature space size: |feature space|=2^nbits.

        normalization : bool
          If set the resulting feature vector will have unit euclidean norm.

        inner_normalization : bool
          If set the feature vector for a specific combination of the radius and
          distance size will have unit euclidean norm.
          When used together with the 'normalization' flag it will be applied first and
          then the resulting feature vector will be normalized.

        n : int
          The maximal number of clusters used to discretize label vectors.

        min_n : int
          The minimal number of clusters used to discretize label vectors.
        """
        self.vectorizer = Vectorizer(complexity=complexity,
                                     r=r,
                                     d=d,
                                     min_r=min_r,
                                     min_d=min_d,
                                     nbits=nbits,
                                     normalization=normalization,
                                     inner_normalization=inner_normalization,
                                     n=n,
                                     min_n=min_n)
        self.vectorizers = list()

    def fit(self, graphs_iterators_list):
        """
        Constructs an approximate explicit mapping of a kernel function on the data
        stored in the nodes of the graphs.

        Arguments:

        graphs_iterators_list : list of iterators over networkx graphs.
          The data.
        """
        for i, graphs in enumerate(graphs_iterators_list):
            self.vectorizers.append(copy.copy(self.vectorizer))
            self.vectorizers[i].fit(graphs)

    def fit_transform(self, graphs_iterators_list, weights=list()):
        """
        Arguments:

        graphs_iterators_list : list of iterators over networkx graphs.
          The data.

        weights : list of positive real values.
          Weights for the linear combination of sparse vectors obtained on each iterated tuple of graphs.
        """
        graphs_iterators_list_fit, graphs_iterators_list_transf = itertools.tee(graphs_iterators_list)
        self.fit(graphs_iterators_list_fit)
        return self.transform(graphs_iterators_list_transf, weights=weights)

    def transform(self, graphs_iterators_list, weights=list()):
        """
        Transforms a list of networkx graphs into a scipy.sparse CSR matrix
        (Compressed Sparse Row matrix).
#......... rest of the code omitted .........
Author: gianlucacorrado, Project: EDeN, Lines: 103, Source: multi_graph.py
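
A hedged usage sketch; graphs_a and graphs_b are assumed iterables of labeled networkx graphs. Each stream gets its own fitted copy of the inner Vectorizer, and the weights scale the linear combination of the per-stream vectors.

lv = ListVectorizer(complexity=3)
X = lv.fit_transform([iter(graphs_a), iter(graphs_b)], weights=[1.0, 0.5])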

Example 10: EdenEstimator

# Required module: from eden.graph import Vectorizer [as alias]
# Or alternatively: from eden.graph.Vectorizer import transform [as alias]
# (Perceptron and SGDClassifier come from sklearn.linear_model; balance and
# subsample are helpers defined elsewhere in the source project)
class EdenEstimator(BaseEstimator, ClassifierMixin):
    """Build an estimator for graphs."""

    def __init__(self, r=3, d=8, nbits=16, discrete=True,
                 balance=False, subsample_size=200, ratio=2,
                 normalization=False, inner_normalization=False,
                 penalty='elasticnet'):
        """construct."""
        self.set_params(r, d, nbits, discrete, balance, subsample_size,
                        ratio, normalization, inner_normalization,
                        penalty)

    def set_params(self, r=3, d=8, nbits=16, discrete=True,
                   balance=False, subsample_size=200, ratio=2,
                   normalization=False, inner_normalization=False,
                   penalty='elasticnet'):
        """setter."""
        self.r = r
        self.d = d
        self.nbits = nbits
        self.normalization = normalization
        self.inner_normalization = inner_normalization
        self.discrete = discrete
        self.balance = balance
        self.subsample_size = subsample_size
        self.ratio = ratio
        if penalty == 'perceptron':
            self.model = Perceptron(max_iter=5, tol=None)
        else:
            self.model = SGDClassifier(
                average=True, class_weight='balanced', shuffle=True,
                penalty=penalty, max_iter=5, tol=None)
        self.vectorizer = Vectorizer(
            r=self.r, d=self.d,
            normalization=self.normalization,
            inner_normalization=self.inner_normalization,
            discrete=self.discrete,
            nbits=self.nbits)
        return self

    def transform(self, graphs):
        """transform."""
        x = self.vectorizer.transform(graphs)
        return x

    @timeit
    def kernel_matrix(self, graphs):
        """kernel_matrix."""
        x = self.transform(graphs)
        return metrics.pairwise.pairwise_kernels(x, metric='linear')

    def fit(self, graphs, targets, randomize=True):
        """fit."""
        if self.balance:
            if randomize:
                bal_graphs, bal_targets = balance(
                    graphs, targets, None, ratio=self.ratio)
            else:
                samp_graphs, samp_targets = subsample(
                    graphs, targets, subsample_size=self.subsample_size)
                x = self.transform(samp_graphs)
                self.model.fit(x, samp_targets)
                bal_graphs, bal_targets = balance(
                    graphs, targets, self, ratio=self.ratio)
            size = len(bal_targets)
            logger.debug('Dataset size=%d' % (size))
            x = self.transform(bal_graphs)
            self.model = self.model.fit(x, bal_targets)
        else:
            x = self.transform(graphs)
            self.model = self.model.fit(x, targets)
        return self

    def predict(self, graphs):
        """predict."""
        x = self.transform(graphs)
        preds = self.model.predict(x)
        return preds

    def decision_function(self, graphs):
        """decision_function."""
        x = self.transform(graphs)
        preds = self.model.decision_function(x)
        return preds

    @timeit
    def cross_val_score(self, graphs, targets,
                        scoring='roc_auc', cv=5):
        """cross_val_score."""
        x = self.transform(graphs)
        scores = cross_val_score(
            self.model, x, targets, cv=cv, scoring=scoring)
        return scores

    @timeit
    def cross_val_predict(self, graphs, targets, cv=5):
        """cross_val_predict."""
        x = self.transform(graphs)
        scores = cross_val_predict(
            self.model, x, targets, cv=cv, method='decision_function')
#......... rest of the code omitted .........
Author: fabriziocosta, Project: EDeN, Lines: 103, Source: estimator.py
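
A hedged usage sketch; graphs and binary targets in {-1, +1} are assumptions, prepared as in the earlier examples.

est = EdenEstimator(r=2, d=5)
est.fit(graphs, targets)
preds = est.predict(graphs)
aucs = est.cross_val_score(graphs, targets)  # 5-fold ROC-AUC by default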


Note: the eden.graph.Vectorizer.transform method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by various developers, and copyright of the source code remains with the original authors; consult the corresponding project's license before redistributing or using the code. Do not repost without permission.