当前位置: 首页>>代码示例>>Python>>正文


Python sparse.load_npz方法代码示例

本文整理汇总了 Python 中 scipy.sparse.load_npz 方法的典型用法代码示例。如果您正苦于以下问题：Python sparse.load_npz 方法的具体用法？Python sparse.load_npz 怎么用？Python sparse.load_npz 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 scipy.sparse 的用法示例。


在下文中一共展示了 sparse.load_npz 方法的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: process

# 需要导入模块: from scipy import sparse [as 别名]
# 或者: from scipy.sparse import load_npz [as 别名]
def process(self):
        """Build the processed dataset file from the raw Reddit archives.

        Reads node features, labels and split codes from ``reddit_data.npz``
        and the adjacency from ``reddit_graph.npz`` (both under
        ``self.raw_dir``), assembles a single ``Data`` object with boolean
        train/val/test masks, applies ``self.pre_transform`` when one is set,
        and saves the collated result to ``self.processed_paths[0]``.
        """
        raw = np.load(osp.join(self.raw_dir, 'reddit_data.npz'))
        features = torch.from_numpy(raw['feature']).to(torch.float)
        labels = torch.from_numpy(raw['label']).to(torch.long)
        node_types = torch.from_numpy(raw['node_types'])
        num_nodes = features.size(0)

        # ``.row`` / ``.col`` are read directly, so the stored matrix is
        # presumably in COO format -- TODO confirm against the raw file.
        graph = sp.load_npz(osp.join(self.raw_dir, 'reddit_graph.npz'))
        src = torch.from_numpy(graph.row).to(torch.long)
        dst = torch.from_numpy(graph.col).to(torch.long)
        edge_index, _ = coalesce(
            torch.stack([src, dst], dim=0), None, num_nodes, num_nodes)

        graph_data = Data(x=features, edge_index=edge_index, y=labels)
        # Split codes in ``node_types``: 1 -> train, 2 -> val, 3 -> test.
        graph_data.train_mask = node_types == 1
        graph_data.val_mask = node_types == 2
        graph_data.test_mask = node_types == 3

        if self.pre_transform is not None:
            graph_data = self.pre_transform(graph_data)

        torch.save(self.collate([graph_data]), self.processed_paths[0])
开发者ID:rusty1s,项目名称:pytorch_geometric,代码行数:22,代码来源:reddit.py

示例2: _load

# 需要导入模块: from scipy import sparse [as 别名]
# 或者: from scipy.sparse import load_npz [as 别名]
def _load(self):
        """Load the graph, features, labels and split masks from disk.

        The adjacency comes from ``reddit{suffix}_graph.npz`` (suffix taken
        from ``self._self_loop_str``) and everything else from
        ``reddit_data.npz``, both inside ``self._extract_dir``. A short
        summary of the loaded data is printed at the end.
        """
        # Graph structure.
        graph_file = "reddit{}_graph.npz".format(self._self_loop_str)
        adjacency = sp.load_npz(os.path.join(self._extract_dir, graph_file))
        self.graph = DGLGraph(adjacency, readonly=True)

        # Features and labels.
        archive = np.load(os.path.join(self._extract_dir, "reddit_data.npz"))
        self.features = archive["feature"]
        self.labels = archive["label"]
        self.num_labels = 41

        # Train/val/test indices. "node_ids" is read but not used below; the
        # lookup is kept so that an archive without that key still fails here.
        node_ids = archive["node_ids"]
        split_codes = archive["node_types"]
        for attr_name, code in (("train_mask", 1), ("val_mask", 2), ("test_mask", 3)):
            setattr(self, attr_name, split_codes == code)

        def _count(mask):
            # Number of nonzero (True) entries in a mask.
            return len(np.nonzero(mask)[0])

        print('Finished data loading.')
        print('  NumNodes: {}'.format(self.graph.number_of_nodes()))
        print('  NumEdges: {}'.format(self.graph.number_of_edges()))
        print('  NumFeats: {}'.format(self.features.shape[1]))
        print('  NumClasses: {}'.format(self.num_labels))
        print('  NumTrainingSamples: {}'.format(_count(self.train_mask)))
        print('  NumValidationSamples: {}'.format(_count(self.val_mask)))
        print('  NumTestSamples: {}'.format(_count(self.test_mask)))
开发者ID:dmlc,项目名称:dgl,代码行数:27,代码来源:reddit.py

示例3: load_term_counts

# 需要导入模块: from scipy import sparse [as 别名]
# 或者: from scipy.sparse import load_npz [as 别名]
def load_term_counts(path='../dat/', force_redo=False):
    """Return ``(counts, author_counts, vocab)`` for the Reddit posts.

    Args:
        path: Prefix (string-concatenated, so it should end with a path
            separator) under which the cache files live.
        force_redo: When true, ignore any cached files and recompute.

    Returns:
        A tuple of the per-post term counts, the per-author term counts and
        the vocabulary array.

    The cached files are used whenever ``reddit_term_counts.npz`` exists and
    ``force_redo`` is false; only that one file is checked for existence.
    Otherwise the counts are rebuilt from ``load_reddit()`` and written back
    to the same locations.
    """
    counts_base = path + 'reddit_term_counts'
    author_base = path + 'reddit_author_term_counts'
    vocab_base = path + 'vocab'

    use_cache = os.path.exists(counts_base + '.npz') and not force_redo
    if use_cache:
        cached_counts = sparse.load_npz(counts_base + '.npz')
        cached_author_counts = sparse.load_npz(author_base + '.npz')
        cached_vocab = np.load(vocab_base + '.npy')
        return cached_counts, cached_author_counts, cached_vocab

    def _join_posts(texts):
        # Concatenate all posts of one author into a single document.
        return ' '.join(texts)

    posts = load_reddit()
    post_docs = posts['post_text'].values
    per_author = posts.groupby('author')['post_text'].apply(_join_posts).reset_index()
    author_docs = per_author['post_text'].values

    counts, vocab, vec = tokenize_documents(post_docs)
    # Reuse the fitted vectorizer so author counts share the post vocabulary.
    author_counts = vec.transform(author_docs)

    sparse.save_npz(counts_base, counts)
    sparse.save_npz(author_base, author_counts)
    np.save(vocab_base, vocab)
    return counts, author_counts, vocab
开发者ID:blei-lab,项目名称:causal-text-embeddings,代码行数:21,代码来源:reddit_posts.py

示例4: test_average_regions_start

# 需要导入模块: from scipy import sparse [as 别名]
# 或者: from scipy.sparse import load_npz [as 别名]
def test_average_regions_start():
    """Check hicAverageRegions output with ``-cb start`` against the stored reference.

    Runs ``hicAverageRegions.main`` on the small test matrix with the
    multi-region BED file and compares the ``.data`` of the produced ``.npz``
    with ``regions_multi_start.npz`` to zero decimal places.
    """
    outfile = NamedTemporaryFile(suffix='.npz', prefix='average_region', delete=False)
    # Only the path is needed; release our handle so the tool can write the
    # file itself and nothing is left open after the test.
    outfile.close()
    try:
        matrix = ROOT + 'small_test_matrix.cool'
        bed_file = ROOT + 'hicAverageRegions/regions_multi.bed'
        args = "--matrix {} --regions {} -o {} --range 100000 100000 -cb {}".format(matrix, bed_file, outfile.name, 'start').split()
        log.debug('path: {}'.format(matrix))

        hicAverageRegions.main(args)

        test_file = load_npz(ROOT + 'hicAverageRegions/regions_multi_start.npz')
        new_file = load_npz(outfile.name)

        nt.assert_almost_equal(test_file.data, new_file.data, decimal=0)
    finally:
        # Remove the temporary output even when the tool or the assertion fails.
        os.remove(outfile.name)
开发者ID:deeptools,项目名称:HiCExplorer,代码行数:18,代码来源:test_hicAverageRegions.py

示例5: test_average_regions_center

# 需要导入模块: from scipy import sparse [as 别名]
# 或者: from scipy.sparse import load_npz [as 别名]
def test_average_regions_center():
    """Check hicAverageRegions output with ``-cb center`` against the stored reference.

    Runs ``hicAverageRegions.main`` on the small test matrix with the
    multi-region BED file and compares the ``.data`` of the produced ``.npz``
    with ``regions_multi_center.npz`` to zero decimal places.
    """
    outfile = NamedTemporaryFile(suffix='.npz', prefix='average_region', delete=False)
    # Only the path is needed; release our handle so the tool can write the
    # file itself and nothing is left open after the test.
    outfile.close()
    try:
        matrix = ROOT + 'small_test_matrix.cool'
        bed_file = ROOT + 'hicAverageRegions/regions_multi.bed'
        args = "--matrix {} --regions {} -o {} --range 100000 100000 -cb {}".format(matrix, bed_file, outfile.name, 'center').split()
        log.debug('path: {}'.format(matrix))

        hicAverageRegions.main(args)

        test_file = load_npz(ROOT + 'hicAverageRegions/regions_multi_center.npz')
        new_file = load_npz(outfile.name)

        nt.assert_almost_equal(test_file.data, new_file.data, decimal=0)
    finally:
        # Remove the temporary output even when the tool or the assertion fails.
        os.remove(outfile.name)
开发者ID:deeptools,项目名称:HiCExplorer,代码行数:18,代码来源:test_hicAverageRegions.py

示例6: test_average_regions_single

# 需要导入模块: from scipy import sparse [as 别名]
# 或者: from scipy.sparse import load_npz [as 别名]
def test_average_regions_single():
    """Check hicAverageRegions output for the single-region BED file.

    Runs ``hicAverageRegions.main`` on the small test matrix with
    ``regions.bed`` (no ``-cb`` option) and compares the ``.data`` of the
    produced ``.npz`` with ``result_range_100000.npz`` to zero decimal places.
    """
    outfile = NamedTemporaryFile(suffix='.npz', prefix='average_region', delete=False)
    # Only the path is needed; release our handle so the tool can write the
    # file itself and nothing is left open after the test.
    outfile.close()
    try:
        matrix = ROOT + 'small_test_matrix.cool'
        bed_file = ROOT + 'hicAverageRegions/regions.bed'
        args = "--matrix {} --regions {} -o {} --range 100000 100000".format(matrix, bed_file, outfile.name).split()
        log.debug('path: {}'.format(matrix))

        hicAverageRegions.main(args)

        test_file = load_npz(ROOT + 'hicAverageRegions/result_range_100000.npz')
        new_file = load_npz(outfile.name)

        nt.assert_almost_equal(test_file.data, new_file.data, decimal=0)
    finally:
        # Remove the temporary output even when the tool or the assertion fails.
        os.remove(outfile.name)
开发者ID:deeptools,项目名称:HiCExplorer,代码行数:18,代码来源:test_hicAverageRegions.py

示例7: save_to_one_flie_csrs

# 需要导入模块: from scipy import sparse [as 别名]
# 或者: from scipy.sparse import load_npz [as 别名]
def save_to_one_flie_csrs(self, fns):
        """Merge the chunk matrices named in ``fns`` into a single ``Xcsr.h5`` file.

        Each name in ``fns`` is loaded from ``<output_folder>/chunks/`` with
        ``load_npz``, cast to ``float32`` and written to the ``"matrix"``
        dataset of ``<output_folder>/Xcsr.h5``: the first chunk creates the
        dataset, every later chunk is appended to it. Any existing
        ``Xcsr.h5`` is removed first.

        Args:
            fns: Iterable of chunk file names, in the order they should be
                stored.
        """
        save_as = os.path.join(self.output_folder, "Xcsr.h5")
        try:
            os.unlink(save_as)
        except OSError:
            # Best-effort removal of a stale output (typically it simply does
            # not exist yet). Only filesystem errors are ignored, so things
            # like KeyboardInterrupt are no longer swallowed.
            pass
        h5f = h5sparse.File(save_as)
        try:
            for position, fn in enumerate(fns):
                logger.info(f"Saving {fn}")
                mat = load_npz(os.path.join(self.output_folder, "chunks", fn)).astype(np.float32)
                if position == 0:
                    h5f.create_dataset("matrix", data=mat, chunks=(10_000_000,), maxshape=(None,))
                else:
                    h5f["matrix"].append(mat)
                # Free the chunk promptly before loading the next one.
                gc.collect()
        finally:
            # Close the HDF5 handle even if loading or appending a chunk fails.
            h5f.close()
开发者ID:logicai-io,项目名称:recsys2019,代码行数:20,代码来源:vectorizers.py

示例8: load_edge_masks

# 需要导入模块: from scipy import sparse [as 别名]
# 或者: from scipy.sparse import load_npz [as 别名]
def load_edge_masks(dataset_str, data_path, adj_true, drop_edge_prop):
  """Loads val/test edge masks and builds the training adjacency.

  Args:
    dataset_str: dataset to use
    data_path: path to data folder
    adj_true: true adjacency matrix in dense format,
    drop_edge_prop: proportion of edges to remove.

  Returns:
    A 4-tuple ``(adj_train, mask_a, mask_b, test_mask)`` where ``adj_train``
    is ``adj_true`` with the val and test entries zeroed out, ``test_mask``
    is the tuple form of the test mask, and both ``mask_a`` and ``mask_b``
    are the tuple form of the *val* mask.

  NOTE(review): the second returned item is presumably meant to be the train
  mask (it is computed below but never returned) -- confirm with callers
  before changing, since the fix depends on what ``sparse_to_tuple`` accepts.
  """
  mask_prefix = os.path.join(
      data_path, 'emask.{}.remove{}.'.format(dataset_str, drop_edge_prop))
  val_mask = sp.load_npz(mask_prefix + 'val.npz')
  test_mask = sp.load_npz(mask_prefix + 'test.npz')

  # Everything that is neither a val nor a test entry counts as train.
  train_mask = 1. - val_mask.todense() - test_mask.todense()
  # Remove val and test edges from the true adjacency.
  adj_train = np.multiply(adj_true, train_mask)
  # Drop the diagonal from the train mask (result currently unused).
  train_mask -= np.eye(train_mask.shape[0])

  second_item = sparse_to_tuple(val_mask)
  third_item = sparse_to_tuple(val_mask)
  test_tuple = sparse_to_tuple(test_mask)
  return adj_train, second_item, third_item, test_tuple
开发者ID:google,项目名称:gcnn-survey-paper,代码行数:27,代码来源:data_utils.py

示例9: add_top_k_edges

# 需要导入模块: from scipy import sparse [as 别名]
# 或者: from scipy.sparse import load_npz [as 别名]
def add_top_k_edges(data, edge_mask_path, gae_scores_path, topk, nb_nodes,
                    norm_adj):
  """Loads GAE scores and adds the top-scoring edges to the train adjacency.

  Args:
    data: mapping providing 'adj_true' and 'adj_train'.
    edge_mask_path: folder containing 'test_mask.npz'.
    gae_scores_path: folder containing 'gae_scores.npy'.
    topk: rank (0-based, descending) of the score used as threshold; only
      entries scoring strictly above it are added.
    nb_nodes: number of nodes, i.e. side length of the score matrix.
    norm_adj: if truthy, 'adj_train' is passed through normalize_adj,
      otherwise it is only converted to a COO matrix.

  Returns:
    A pair of the binarized augmented adjacency and the tuple form of the
    (optionally normalized) data['adj_train'].
  """
  held_out = sp.load_npz(os.path.join(edge_mask_path, 'test_mask.npz'))
  keep_mask = 1. - held_out.todense()
  # Zero out the held-out entries of the true adjacency.
  adj_train_curr = np.multiply(data['adj_true'], keep_mask)

  # Predict edges from precomputed scores, restricted to pairs that link
  # one of the first 140 nodes with one of the remaining nodes.
  raw_scores = np.load(os.path.join(gae_scores_path, 'gae_scores.npy'))
  pair_mask = np.zeros((nb_nodes, nb_nodes))
  pair_mask[:140, 140:] = 1.
  pair_mask[140:, :140] = 1.
  flat_scores = np.multiply(raw_scores, pair_mask).reshape((-1,))

  ranking = np.argsort(-flat_scores)
  threshold = flat_scores[ranking[topk]]
  selected = (flat_scores > threshold).reshape((nb_nodes, nb_nodes))
  adj_train_curr += 1 * selected
  # Binarize: any positive entry becomes 1.
  adj_train_curr = 1 * (adj_train_curr > 0)

  # Note that the second output is derived from data['adj_train'], not from
  # the augmented adjacency computed above.
  base_adj = data['adj_train']
  adj_train_norm = normalize_adj(base_adj) if norm_adj else sp.coo_matrix(base_adj)
  return adj_train_curr, sparse_to_tuple(adj_train_norm)
开发者ID:google,项目名称:gcnn-survey-paper,代码行数:24,代码来源:data_utils.py

示例10: load_ppi_data

# 需要导入模块: from scipy import sparse [as 别名]
# 或者: from scipy.sparse import load_npz [as 别名]
def load_ppi_data(data_path):
  """Load PPI dataset.

  Args:
    data_path: folder containing the 'ppi.*' files.

  Returns:
    A tuple (adj, features, labels, train_mask, val_mask, test_mask). The
    adjacency is returned as loaded by ``sp.load_npz``, labels are densified,
    and each mask is the boolean array ``loaded > 0``.
  """

  def _open(filename):
    # All inputs are opened through tf.gfile relative to data_path.
    return tf.gfile.Open(os.path.join(data_path, filename))

  with _open('ppi.edges.npz') as f:
    adj = sp.load_npz(f)

  with _open('ppi.features.norm.npy') as f:
    features = np.load(f)

  with _open('ppi.labels.npz') as f:
    labels = sp.load_npz(f).todense()

  # The mask files are opened with `with` as well so their handles are
  # closed once the arrays have been read.
  with _open('ppi.train_mask.npy') as f:
    train_mask = np.load(f) > 0
  # NOTE(review): the val mask is read from 'ppi.test_mask.npy', the same
  # file as the test mask -- confirm whether a separate val mask file exists.
  with _open('ppi.test_mask.npy') as f:
    val_mask = np.load(f) > 0
  with _open('ppi.test_mask.npy') as f:
    test_mask = np.load(f) > 0

  return adj, features, labels, train_mask, val_mask, test_mask
开发者ID:google,项目名称:gcnn-survey-paper,代码行数:21,代码来源:data_utils.py

示例11: uscensus_reverse_transform

# 需要导入模块: from scipy import sparse [as 别名]
# 或者: from scipy.sparse import load_npz [as 别名]
def uscensus_reverse_transform(input_path, output_path, metadata_path):
    """Convert a sparse one-hot feature matrix back into a CSV of variable values.

    Args:
        input_path: Path of the ``.npz`` sparse matrix; each row is expected
            to have exactly one nonzero column per variable.
        output_path: Path of the CSV file to write. Its header is
            ``caseid`` followed by the metadata variables; ``caseid`` is
            never filled in by this function.
        metadata_path: Path of a JSON file with a ``"variables"`` list and
            an ``"index_to_value"`` sequence that maps a column index to a
            ``(variable, value)`` pair.

    Raises:
        AssertionError: If a row does not have as many nonzero columns as
            there are variables.
    """
    with open(metadata_path, "r") as metadata_file:
        metadata = json.load(metadata_file)

    variables = metadata["variables"]
    index_to_value = metadata["index_to_value"]
    features = load_npz(input_path)

    # ``with`` closes the output even if a row fails validation, and
    # ``newline=""`` lets the csv module control line endings itself.
    with open(output_path, "w", newline="") as csv_file:
        output = csv.DictWriter(csv_file, ["caseid"] + variables)
        output.writeheader()

        for row_value_indices in features:
            _, selected_value_indices = row_value_indices.nonzero()
            # there should be one value per variable
            assert len(selected_value_indices) == len(variables)

            row_dict = dict()
            for selected_value_index in selected_value_indices:
                variable, value = index_to_value[selected_value_index]
                row_dict[variable] = value

            output.writerow(row_dict)
开发者ID:rcamino,项目名称:multi-categorical-gans,代码行数:26,代码来源:reverse_transform.py

示例12: retrieve_dataset

# 需要导入模块: from scipy import sparse [as 别名]
# 或者: from scipy.sparse import load_npz [as 别名]
def retrieve_dataset(dataset, **kwargs):
    """Load one file from the public test-dataset bundle, downloading it if needed.

    If the ``datasets.1.17.2019`` directory does not exist in the current
    working directory, the matching zip archive is downloaded and extracted
    there first. The file is then loaded according to its extension:
    ``.npz`` (scipy sparse matrix), ``.svmlight`` (scikit-learn loader),
    ``.json`` (parsed JSON) or ``.csv`` (pandas DataFrame).

    Args:
        dataset: File name inside the dataset directory.
        **kwargs: Forwarded to ``pandas.read_csv`` for ``.csv`` files only.

    Returns:
        The loaded object, whose type depends on the file extension.

    Raises:
        Exception: If the extension is not one of the supported ones.
    """
    outdirname = "datasets.1.17.2019"
    if not os.path.exists(outdirname):
        # Data not extracted yet: fetch the archive and unpack it here.
        try:
            from urllib import urlretrieve
        except ImportError:
            from urllib.request import urlretrieve
        import zipfile

        archive_name = outdirname + ".zip"
        archive_url = "https://publictestdatasets.blob.core.windows.net/data/" + archive_name
        urlretrieve(archive_url, archive_name)
        with zipfile.ZipFile(archive_name, "r") as archive:
            archive.extractall(".")

    suffix = os.path.splitext(dataset)[1]
    target = os.path.join(outdirname, dataset)

    # Loader modules are imported lazily so that only the dependency for the
    # requested format is required.
    if suffix == ".npz":
        # sparse format file
        from scipy.sparse import load_npz

        return load_npz(target)

    if suffix == ".svmlight":
        from sklearn.datasets import load_svmlight_file

        return load_svmlight_file(target)

    if suffix == ".json":
        import json

        with open(target, encoding="utf-8") as handle:
            return json.load(handle)

    if suffix == ".csv":
        import pandas

        return pandas.read_csv(target, **kwargs)

    raise Exception("Unrecognized file extension: " + suffix)
开发者ID:interpretml,项目名称:interpret-text,代码行数:42,代码来源:datasets.py

示例13: load_state

# 需要导入模块: from scipy import sparse [as 别名]
# 或者: from scipy.sparse import load_npz [as 别名]
def load_state(self, folderpath):
        """Restore the model state previously written to ``folderpath``.

        Reads ``hash_model_state.json`` for the stored values and the number
        of evaluations, appends one sparse vector per evaluation (files
        ``0.npz``, ``1.npz``, ...) to ``self.vecs_lst``, and calls
        ``self._refit()`` when at least one evaluation was loaded.
        """
        state_file = ut.join_paths([folderpath, 'hash_model_state.json'])
        saved = ut.read_jsonfile(state_file)
        self.vals_lst = saved['vals_lst']
        eval_count = saved['num_evals']

        for index in range(eval_count):
            vec_file = ut.join_paths([folderpath, str(index) + '.npz'])
            self.vecs_lst.append(sp.load_npz(vec_file))

        if not eval_count > 0:
            # Nothing was loaded, so there is nothing to fit on.
            return
        self._refit()
开发者ID:negrinho,项目名称:deep_architect,代码行数:13,代码来源:hashing.py

示例14: loadRedditFromNPZ

# 需要导入模块: from scipy import sparse [as 别名]
# 或者: from scipy.sparse import load_npz [as 别名]
def loadRedditFromNPZ(dataset_dir=datadir):
    """Load the Reddit adjacency matrix and the arrays stored next to it.

    ``dataset_dir`` is used as a plain string prefix, so it should end with
    a path separator.

    Returns:
        An 8-tuple: the sparse adjacency from ``reddit_adj.npz`` followed by
        the ``feats``, ``y_train``, ``y_val``, ``y_test``, ``train_index``,
        ``val_index`` and ``test_index`` arrays from ``reddit.npz``.
    """
    adjacency = sp.load_npz(dataset_dir+"reddit_adj.npz")
    archive = np.load(dataset_dir +"reddit.npz")

    array_keys = ('feats', 'y_train', 'y_val', 'y_test',
                  'train_index', 'val_index', 'test_index')
    return (adjacency,) + tuple(archive[key] for key in array_keys)
开发者ID:DropEdge,项目名称:DropEdge,代码行数:7,代码来源:utils.py

示例15: load_data

# 需要导入模块: from scipy import sparse [as 别名]
# 或者: from scipy.sparse import load_npz [as 别名]
def load_data(self):
        """Return the perturbed adjacency matrix stored at ``self.data_filename``.

        Calls ``self.download_npz()`` first when the file is not on disk,
        prints a loading message, and emits a warning reminding the caller
        about the data splits the perturbation was generated under.
        """
        file_present = osp.exists(self.data_filename)
        if not file_present:
            self.download_npz()

        banner = 'Loading {} dataset perturbed by 0.05 mettack...'.format(self.name)
        print(banner)
        perturbed_adj = sp.load_npz(self.data_filename)

        split_notice = '''the adjacency matrix is perturbed, using the data splits under seed 15(default seed for deeprobust.graph.data.Dataset), so if you are going to verify the attacking performance, you should use the same data splits'''
        warnings.warn(split_notice)
        return perturbed_adj
开发者ID:DSE-MSU,项目名称:DeepRobust,代码行数:9,代码来源:attacked_data.py


注:本文中的scipy.sparse.load_npz方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。