This article collects typical usage examples of the Python method scipy.sparse.load_npz. If you are wondering how exactly sparse.load_npz works, or how to use it in practice, the curated code examples below may help. You can also explore further usage examples from its parent module, scipy.sparse.
Below are 15 code examples of sparse.load_npz, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
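Before the examples, here is a minimal sketch of the save/load round trip for orientation (the file name and matrix are illustrative, not taken from any example below):

from scipy import sparse

# build a small random sparse matrix and persist it in .npz format
mat = sparse.random(5, 5, density=0.2, format='csr')
sparse.save_npz('example_matrix.npz', mat)

# load_npz restores the matrix in the same sparse format it was saved in
loaded = sparse.load_npz('example_matrix.npz')
assert (mat != loaded).nnz == 0  # the round trip preserves all stored values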
Example 1: process
# Required import: from scipy import sparse [as alias]
# Or: from scipy.sparse import load_npz [as alias]
def process(self):
    data = np.load(osp.join(self.raw_dir, 'reddit_data.npz'))
    x = torch.from_numpy(data['feature']).to(torch.float)
    y = torch.from_numpy(data['label']).to(torch.long)
    split = torch.from_numpy(data['node_types'])

    adj = sp.load_npz(osp.join(self.raw_dir, 'reddit_graph.npz'))
    row = torch.from_numpy(adj.row).to(torch.long)
    col = torch.from_numpy(adj.col).to(torch.long)
    edge_index = torch.stack([row, col], dim=0)
    edge_index, _ = coalesce(edge_index, None, x.size(0), x.size(0))

    data = Data(x=x, edge_index=edge_index, y=y)
    data.train_mask = split == 1
    data.val_mask = split == 2
    data.test_mask = split == 3

    data = data if self.pre_transform is None else self.pre_transform(data)

    torch.save(self.collate([data]), self.processed_paths[0])
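A note on this pattern: load_npz restores the matrix in whatever sparse format it was saved in (CSR, CSC, COO, BSR, or DIA), and the .row/.col attributes used above exist only on COO matrices. The example therefore assumes reddit_graph.npz stores a COO matrix; a defensive sketch converts explicitly:

adj = sp.load_npz(osp.join(self.raw_dir, 'reddit_graph.npz')).tocoo()  # force COO so .row/.col exist
row = torch.from_numpy(adj.row).to(torch.long)
col = torch.from_numpy(adj.col).to(torch.long)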
Example 2: _load
# Required import: from scipy import sparse [as alias]
# Or: from scipy.sparse import load_npz [as alias]
def _load(self):
    # graph
    coo_adj = sp.load_npz(os.path.join(
        self._extract_dir, "reddit{}_graph.npz".format(self._self_loop_str)))
    self.graph = DGLGraph(coo_adj, readonly=True)
    # features and labels
    reddit_data = np.load(os.path.join(self._extract_dir, "reddit_data.npz"))
    self.features = reddit_data["feature"]
    self.labels = reddit_data["label"]
    self.num_labels = 41
    # train/val/test indices
    node_ids = reddit_data["node_ids"]
    node_types = reddit_data["node_types"]
    self.train_mask = (node_types == 1)
    self.val_mask = (node_types == 2)
    self.test_mask = (node_types == 3)
    print('Finished data loading.')
    print('  NumNodes: {}'.format(self.graph.number_of_nodes()))
    print('  NumEdges: {}'.format(self.graph.number_of_edges()))
    print('  NumFeats: {}'.format(self.features.shape[1]))
    print('  NumClasses: {}'.format(self.num_labels))
    print('  NumTrainingSamples: {}'.format(len(np.nonzero(self.train_mask)[0])))
    print('  NumValidationSamples: {}'.format(len(np.nonzero(self.val_mask)[0])))
    print('  NumTestSamples: {}'.format(len(np.nonzero(self.test_mask)[0])))
Example 3: load_term_counts
# Required import: from scipy import sparse [as alias]
# Or: from scipy.sparse import load_npz [as alias]
def load_term_counts(path='../dat/', force_redo=False):
    count_filename = path + 'reddit_term_counts'
    authors_counts_filename = path + 'reddit_author_term_counts'
    vocab_filename = path + 'vocab'
    if os.path.exists(count_filename + '.npz') and not force_redo:
        return (sparse.load_npz(count_filename + '.npz'),
                sparse.load_npz(authors_counts_filename + '.npz'),
                np.load(vocab_filename + '.npy'))
    reddit = load_reddit()
    post_docs = reddit['post_text'].values
    author_grouped = reddit.groupby('author')['post_text'].apply(lambda x: ' '.join(x)).reset_index()
    author_docs = author_grouped['post_text'].values
    counts, vocab, vec = tokenize_documents(post_docs)
    author_counts = vec.transform(author_docs)
    sparse.save_npz(count_filename, counts)
    sparse.save_npz(authors_counts_filename, author_counts)
    np.save(vocab_filename, vocab)
    return counts, author_counts, vocab
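Example 3 is a common caching pattern: compute once, persist with sparse.save_npz, and short-circuit to sparse.load_npz on later calls. A stripped-down sketch of the same idea (cached_sparse and compute_fn are illustrative names, not from the example):

import os
from scipy import sparse

def cached_sparse(cache_path, compute_fn, force_redo=False):
    # return the cached matrix if present, otherwise compute and cache it
    if os.path.exists(cache_path) and not force_redo:
        return sparse.load_npz(cache_path)
    mat = compute_fn()
    sparse.save_npz(cache_path, mat)
    return mat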
Example 4: test_average_regions_start
# Required import: from scipy import sparse [as alias]
# Or: from scipy.sparse import load_npz [as alias]
def test_average_regions_start():
    outfile = NamedTemporaryFile(suffix='.npz', prefix='average_region', delete=False)
    matrix = ROOT + 'small_test_matrix.cool'
    bed_file = ROOT + 'hicAverageRegions/regions_multi.bed'
    args = "--matrix {} --regions {} -o {} --range 100000 100000 -cb {}".format(
        matrix, bed_file, outfile.name, 'start').split()
    log.debug('path: {}'.format(matrix))
    hicAverageRegions.main(args)
    test_file = load_npz(ROOT + 'hicAverageRegions/regions_multi_start.npz')
    new_file = load_npz(outfile.name)
    nt.assert_almost_equal(test_file.data, new_file.data, decimal=0)
    os.remove(outfile.name)
Example 5: test_average_regions_center
# Required import: from scipy import sparse [as alias]
# Or: from scipy.sparse import load_npz [as alias]
def test_average_regions_center():
    outfile = NamedTemporaryFile(suffix='.npz', prefix='average_region', delete=False)
    matrix = ROOT + 'small_test_matrix.cool'
    bed_file = ROOT + 'hicAverageRegions/regions_multi.bed'
    args = "--matrix {} --regions {} -o {} --range 100000 100000 -cb {}".format(
        matrix, bed_file, outfile.name, 'center').split()
    log.debug('path: {}'.format(matrix))
    hicAverageRegions.main(args)
    test_file = load_npz(ROOT + 'hicAverageRegions/regions_multi_center.npz')
    new_file = load_npz(outfile.name)
    nt.assert_almost_equal(test_file.data, new_file.data, decimal=0)
    os.remove(outfile.name)
Example 6: test_average_regions_single
# Required import: from scipy import sparse [as alias]
# Or: from scipy.sparse import load_npz [as alias]
def test_average_regions_single():
    outfile = NamedTemporaryFile(suffix='.npz', prefix='average_region', delete=False)
    matrix = ROOT + 'small_test_matrix.cool'
    bed_file = ROOT + 'hicAverageRegions/regions.bed'
    args = "--matrix {} --regions {} -o {} --range 100000 100000".format(
        matrix, bed_file, outfile.name).split()
    log.debug('path: {}'.format(matrix))
    hicAverageRegions.main(args)
    test_file = load_npz(ROOT + 'hicAverageRegions/result_range_100000.npz')
    new_file = load_npz(outfile.name)
    nt.assert_almost_equal(test_file.data, new_file.data, decimal=0)
    os.remove(outfile.name)
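One caveat about the assertions in Examples 4 through 6: comparing the .data arrays of two sparse matrices is only meaningful when both matrices share the same sparsity pattern and internal element order. A pattern-independent comparison, as a sketch (fine for small test matrices, since it densifies both operands):

import numpy.testing as nt

def assert_sparse_allclose(a, b, decimal=0):
    # compare two scipy sparse matrices regardless of stored-element order
    nt.assert_almost_equal(a.toarray(), b.toarray(), decimal=decimal)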
Example 7: save_to_one_flie_csrs
# Required import: from scipy import sparse [as alias]
# Or: from scipy.sparse import load_npz [as alias]
def save_to_one_flie_csrs(self, fns):
    save_as = os.path.join(self.output_folder, "Xcsr.h5")
    try:
        os.unlink(save_as)
    except OSError:
        pass
    h5f = h5sparse.File(save_as)
    first = True
    for fn in fns:
        logger.info(f"Saving {fn}")
        mat = load_npz(os.path.join(self.output_folder, "chunks", fn)).astype(np.float32)
        if first:
            h5f.create_dataset("matrix", data=mat, chunks=(10_000_000,), maxshape=(None,))
            first = False
        else:
            h5f["matrix"].append(mat)
        gc.collect()
    h5f.close()
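For context, h5sparse stores the CSR buffers inside an HDF5 file and supports row-sliced reads, so the matrix written above can be consumed without loading it whole. A minimal read-back sketch (the file and dataset names match the example above):

h5f = h5sparse.File("Xcsr.h5", "r")
first_rows = h5f["matrix"][0:100]  # a row slice comes back as a scipy sparse matrix
h5f.close()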
Example 8: load_edge_masks
# Required import: from scipy import sparse [as alias]
# Or: from scipy.sparse import load_npz [as alias]
def load_edge_masks(dataset_str, data_path, adj_true, drop_edge_prop):
    """Loads adjacency matrix as sparse matrix and masks for val & test links.

    Args:
        dataset_str: dataset to use.
        data_path: path to data folder.
        adj_true: true adjacency matrix in dense format.
        drop_edge_prop: proportion of edges to remove.

    Returns:
        adj_train: adjacency matrix with val and test edges removed.
        train_mask: mask for train edges.
        val_mask: mask for val edges.
        test_mask: mask for test edges.
    """
    edge_mask_path = os.path.join(
        data_path, 'emask.{}.remove{}.'.format(dataset_str, drop_edge_prop))
    val_mask = sp.load_npz(edge_mask_path + 'val.npz')
    test_mask = sp.load_npz(edge_mask_path + 'test.npz')
    train_mask = 1. - val_mask.todense() - test_mask.todense()
    # remove val and test edges from true A
    adj_train = np.multiply(adj_true, train_mask)
    train_mask -= np.eye(train_mask.shape[0])
    return (adj_train, train_mask, sparse_to_tuple(val_mask),
            sparse_to_tuple(test_mask))
Example 9: add_top_k_edges
# Required import: from scipy import sparse [as alias]
# Or: from scipy.sparse import load_npz [as alias]
def add_top_k_edges(data, edge_mask_path, gae_scores_path, topk, nb_nodes,
                    norm_adj):
    """Loads GAE scores and adds the top-k scoring edges to the train adjacency."""
    test_mask = sp.load_npz(os.path.join(edge_mask_path, 'test_mask.npz'))
    train_mask = 1. - test_mask.todense()
    # remove val and test edges from true A
    adj_train_curr = np.multiply(data['adj_true'], train_mask)
    # predict test edges using precomputed scores
    scores = np.load(os.path.join(gae_scores_path, 'gae_scores.npy'))
    # scores_mask = 1 - np.eye(nb_nodes)
    scores_mask = np.zeros((nb_nodes, nb_nodes))
    scores_mask[:140, 140:] = 1.
    scores_mask[140:, :140] = 1.
    scores = np.multiply(scores, scores_mask).reshape((-1,))
    threshold = scores[np.argsort(-scores)[topk]]
    adj_train_curr += 1 * (scores > threshold).reshape((nb_nodes, nb_nodes))
    adj_train_curr = 1 * (adj_train_curr > 0)
    if norm_adj:
        adj_train_norm = normalize_adj(data['adj_train'])
    else:
        adj_train_norm = sp.coo_matrix(data['adj_train'])
    return adj_train_curr, sparse_to_tuple(adj_train_norm)
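A side note on the top-k step in Example 9: np.argsort(-scores)[topk] picks the (topk+1)-th largest score as the cut-off, so scores > threshold keeps exactly the top-k entries when scores are distinct. An equivalent sketch using np.partition, which avoids a full sort (the values below are illustrative):

import numpy as np

scores = np.array([0.1, 0.9, 0.4, 0.7, 0.3])
topk = 2
# (topk+1)-th largest value, found without fully sorting the array
threshold = -np.partition(-scores, topk)[topk]
top_edges = scores > threshold  # keeps the topk largest, assuming distinct scores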
Example 10: load_ppi_data
# Required import: from scipy import sparse [as alias]
# Or: from scipy.sparse import load_npz [as alias]
def load_ppi_data(data_path):
    """Load PPI dataset."""
    # .npz and .npy files are binary, so the files must be opened in 'rb' mode
    with tf.gfile.Open(os.path.join(data_path, 'ppi.edges.npz'), 'rb') as f:
        adj = sp.load_npz(f)
    with tf.gfile.Open(os.path.join(data_path, 'ppi.features.norm.npy'), 'rb') as f:
        features = np.load(f)
    with tf.gfile.Open(os.path.join(data_path, 'ppi.labels.npz'), 'rb') as f:
        labels = sp.load_npz(f).todense()
    train_mask = np.load(
        tf.gfile.Open(os.path.join(data_path, 'ppi.train_mask.npy'), 'rb')) > 0
    val_mask = np.load(
        tf.gfile.Open(os.path.join(data_path, 'ppi.val_mask.npy'), 'rb')) > 0
    test_mask = np.load(
        tf.gfile.Open(os.path.join(data_path, 'ppi.test_mask.npy'), 'rb')) > 0
    return adj, features, labels, train_mask, val_mask, test_mask
Example 11: uscensus_reverse_transform
# Required import: from scipy import sparse [as alias]
# Or: from scipy.sparse import load_npz [as alias]
def uscensus_reverse_transform(input_path, output_path, metadata_path):
    with open(metadata_path, "r") as metadata_file:
        metadata = json.load(metadata_file)

    features = load_npz(input_path)

    with open(output_path, "w") as csv_file:
        output = csv.DictWriter(csv_file, ["caseid"] + metadata["variables"])
        output.writeheader()
        for row_value_indices in features:
            _, selected_value_indices = row_value_indices.nonzero()
            # there should be one value per variable
            assert len(selected_value_indices) == len(metadata["variables"])
            row_dict = dict()
            for selected_value_index in selected_value_indices:
                variable, value = metadata["index_to_value"][selected_value_index]
                row_dict[variable] = value
            output.writerow(row_dict)
Example 12: retrieve_dataset
# Required import: from scipy import sparse [as alias]
# Or: from scipy.sparse import load_npz [as alias]
def retrieve_dataset(dataset, **kwargs):
    # if data not extracted, download zip and extract
    outdirname = "datasets.1.17.2019"
    if not os.path.exists(outdirname):
        try:
            from urllib import urlretrieve
        except ImportError:
            from urllib.request import urlretrieve
        import zipfile
        zipfilename = outdirname + ".zip"
        urlretrieve(
            "https://publictestdatasets.blob.core.windows.net/data/" + zipfilename,
            zipfilename,
        )
        with zipfile.ZipFile(zipfilename, "r") as unzip:
            unzip.extractall(".")
    extension = os.path.splitext(dataset)[1]
    filepath = os.path.join(outdirname, dataset)
    if extension == ".npz":
        # sparse format file
        import scipy.sparse as sparse
        return sparse.load_npz(filepath)
    elif extension == ".svmlight":
        from sklearn import datasets
        return datasets.load_svmlight_file(filepath)
    elif extension == ".json":
        import json
        with open(filepath, encoding="utf-8") as f:
            dataset = json.load(f)
        return dataset
    elif extension == ".csv":
        import pandas as pd
        return pd.read_csv(filepath, **kwargs)
    else:
        raise Exception("Unrecognized file extension: " + extension)
Example 13: load_state
# Required import: from scipy import sparse [as alias]
# Or: from scipy.sparse import load_npz [as alias]
def load_state(self, folderpath):
    state = ut.read_jsonfile(
        ut.join_paths([folderpath, 'hash_model_state.json']))
    self.vals_lst = state['vals_lst']
    num_evals = state['num_evals']
    for i in range(num_evals):
        self.vecs_lst.append(
            sp.load_npz(ut.join_paths([folderpath, str(i) + '.npz'])))
    if num_evals > 0:
        self._refit()
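Example 13 shows only the load side. A plausible counterpart save_state, assuming the same file layout (hash_model_state.json plus one .npz file per evaluated vector) and assuming a ut.write_jsonfile helper that mirrors the ut.read_jsonfile used above, might look like this sketch:

def save_state(self, folderpath):
    # hypothetical counterpart to load_state; ut.write_jsonfile is an assumed helper
    ut.write_jsonfile(
        {'vals_lst': self.vals_lst, 'num_evals': len(self.vecs_lst)},
        ut.join_paths([folderpath, 'hash_model_state.json']))
    for i, vec in enumerate(self.vecs_lst):
        sp.save_npz(ut.join_paths([folderpath, str(i) + '.npz']), vec)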
Example 14: loadRedditFromNPZ
# Required import: from scipy import sparse [as alias]
# Or: from scipy.sparse import load_npz [as alias]
def loadRedditFromNPZ(dataset_dir=datadir):
    adj = sp.load_npz(dataset_dir + "reddit_adj.npz")
    data = np.load(dataset_dir + "reddit.npz")
    return (adj, data['feats'], data['y_train'], data['y_val'], data['y_test'],
            data['train_index'], data['val_index'], data['test_index'])
Example 15: load_data
# Required import: from scipy import sparse [as alias]
# Or: from scipy.sparse import load_npz [as alias]
def load_data(self):
    if not osp.exists(self.data_filename):
        self.download_npz()
    print('Loading {} dataset perturbed by 0.05 mettack...'.format(self.name))
    adj = sp.load_npz(self.data_filename)
    warnings.warn('The adjacency matrix is perturbed, using the data splits '
                  'under seed 15 (the default seed for '
                  'deeprobust.graph.data.Dataset). If you are going to verify '
                  'the attacking performance, you should use the same data splits.')
    return adj