本文整理匯總了Python中scipy.sparse.load_npz方法的典型用法代碼示例。如果您正苦於以下問題:Python sparse.load_npz方法的具體用法?Python sparse.load_npz怎麽用?Python sparse.load_npz使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類scipy.sparse
的用法示例。
在下文中一共展示了sparse.load_npz方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: process
# 需要導入模塊: from scipy import sparse [as 別名]
# 或者: from scipy.sparse import load_npz [as 別名]
def process(self):
    """Build and save the Reddit ``Data`` object from the raw npz files."""
    raw = np.load(osp.join(self.raw_dir, 'reddit_data.npz'))
    features = torch.from_numpy(raw['feature']).to(torch.float)
    labels = torch.from_numpy(raw['label']).to(torch.long)
    node_types = torch.from_numpy(raw['node_types'])

    # The adjacency is stored as a scipy COO matrix; convert to edge_index.
    coo = sp.load_npz(osp.join(self.raw_dir, 'reddit_graph.npz'))
    src = torch.from_numpy(coo.row).to(torch.long)
    dst = torch.from_numpy(coo.col).to(torch.long)
    edge_index = torch.stack([src, dst], dim=0)
    edge_index, _ = coalesce(edge_index, None, features.size(0), features.size(0))

    data = Data(x=features, edge_index=edge_index, y=labels)
    # node_types encodes the split: 1 = train, 2 = val, 3 = test.
    data.train_mask = node_types == 1
    data.val_mask = node_types == 2
    data.test_mask = node_types == 3
    if self.pre_transform is not None:
        data = self.pre_transform(data)
    torch.save(self.collate([data]), self.processed_paths[0])
示例2: _load
# 需要導入模塊: from scipy import sparse [as 別名]
# 或者: from scipy.sparse import load_npz [as 別名]
def _load(self):
    """Load the Reddit dataset from the extracted files.

    Populates ``self.graph``, ``self.features``, ``self.labels``,
    ``self.num_labels`` and the train/val/test boolean masks, then prints
    summary statistics.
    """
    # graph: adjacency is stored on disk as a scipy sparse COO matrix
    coo_adj = sp.load_npz(os.path.join(
        self._extract_dir, "reddit{}_graph.npz".format(self._self_loop_str)))
    self.graph = DGLGraph(coo_adj, readonly=True)
    # features and labels
    reddit_data = np.load(os.path.join(self._extract_dir, "reddit_data.npz"))
    self.features = reddit_data["feature"]
    self.labels = reddit_data["label"]
    # NOTE(review): 41 presumably matches the number of label classes in
    # reddit_data["label"] — confirm against the dataset release.
    self.num_labels = 41
    # train/val/test indices (node_types: 1 = train, 2 = val, 3 = test)
    node_ids = reddit_data["node_ids"]  # loaded but unused below; presumably kept for debugging — confirm
    node_types = reddit_data["node_types"]
    self.train_mask = (node_types == 1)
    self.val_mask = (node_types == 2)
    self.test_mask = (node_types == 3)
    print('Finished data loading.')
    print(' NumNodes: {}'.format(self.graph.number_of_nodes()))
    print(' NumEdges: {}'.format(self.graph.number_of_edges()))
    print(' NumFeats: {}'.format(self.features.shape[1]))
    print(' NumClasses: {}'.format(self.num_labels))
    print(' NumTrainingSamples: {}'.format(len(np.nonzero(self.train_mask)[0])))
    print(' NumValidationSamples: {}'.format(len(np.nonzero(self.val_mask)[0])))
    print(' NumTestSamples: {}'.format(len(np.nonzero(self.test_mask)[0])))
示例3: load_term_counts
# 需要導入模塊: from scipy import sparse [as 別名]
# 或者: from scipy.sparse import load_npz [as 別名]
def load_term_counts(path='../dat/', force_redo=False):
    """Load (or recompute) term-count matrices for the Reddit corpus.

    Args:
        path: directory prefix containing (or to receive) the cached arrays.
        force_redo: when True, ignore any cached files and recompute.

    Returns:
        Tuple of (post term counts, per-author term counts, vocabulary array).
    """
    count_filename = path + 'reddit_term_counts'
    authors_counts_filename = path + 'reddit_author_term_counts'
    vocab_filename = path + 'vocab'
    # Only use the cache when *all three* artifacts are present; the original
    # checked just the first file, so a partially written cache crashed here.
    cache_files = (count_filename + '.npz',
                   authors_counts_filename + '.npz',
                   vocab_filename + '.npy')
    if all(os.path.exists(f) for f in cache_files) and not force_redo:
        return (sparse.load_npz(count_filename + '.npz'),
                sparse.load_npz(authors_counts_filename + '.npz'),
                np.load(vocab_filename + '.npy'))
    # Recompute from the raw Reddit posts and cache the results.
    reddit = load_reddit()
    post_docs = reddit['post_text'].values
    # One document per author: concatenate all of that author's posts.
    author_grouped = reddit.groupby('author')['post_text'].apply(lambda x: ' '.join(x)).reset_index()
    author_docs = author_grouped['post_text'].values
    counts, vocab, vec = tokenize_documents(post_docs)
    author_counts = vec.transform(author_docs)
    sparse.save_npz(count_filename, counts)
    sparse.save_npz(authors_counts_filename, author_counts)
    np.save(vocab_filename, vocab)
    return counts, author_counts, vocab
示例4: test_average_regions_start
# 需要導入模塊: from scipy import sparse [as 別名]
# 或者: from scipy.sparse import load_npz [as 別名]
def test_average_regions_start():
    """hicAverageRegions with -cb start must reproduce the reference matrix."""
    result = NamedTemporaryFile(suffix='.npz', prefix='average_region', delete=False)
    matrix = ROOT + 'small_test_matrix.cool'
    bed_file = ROOT + 'hicAverageRegions/regions_multi.bed'
    log.debug('path: {}'.format(matrix))
    args = "--matrix {} --regions {} -o {} --range 100000 100000 -cb {}".format(
        matrix, bed_file, result.name, 'start').split()
    hicAverageRegions.main(args)
    expected = load_npz(ROOT + 'hicAverageRegions/regions_multi_start.npz')
    computed = load_npz(result.name)
    nt.assert_almost_equal(expected.data, computed.data, decimal=0)
    os.remove(result.name)
示例5: test_average_regions_center
# 需要導入模塊: from scipy import sparse [as 別名]
# 或者: from scipy.sparse import load_npz [as 別名]
def test_average_regions_center():
    """hicAverageRegions with -cb center must reproduce the reference matrix."""
    result = NamedTemporaryFile(suffix='.npz', prefix='average_region', delete=False)
    matrix = ROOT + 'small_test_matrix.cool'
    bed_file = ROOT + 'hicAverageRegions/regions_multi.bed'
    log.debug('path: {}'.format(matrix))
    args = "--matrix {} --regions {} -o {} --range 100000 100000 -cb {}".format(
        matrix, bed_file, result.name, 'center').split()
    hicAverageRegions.main(args)
    expected = load_npz(ROOT + 'hicAverageRegions/regions_multi_center.npz')
    computed = load_npz(result.name)
    nt.assert_almost_equal(expected.data, computed.data, decimal=0)
    os.remove(result.name)
示例6: test_average_regions_single
# 需要導入模塊: from scipy import sparse [as 別名]
# 或者: from scipy.sparse import load_npz [as 別名]
def test_average_regions_single():
    """hicAverageRegions on a single-region BED must match the reference."""
    result = NamedTemporaryFile(suffix='.npz', prefix='average_region', delete=False)
    matrix = ROOT + 'small_test_matrix.cool'
    bed_file = ROOT + 'hicAverageRegions/regions.bed'
    log.debug('path: {}'.format(matrix))
    args = "--matrix {} --regions {} -o {} --range 100000 100000".format(
        matrix, bed_file, result.name).split()
    hicAverageRegions.main(args)
    expected = load_npz(ROOT + 'hicAverageRegions/result_range_100000.npz')
    computed = load_npz(result.name)
    nt.assert_almost_equal(expected.data, computed.data, decimal=0)
    os.remove(result.name)
示例7: save_to_one_flie_csrs
# 需要導入模塊: from scipy import sparse [as 別名]
# 或者: from scipy.sparse import load_npz [as 別名]
def save_to_one_flie_csrs(self, fns):
    """Concatenate per-chunk CSR matrices into one appendable HDF5 file.

    Args:
        fns: filenames (inside ``<output_folder>/chunks``) of the .npz chunks,
            appended in the given order to the "matrix" dataset of Xcsr.h5.
    """
    save_as = os.path.join(self.output_folder, "Xcsr.h5")
    # Remove a stale output file. Only "file not there" is expected; the
    # original bare `except:` also hid permission errors and KeyboardInterrupt.
    try:
        os.unlink(save_as)
    except FileNotFoundError:
        pass
    h5f = h5sparse.File(save_as)
    try:
        first = True
        for fn in fns:
            logger.info(f"Saving {fn}")
            mat = load_npz(os.path.join(self.output_folder, "chunks", fn)).astype(np.float32)
            if first:
                # First chunk creates the growable dataset; later ones append.
                h5f.create_dataset("matrix", data=mat, chunks=(10_000_000,), maxshape=(None,))
                first = False
            else:
                h5f["matrix"].append(mat)
            gc.collect()  # chunks can be large; free each one eagerly
    finally:
        # Close the HDF5 file even if a chunk fails to load or append
        # (the original leaked the handle on any exception).
        h5f.close()
示例8: load_edge_masks
# 需要導入模塊: from scipy import sparse [as 別名]
# 或者: from scipy.sparse import load_npz [as 別名]
def load_edge_masks(dataset_str, data_path, adj_true, drop_edge_prop):
    """Loads adjacency matrix as sparse matrix and masks for val & test links.

    Args:
        dataset_str: dataset to use
        data_path: path to data folder
        adj_true: true adjacency matrix in dense format,
        drop_edge_prop: proportion of edges to remove.

    Returns:
        adj_matrix: adjacency matrix
        train_mask: mask for train edges
        val_mask: mask for val edges
        test_mask: mask for test edges
    """
    edge_mask_path = os.path.join(
        data_path, 'emask.{}.remove{}.'.format(dataset_str, drop_edge_prop))
    val_mask = sp.load_npz(edge_mask_path + 'val.npz')
    test_mask = sp.load_npz(edge_mask_path + 'test.npz')
    # Train edges = everything not held out for validation or test.
    train_mask = 1. - val_mask.todense() - test_mask.todense()
    # remove val and test edges from true A
    adj_train = np.multiply(adj_true, train_mask)
    # Drop self-loops from the train mask.
    train_mask -= np.eye(train_mask.shape[0])
    # NOTE(review): val_mask is converted and returned twice; given the
    # docstring's (adj, train, val, test) contract, the second element was
    # probably meant to be train-mask related — confirm with callers before
    # changing, since train_mask here is dense.
    return adj_train, sparse_to_tuple(val_mask), sparse_to_tuple(
        val_mask), sparse_to_tuple(test_mask)
示例9: add_top_k_edges
# 需要導入模塊: from scipy import sparse [as 別名]
# 或者: from scipy.sparse import load_npz [as 別名]
def add_top_k_edges(data, edge_mask_path, gae_scores_path, topk, nb_nodes,
                    norm_adj):
    """Loads GAE scores and adds topK edges to train adjacency.

    Args:
        data: dict holding 'adj_true' and 'adj_train' dense adjacencies.
        edge_mask_path: folder containing test_mask.npz.
        gae_scores_path: folder containing precomputed gae_scores.npy.
        topk: number of highest-scoring candidate edges to add.
        nb_nodes: number of nodes in the graph.
        norm_adj: if truthy, normalize the returned train adjacency.

    Returns:
        Tuple of (binarized train adjacency with added edges,
        sparse-tuple form of the (optionally normalized) train adjacency).
    """
    test_mask = sp.load_npz(os.path.join(edge_mask_path, 'test_mask.npz'))
    train_mask = 1. - test_mask.todense()
    # remove val and test edges from true A
    adj_train_curr = np.multiply(data['adj_true'], train_mask)
    # Predict test edges using precomputed scores
    scores = np.load(os.path.join(gae_scores_path, 'gae_scores.npy'))
    # scores_mask = 1 - np.eye(nb_nodes)
    # Only consider node pairs crossing the 140-node boundary.
    # NOTE(review): the hard-coded 140 looks dataset-specific (a fixed split
    # size?) — confirm before reusing on another graph.
    scores_mask = np.zeros((nb_nodes, nb_nodes))
    scores_mask[:140, 140:] = 1.
    scores_mask[140:, :140] = 1.
    scores = np.multiply(scores, scores_mask).reshape((-1,))
    # Threshold at the (topk+1)-th largest masked score; add all edges above it.
    threshold = scores[np.argsort(-scores)[topk]]
    adj_train_curr += 1 * (scores > threshold).reshape((nb_nodes, nb_nodes))
    adj_train_curr = 1 * (adj_train_curr > 0)  # binarize
    if norm_adj:
        adj_train_norm = normalize_adj(data['adj_train'])
    else:
        adj_train_norm = sp.coo_matrix(data['adj_train'])
    return adj_train_curr, sparse_to_tuple(adj_train_norm)
示例10: load_ppi_data
# 需要導入模塊: from scipy import sparse [as 別名]
# 或者: from scipy.sparse import load_npz [as 別名]
def load_ppi_data(data_path):
    """Load PPI dataset.

    Returns:
        Tuple of (sparse adjacency, dense features, dense labels,
        train_mask, val_mask, test_mask) read from ``data_path``.
    """
    with tf.gfile.Open(os.path.join(data_path, 'ppi.edges.npz')) as f:
        adj = sp.load_npz(f)
    with tf.gfile.Open(os.path.join(data_path, 'ppi.features.norm.npy')) as f:
        features = np.load(f)
    with tf.gfile.Open(os.path.join(data_path, 'ppi.labels.npz')) as f:
        labels = sp.load_npz(f).todense()
    train_mask = np.load(
        tf.gfile.Open(os.path.join(data_path, 'ppi.train_mask.npy'))) > 0
    # NOTE(review): val_mask is read from the *test* mask file, so the val and
    # test splits are identical. This looks like a copy-paste slip
    # ('ppi.val_mask.npy'?) — confirm whether a separate val mask file exists
    # before changing behavior.
    val_mask = np.load(
        tf.gfile.Open(os.path.join(data_path, 'ppi.test_mask.npy'))) > 0
    test_mask = np.load(
        tf.gfile.Open(os.path.join(data_path, 'ppi.test_mask.npy'))) > 0
    return adj, features, labels, train_mask, val_mask, test_mask
示例11: uscensus_reverse_transform
# 需要導入模塊: from scipy import sparse [as 別名]
# 或者: from scipy.sparse import load_npz [as 別名]
def uscensus_reverse_transform(input_path, output_path, metadata_path):
    """Reverse a one-hot sparse encoding of census rows back into a CSV.

    Args:
        input_path: .npz file holding the one-hot sparse feature matrix.
        output_path: destination CSV path.
        metadata_path: JSON file with "variables" (column names) and
            "index_to_value" (column index -> [variable, value] pairs).
    """
    with open(metadata_path, "r") as metadata_file:
        metadata = json.load(metadata_file)
    features = load_npz(input_path)
    # Context manager guarantees the CSV is flushed and closed even when a
    # row fails validation (the original leaked the handle on any exception).
    with open(output_path, "w") as csv_file:
        output = csv.DictWriter(csv_file, ["caseid"] + metadata["variables"])
        output.writeheader()
        for row_value_indices in features:
            _, selected_value_indices = row_value_indices.nonzero()
            # there should be one value per variable
            assert len(selected_value_indices) == len(metadata["variables"])
            row_dict = dict()
            for selected_value_index in selected_value_indices:
                variable, value = metadata["index_to_value"][selected_value_index]
                row_dict[variable] = value
            # "caseid" is never populated, so that column is written empty —
            # preserved from the original behavior.
            output.writerow(row_dict)
示例12: retrieve_dataset
# 需要導入模塊: from scipy import sparse [as 別名]
# 或者: from scipy.sparse import load_npz [as 別名]
def retrieve_dataset(dataset, **kwargs):
    """Fetch a benchmark dataset file, downloading the archive on first use.

    Args:
        dataset: filename within the archive; its extension picks the loader.
        **kwargs: forwarded to ``pandas.read_csv`` for .csv files.

    Returns:
        The loaded dataset; the concrete type depends on the extension
        (.npz -> scipy sparse, .svmlight -> (X, y), .json -> object,
        .csv -> DataFrame).

    Raises:
        ValueError: if the extension is not one of the supported formats.
    """
    # if data not extracted, download zip and extract
    outdirname = "datasets.1.17.2019"
    if not os.path.exists(outdirname):
        try:
            from urllib import urlretrieve  # Python 2
        except ImportError:
            from urllib.request import urlretrieve  # Python 3
        import zipfile
        zipfilename = outdirname + ".zip"
        urlretrieve(
            "https://publictestdatasets.blob.core.windows.net/data/" + zipfilename,
            zipfilename,
        )
        with zipfile.ZipFile(zipfilename, "r") as unzip:
            unzip.extractall(".")
    extension = os.path.splitext(dataset)[1]
    filepath = os.path.join(outdirname, dataset)
    if extension == ".npz":
        # sparse format file
        import scipy.sparse as sparse
        return sparse.load_npz(filepath)
    elif extension == ".svmlight":
        from sklearn import datasets
        return datasets.load_svmlight_file(filepath)
    elif extension == ".json":
        import json
        with open(filepath, encoding="utf-8") as f:
            dataset = json.load(f)
        return dataset
    elif extension == ".csv":
        import pandas as pd
        return pd.read_csv(filepath, **kwargs)
    else:
        # ValueError is more specific than the original bare Exception and
        # remains backward compatible for callers catching Exception.
        raise ValueError("Unrecognized file extension: " + extension)
示例13: load_state
# 需要導入模塊: from scipy import sparse [as 別名]
# 或者: from scipy.sparse import load_npz [as 別名]
def load_state(self, folderpath):
    """Restore saved values and per-evaluation sparse vectors, then refit."""
    state = ut.read_jsonfile(
        ut.join_paths([folderpath, 'hash_model_state.json']))
    self.vals_lst = state['vals_lst']
    num_evals = state['num_evals']
    # One sparse vector file per recorded evaluation: 0.npz, 1.npz, ...
    for idx in range(num_evals):
        vec_path = ut.join_paths([folderpath, str(idx) + '.npz'])
        self.vecs_lst.append(sp.load_npz(vec_path))
    if num_evals > 0:
        self._refit()
示例14: loadRedditFromNPZ
# 需要導入模塊: from scipy import sparse [as 別名]
# 或者: from scipy.sparse import load_npz [as 別名]
def loadRedditFromNPZ(dataset_dir=datadir):
    """Read the preprocessed Reddit adjacency plus feature/label/index arrays."""
    adjacency = sp.load_npz(dataset_dir + "reddit_adj.npz")
    arrays = np.load(dataset_dir + "reddit.npz")
    return (adjacency, arrays['feats'], arrays['y_train'], arrays['y_val'],
            arrays['y_test'], arrays['train_index'], arrays['val_index'],
            arrays['test_index'])
示例15: load_data
# 需要導入模塊: from scipy import sparse [as 別名]
# 或者: from scipy.sparse import load_npz [as 別名]
def load_data(self):
    """Load the mettack-perturbed adjacency matrix, downloading it if absent."""
    if not osp.exists(self.data_filename):
        self.download_npz()
    print('Loading {} dataset perturbed by 0.05 mettack...'.format(self.name))
    perturbed_adj = sp.load_npz(self.data_filename)
    warnings.warn('''the adjacency matrix is perturbed, using the data splits under seed 15(default seed for deeprobust.graph.data.Dataset), so if you are going to verify the attacking performance, you should use the same data splits''')
    return perturbed_adj