This article collects typical usage examples of the scipy.io.mmread method in Python. If you are wondering what exactly io.mmread does, how to call it, or where to find it used in real code, the curated examples below may help. You can also explore further usage examples from the scipy.io module that this method belongs to.
A total of 12 code examples of io.mmread are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
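Before the examples, here is a minimal round-trip sketch (not taken from any of the projects below) showing what mmread returns; the file name example.mtx is arbitrary:
import numpy as np
from scipy import sparse
from scipy.io import mmread, mmwrite

# Build a small random sparse matrix and persist it in Matrix Market format.
mat = sparse.random(5, 4, density=0.3, format="csr", random_state=0)
mmwrite("example.mtx", mat)

# mmread returns a COO sparse matrix (coo_matrix or coo_array, depending on the
# SciPy version); convert explicitly if you need CSR/CSC or a dense array.
loaded = mmread("example.mtx")
print(type(loaded), loaded.shape, loaded.nnz)
dense = loaded.toarray()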
Example 1: read_mtx
# Required import: from scipy import io [as alias]
# Or: from scipy.io import mmread [as alias]
def read_mtx(filename: PathLike, dtype: str = "float32") -> AnnData:
"""\
Read `.mtx` file.
Parameters
----------
filename
The filename.
dtype
Numpy data type.
"""
from scipy.io import mmread
# could be rewritten accounting for dtype to be more performant
X = mmread(fspath(filename)).astype(dtype)
from scipy.sparse import csr_matrix
X = csr_matrix(X)
return AnnData(X, dtype=dtype)
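A hedged usage sketch for the function above; it assumes the anndata package and the imports shown in Example 1 are available, and the path counts.mtx is hypothetical:
adata = read_mtx("counts.mtx")      # AnnData backed by a float32 CSR matrix
print(adata.shape, adata.X.dtype)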
Example 2: factorize_nmf
# Required import: from scipy import io [as alias]
# Or: from scipy.io import mmread [as alias]
def factorize_nmf():
print('factorizing matrix')
newsgroups_mmf_file = '/Users/fpena/tmp/nmf_graphlab/newsgroups/newsgroups_matrix.mmf'
document_term_matrix = mmread(newsgroups_mmf_file)
factorizer = decomposition.NMF(
init="nndsvd", n_components=Constants.TOPIC_MODEL_NUM_TOPICS,
max_iter=Constants.TOPIC_MODEL_ITERATIONS,
alpha=Constants.NMF_REGULARIZATION,
l1_ratio=Constants.NMF_REGULARIZATION_RATIO
)
document_topic_matrix = \
factorizer.fit_transform(document_term_matrix)
topic_term_matrix = factorizer.components_
# mmwrite(mmf_file, small_matrix)
# mmwrite(newsgroups_mmf_file, X)
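A self-contained sketch of the same mmread-then-NMF pattern, with explicit hyperparameters standing in for the project-specific Constants; the file path and the number of topics are assumptions:
from scipy.io import mmread
from sklearn import decomposition

# Hypothetical term-count matrix in Matrix Market format (non-negative, as NMF requires).
document_term_matrix = mmread("newsgroups_matrix.mmf").tocsr()
factorizer = decomposition.NMF(init="nndsvd", n_components=10, max_iter=200)
document_topic_matrix = factorizer.fit_transform(document_term_matrix)
topic_term_matrix = factorizer.components_
print(document_topic_matrix.shape, topic_term_matrix.shape)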
Example 3: read_ATAC_10x
# Required import: from scipy import io [as alias]
# Or: from scipy.io import mmread [as alias]
def read_ATAC_10x(matrix, cell_names='', var_names='', path_file=''):
"""
    Load a sparse matrix (including matrices corresponding to 10x data) as an AnnData object.
    Reads the .mtx file, the .tsv file corresponding to cell_names and the .bed file containing the variable names.
Parameters
----------
matrix: sparse count matrix
cell_names: optional, tsv file containing cell names
var_names: optional, bed file containing the feature names
    Returns
    -------
AnnData object
"""
mat = mmread(''.join([path_file, matrix]))
mat = mat.toarray()
mat = np.matrix(mat.transpose())
with open(path_file+cell_names) as f:
barcodes = f.readlines()
barcodes = [x[:-1] for x in barcodes]
with open(path_file+var_names) as f:
var_names = f.readlines()
var_names = ["_".join(x[:-1].split('\t')) for x in var_names]
adata = ad.AnnData(mat, obs=pd.DataFrame(index=barcodes), var=pd.DataFrame(index=var_names))
adata.uns['omic'] = 'ATAC'
    return adata
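A hedged usage sketch; every file name here is hypothetical, and the function expects the barcode and feature files to sit next to the matrix under path_file:
adata = read_ATAC_10x(
    matrix="matrix.mtx",
    cell_names="barcodes.tsv",
    var_names="peaks.bed",
    path_file="data/atac_sample/",
)
print(adata)   # cells x peaks AnnData with adata.uns['omic'] == 'ATAC'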
Example 4: read_mtx
# Required import: from scipy import io [as alias]
# Or: from scipy.io import mmread [as alias]
def read_mtx(path):
for filename in glob(path+'/*'):
basename = os.path.basename(filename)
if (('count' in basename) or ('matrix' in basename)) and ('mtx' in basename):
count = mmread(filename).T.tocsr().astype('float32')
elif 'barcode' in basename:
barcode = pd.read_csv(filename, sep='\t', header=None)[0].values
elif 'gene' in basename or 'peak' in basename:
feature = pd.read_csv(filename, sep='\t', header=None).iloc[:, -1].values
return count, feature, barcode
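A hedged usage sketch; data/10x_sample is a hypothetical folder expected to contain one *matrix*/*count* .mtx file, one *barcode* file, and one *gene* or *peak* file:
count, feature, barcode = read_mtx("data/10x_sample")
print(count.shape, len(feature), len(barcode))   # cells x features CSR matrix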
Example 5: generate_related_mat
# Required import: from scipy import io [as alias]
# Or: from scipy.io import mmread [as alias]
def generate_related_mat(folder, triples1, triples2, ref_ent1, ref_ent2):
t = time.time()
if "15" in folder:
out_related_file = folder + "out_related_mat.npy"
in_related_file = folder + "in_related_mat.npy"
if os.path.exists(out_related_file):
out_related_mat = np.load(out_related_file)
else:
out_related_mat = generate_out_related_mat(triples1, triples2, ref_ent1, ref_ent2)
np.save(out_related_file, out_related_mat)
if os.path.exists(in_related_file):
in_related_mat = np.load(in_related_file)
else:
in_related_mat = generate_in_related_mat(triples1, triples2, ref_ent1, ref_ent2)
np.save(in_related_file, in_related_mat)
related_mat1 = out_related_mat
# related_mat2 = out_related_mat + in_related_mat
print("load related mat", round(time.time() - t, 2))
return related_mat1
else:
out_related_file = folder + "out_related_mat.mtx"
in_related_file = folder + "in_related_mat.mtx"
if os.path.exists(out_related_file):
out_related_mat = io.mmread(out_related_file)
else:
out_related_mat = generate_out_related_mat(triples1, triples2, ref_ent1, ref_ent2)
io.mmwrite(out_related_file, sp.sparse.lil_matrix(out_related_mat))
if os.path.exists(in_related_file):
in_related_mat = io.mmread(in_related_file)
else:
in_related_mat = generate_in_related_mat(triples1, triples2, ref_ent1, ref_ent2)
io.mmwrite(in_related_file, in_related_mat)
related_mat1 = out_related_mat
# related_mat2 = out_related_mat + in_related_mat
print("load related mat", round(time.time() - t, 2))
return related_mat1
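The else branch above implements a compute-once, cache-as-.mtx pattern. A generalized sketch of that pattern, with a hypothetical cache path and a random matrix standing in for the real computation:
import os
import scipy.sparse as sp
from scipy import io

def load_or_build(cache_path="related_mat.mtx"):
    if os.path.exists(cache_path):
        return io.mmread(cache_path).tocsr()                   # reload the cached matrix
    mat = sp.random(1000, 1000, density=0.01, format="lil")    # stand-in computation
    io.mmwrite(cache_path, mat)                                # mmwrite accepts any scipy sparse matrix
    return mat.tocsr()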
Example 6: load_mtx
# Required import: from scipy import io [as alias]
# Or: from scipy.io import mmread [as alias]
def load_mtx(mtx_file, cell_axis="row", gene_names=None, cell_names=None, sparse=None):
"""Load a mtx file
Parameters
----------
    mtx_file : str
The name of the mtx file to be loaded
cell_axis : {'row', 'column'}, optional (default: 'row')
If your data has genes on the rows and cells on the columns, use
cell_axis='column'
gene_names : `str`, array-like, or `None` (default: None)
Expects a filename or an array containing a list of gene symbols or ids
cell_names : `str`, array-like, or `None` (default: None)
Expects a filename or an array containing a list of cell barcodes.
sparse : bool, optional (default: None)
If True, loads the data as a pd.DataFrame[pd.SparseArray]. This uses less memory
but more CPU.
Returns
-------
data : array-like, shape=[n_samples, n_features]
If either gene or cell names are given, data will be a pd.DataFrame or
pd.DataFrame[pd.SparseArray]. If no names are given, data will be a np.ndarray
or scipy.sparse.spmatrix
"""
if cell_axis not in ["row", "column", "col"]:
raise ValueError(
"cell_axis {} not recognized. Expected 'row' or 'column'".format(cell_axis)
)
# Read in mtx file
data = sio.mmread(mtx_file)
if cell_axis in ["column", "col"]:
data = data.T
data = _matrix_to_data_frame(
data, gene_names=gene_names, cell_names=cell_names, sparse=sparse
)
return data
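A hedged usage sketch for load_mtx; the file names are hypothetical, and per the docstring gene_names/cell_names may also be in-memory lists instead of paths:
data = load_mtx(
    "matrix.mtx",
    cell_axis="column",          # genes on rows, cells on columns in the file
    gene_names="genes.tsv",
    cell_names="barcodes.tsv",
    sparse=True,
)
print(data.shape)                # cells x genes sparse DataFrame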
Example 7: get_all_sim_mat_sparse
# Required import: from scipy import io [as alias]
# Or: from scipy.io import mmread [as alias]
def get_all_sim_mat_sparse(folder):
cross_sim_mat = preprocessing.normalize(io.mmread(folder + 'ents_sim.mtx'), norm='l1')
kb1_sim_mat = preprocessing.normalize(io.mmread(folder + 'kb1_ents_sim.mtx'), norm='l1')
kb2_sim_mat = preprocessing.normalize(io.mmread(folder + 'kb2_ents_sim.mtx'), norm='l1')
return cross_sim_mat, kb1_sim_mat, kb2_sim_mat
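A minimal sketch of what each of the three calls above does: read a sparse similarity matrix from disk and L1-normalize its rows so each row sums to 1. The file name is hypothetical:
from scipy import io
from sklearn import preprocessing

sim = io.mmread("ents_sim.mtx")                  # COO sparse matrix
sim = preprocessing.normalize(sim, norm="l1")    # converted to CSR, rows sum to 1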
Example 8: main
# Required import: from scipy import io [as alias]
# Or: from scipy.io import mmread [as alias]
def main(testFormat, testDS, annoFile, visMethod):
#load test data
print('##########loading test data')
if testFormat == '10x':
fileItem = glob.glob(os.path.join(testDS, "matrix.mtx"))[0]
em = io.mmread(fileItem)
em = em.tocsr().toarray()
        if os.path.exists(os.path.join(testDS, 'genes.tsv')):
row = pd.read_table(fileItem[:-10]+"genes.tsv", header=None, index_col=None)
else:
row = pd.read_table(fileItem[:-10]+"features.tsv", header=None, index_col=None)
col = pd.read_table(fileItem[:-10]+"barcodes.tsv", header=None, index_col=None)
em = pd.DataFrame(em, index=row.T.values[1], columns=col.T.values[0])
savefolder = testDS
else:
em = pd.read_csv(testDS, index_col=0, header=0)
savefolder = testDS[:-4]
print('##########reducing dimensions')
cords = CalCords(savefolder, em, visMethod)
annos = pd.read_csv(annoFile, index_col=0, header=0)
    commonIdx = list(set(cords.index).intersection(set(annos.index)))
    cords = cords.loc[commonIdx]
    annos = annos.loc[commonIdx]
    print('##########drawing the scatter plots in the folder: %s' % savefolder)
DrawScatters(savefolder, annoFile, visMethod, cords, annos)
print('##########DONE!')
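A hedged usage sketch; all arguments are hypothetical, with testDS pointing at a 10x matrix folder, annoFile at a per-cell annotation CSV indexed by barcode, and visMethod naming whichever dimensionality-reduction method CalCords supports:
main(testFormat="10x",
     testDS="data/10x_sample",
     annoFile="annotations.csv",
     visMethod="tsne")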
Example 9: load
# Required import: from scipy import io [as alias]
# Or: from scipy.io import mmread [as alias]
def load(cls, path, manifest='manifest.json'):
'''Load an MSM object from disk into memory.
Parameters
----------
path : str
        The location of the root directory of the MSM serialization.
    manifest : str
        The name of the json manifest file of the MSM directory
        (contains the paths to each of the other files).
'''
if not os.path.isdir(path):
raise NotImplementedError("MSMs don't handle zip archives yet.")
with open(os.path.join(path, manifest)) as f:
fname_dict = json.load(f)
# decorate fname_dict values with path
fname_dict = {k: os.path.join(path, v) for k, v in fname_dict.items()}
with open(fname_dict['config'], 'rb') as f:
config = pickle.load(f)
msm = MSM(**config)
msm.tcounts_ = mmread(fname_dict['tcounts_'])
msm.tprobs_ = mmread(fname_dict['tprobs_'])
msm.mapping_ = TrimMapping.load(fname_dict['mapping_'])
msm.eq_probs_ = np.loadtxt(fname_dict['eq_probs_'])
return msm
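A hedged usage sketch, assuming load is a classmethod of MSM (as the cls parameter suggests) and that msm_dir/ is a hypothetical directory previously written by the matching save routine (manifest.json plus the referenced files):
msm = MSM.load("msm_dir/")
print(msm.tcounts_.shape)        # sparse transition-count matrix read via mmread
print(msm.eq_probs_[:5])         # equilibrium probabilities loaded with np.loadtxt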
Example 10: create_from_cellranger
# Required import: from scipy import io [as alias]
# Or: from scipy.io import mmread [as alias]
def create_from_cellranger(indir: str, outdir: str = None, genome: str = None) -> str:
"""
Create a .loom file from 10X Genomics cellranger output
Args:
indir (str): path to the cellranger output folder (the one that contains 'outs')
        outdir (str): output folder where the new loom file should be saved (defaults to indir)
        genome (str): genome build to load (e.g. 'mm10'; if None, the genome is determined from the outs folder)
Returns:
path (str): Full path to the created loom file.
Remarks:
The resulting file will be named ``{sampleID}.loom``, where the sampleID is the one given by cellranger.
"""
if outdir is None:
outdir = indir
sampleid = os.path.split(os.path.abspath(indir))[-1]
matrix_folder = os.path.join(indir, 'outs', 'filtered_gene_bc_matrices')
if os.path.exists(matrix_folder):
if genome is None:
genome = [f for f in os.listdir(matrix_folder) if not f.startswith(".")][0]
matrix_folder = os.path.join(matrix_folder, genome)
matrix = mmread(os.path.join(matrix_folder, "matrix.mtx")).todense()
genelines = open(os.path.join(matrix_folder, "genes.tsv"), "r").readlines()
bclines = open(os.path.join(matrix_folder, "barcodes.tsv"), "r").readlines()
else: # cellranger V3 file locations
if genome is None:
genome = "" # Genome is not visible from V3 folder
matrix_folder = os.path.join(indir, 'outs', 'filtered_feature_bc_matrix')
matrix = mmread(os.path.join(matrix_folder, "matrix.mtx.gz")).todense()
genelines = [l.decode() for l in gzip.open(os.path.join(matrix_folder, "features.tsv.gz"), "r").readlines()]
bclines = [l.decode() for l in gzip.open(os.path.join(matrix_folder, "barcodes.tsv.gz"), "r").readlines()]
accession = np.array([x.split("\t")[0] for x in genelines]).astype("str")
gene = np.array([x.split("\t")[1].strip() for x in genelines]).astype("str")
cellids = np.array([sampleid + ":" + x.strip() for x in bclines]).astype("str")
col_attrs = {"CellID": cellids}
row_attrs = {"Accession": accession, "Gene": gene}
tsne_file = os.path.join(indir, "outs", "analysis", "tsne", "projection.csv")
# In cellranger V2 the file moved one level deeper
if not os.path.exists(tsne_file):
tsne_file = os.path.join(indir, "outs", "analysis", "tsne", "2_components", "projection.csv")
if os.path.exists(tsne_file):
tsne = np.loadtxt(tsne_file, usecols=(1, 2), delimiter=',', skiprows=1)
col_attrs["X"] = tsne[:, 0].astype('float32')
col_attrs["Y"] = tsne[:, 1].astype('float32')
clusters_file = os.path.join(indir, "outs", "analysis", "clustering", "graphclust", "clusters.csv")
if os.path.exists(clusters_file):
labels = np.loadtxt(clusters_file, usecols=(1, ), delimiter=',', skiprows=1)
col_attrs["ClusterID"] = labels.astype('int') - 1
path = os.path.join(outdir, sampleid + ".loom")
create(path, matrix, row_attrs, col_attrs, file_attrs={"Genome": genome})
return path
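A hedged usage sketch; the paths are hypothetical, and indir must be a cellranger sample folder that contains an outs subdirectory:
loom_path = create_from_cellranger("runs/sample1", outdir="looms")
print(loom_path)                 # e.g. "looms/sample1.loom"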
Example 11: create_from_matrix_market
# Required import: from scipy import io [as alias]
# Or: from scipy.io import mmread [as alias]
def create_from_matrix_market(out_file: str, sample_id: str, layer_paths: Dict[str, str], row_metadata_path: str, column_metadata_path: str, delim: str = "\t", skip_row_headers: bool = False, skip_colums_headers: bool = False, file_attrs: Dict[str, str] = None, matrix_transposed: bool = False) -> None:
"""
Create a .loom file from .mtx matrix market format
Args:
out_file: path to the newly created .loom file (will be overwritten if it exists)
sample_id: string to use as prefix for cell IDs
layer_paths: dict mapping layer names to paths to the corresponding matrix file (usually with .mtx extension)
row_metadata_path: path to the row (usually genes) metadata file
column_metadata_path: path to the column (usually cells) metadata file
delim: delimiter used for metadata (default: "\t")
        skip_row_headers: if true, skip the first line of the row metadata file
        skip_colums_headers: if true, skip the first line of the column metadata file
file_attrs: dict of global file attributes, or None
matrix_transposed: if true, the main matrix is transposed
Remarks:
layer_paths should typically map the empty string to a matrix market file: {"": "path/to/filename.mtx"}.
To create a multilayer loom file, map multiple named layers {"": "path/to/layer1.mtx", "layer2": "path/to/layer2.mtx"}
Note: the created file MUST have a main layer named "". If no such layer is given, BUT all given layers are the same
datatype, then a main layer will be created as the sum of the other layers. For example, {"spliced": "spliced.mtx", "unspliced": "unspliced.mtx"}
will create three layers, "", "spliced", and "unspliced", where "" is the sum of the other two.
"""
layers: Dict[str, Union[np.ndarray, scipy.sparse.coo_matrix]] = {}
for name, path in layer_paths.items():
matrix = mmread(path)
if matrix_transposed:
matrix = matrix.T
layers[name] = matrix
if "" not in layers:
main_matrix = None
for name, matrix in layers.items():
if main_matrix is None:
main_matrix = matrix.copy()
else:
main_matrix = main_matrix + matrix
layers[""] = main_matrix
genelines = open(row_metadata_path, "r").readlines()
bclines = open(column_metadata_path, "r").readlines()
accession = np.array([x.split("\t")[0] for x in genelines]).astype("str")
    if len(genelines[0].split("\t")) > 1:
gene = np.array([x.split("\t")[1].strip() for x in genelines]).astype("str")
row_attrs = {"Accession": accession, "Gene": gene}
else:
row_attrs = {"Accession": accession}
cellids = np.array([sample_id + ":" + x.strip() for x in bclines]).astype("str")
col_attrs = {"CellID": cellids}
create(out_file, layers[""], row_attrs, col_attrs, file_attrs=file_attrs)
if len(layers) > 1:
with loompy.connect(out_file) as ds:
for name, layer in layers.items():
if name == "":
continue
ds[name] = layer
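A hedged usage sketch; all paths are hypothetical. Mapping the empty string "" makes spliced.mtx the main matrix, and unspliced.mtx becomes a named layer in the new .loom file:
create_from_matrix_market(
    out_file="sample1.loom",
    sample_id="sample1",
    layer_paths={"": "spliced.mtx", "unspliced": "unspliced.mtx"},
    row_metadata_path="genes.tsv",
    column_metadata_path="barcodes.tsv",
    file_attrs={"Genome": "mm10"},
)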
Example 12: from_cellranger_mtx
# Required import: from scipy import io [as alias]
# Or: from scipy.io import mmread [as alias]
def from_cellranger_mtx(mtx_directory, genome=None, returnGenes=False):
"""
    Creates a CellCollection from a sparse matrix (.mtx and associated files) exported by CellRanger.
    Recognizes directories from CellRanger version 2 (files: matrix.mtx, genes.tsv, barcodes.tsv) and
    CellRanger v3 (files: matrix.mtx.gz, features.tsv.gz, barcodes.tsv.gz).
"""
start = time.time()
coll = CellCollection()
cellranger_version = 2
if '.mtx' in mtx_directory:
mtx_file = mtx_directory ### Hence an mtx file was directly supplied
mtx_directory = os.path.abspath(os.path.join(mtx_file, os.pardir))
else:
mtx_file = os.path.join(mtx_directory, "matrix.mtx")
if not os.path.exists(mtx_file):
cellranger_version = 3
mtx_file = mtx_file + ".gz"
if not os.path.exists(mtx_file):
raise Exception("Directory {} does not contain a recognizable matrix file".format(mtx_directory))
if '.gz' in mtx_file:
cellranger_version = 3
sparse_matrix = io.mmread(mtx_file)
coll._matrix = sparse_matrix.tocsc()
    coll._gene_ids = np.empty((coll._matrix.shape[0], ), dtype=object)
    coll._gene_names = np.empty((coll._matrix.shape[0], ), dtype=object)
if cellranger_version == 2:
with open(os.path.join(mtx_directory, "genes.tsv"), "rU") as f:
idx = 0
for line in f:
i, n = line.rstrip().split("\t")
coll._gene_ids[idx] = i
coll._gene_names[idx] = n
idx += 1
with open(os.path.join(mtx_directory, "barcodes.tsv"), "rU") as f:
coll._barcodes = np.array( [ line.rstrip() for line in f ] )
else:
with gzip.open(os.path.join(mtx_directory, "features.tsv.gz"), "rt") as f:
idx = 0
indices = []
for line in f:
i, n, t = line.rstrip().split("\t")
coll._gene_ids[idx] = i
coll._gene_names[idx] = n
if t == 'Gene Expression':
indices.append(idx)
idx += 1
coll._filter_genes_by_index(indices)
with gzip.open(os.path.join(mtx_directory, "barcodes.tsv.gz"), "rt") as f:
coll._barcodes = np.array( [ line.rstrip() for line in f ] )
if returnGenes:
""" Do not import the matrix at this point """
return list(coll._gene_names)
print('sparse matrix data imported from mtx file in %s seconds' % str(time.time()-start))
return coll
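A hedged usage sketch; the path is hypothetical and points at a CellRanger v3 filtered matrix folder (matrix.mtx.gz, features.tsv.gz, barcodes.tsv.gz). Passing the folder of a v2 run (matrix.mtx, genes.tsv, barcodes.tsv) works the same way:
coll = from_cellranger_mtx("runs/sample1/outs/filtered_feature_bc_matrix")
# coll._matrix is a genes x cells CSC matrix; pass returnGenes=True to get only the gene names.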