本文整理汇总了Python中oncotator.DatasourceFactory.DatasourceFactory.createDatasources方法的典型用法代码示例。如果您正苦于以下问题：Python DatasourceFactory.createDatasources方法的具体用法？Python DatasourceFactory.createDatasources怎么用？Python DatasourceFactory.createDatasources使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类oncotator.DatasourceFactory.DatasourceFactory的用法示例。
在下文中一共展示了DatasourceFactory.createDatasources方法的11个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testAnnotateListOfMutations
# 需要导入模块: from oncotator.DatasourceFactory import DatasourceFactory [as 别名]
# 或者: from oncotator.DatasourceFactory.DatasourceFactory import createDatasources [as 别名]
def testAnnotateListOfMutations(self):
    """Test that an Annotator set up without input or output can annotate mutations fed to it.

    The annotator is initialized from a RunSpecification that carries only the
    datasources (input creator and output renderer are both None).  A single
    hand-built mutation is then passed to ``annotate_mutations`` and the first
    result is checked for a "gene" annotation.
    """
    # Locate the datasource directory and create a runspec
    dbDir = self.config.get("DEFAULT", "dbDir")
    ds = DatasourceFactory.createDatasources(dbDir)
    runSpec = RunSpecification()
    runSpec.initialize(None, None, datasources=ds)

    # Initialize the annotator with the runspec
    annotator = Annotator()
    annotator.initialize(runSpec)

    m = MutationData()
    m.chr = "1"
    m.start = "12941796"
    m.end = "12941796"
    m.alt_allele = "G"
    m.ref_allele = "T"

    # The result is consumed as an iterator.  The next() builtin works on
    # Python 2.6+ and Python 3, whereas the .next() method is Python 2 only.
    annotated = annotator.annotate_mutations([m])
    m2 = next(annotated)
    self.assertTrue(m2.get("gene", None) is not None)
示例2: test_simple_transcript_annotation
# 需要导入模块: from oncotator.DatasourceFactory import DatasourceFactory [as 别名]
# 或者: from oncotator.DatasourceFactory.DatasourceFactory import createDatasources [as 别名]
def test_simple_transcript_annotation(self):
    """Check the backend lookup behind the web api /transcript/ call.

    Loads the hg19 datasources into a fresh Annotator, fetches one transcript
    by its id and verifies that it exists and reports gene MAPK1.
    """
    # http://www.broadinstitute.org/oncotator/transcript/ENST00000215832.6/
    db_dir = self._determine_db_dir()
    datasources = DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False)
    annotator = Annotator()
    for datasource in datasources:
        annotator.addDatasource(datasource)

    transcript = annotator.retrieve_transcript_by_id("ENST00000215832.6")
    self.assertTrue(transcript is not None)
    gene_name = transcript.get_gene()
    self.assertTrue(gene_name == "MAPK1")
示例3: test_querying_transcripts_by_genes
# 需要导入模块: from oncotator.DatasourceFactory import DatasourceFactory [as 别名]
# 或者: from oncotator.DatasourceFactory.DatasourceFactory import createDatasources [as 别名]
def test_querying_transcripts_by_genes(self):
    """Check that all transcripts for a given set of genes can be retrieved.

    Queries MAPK1 and PIK3CA and expects more than three transcripts back.
    """
    db_dir = self._determine_db_dir()
    datasources = DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False)
    annotator = Annotator()
    for datasource in datasources:
        annotator.addDatasource(datasource)

    # Step 1 get all of the relevant transcripts
    genes_of_interest = ["MAPK1", "PIK3CA"]
    transcripts = annotator.retrieve_transcripts_by_genes(genes_of_interest)
    transcript_count = len(transcripts)
    self.assertTrue(transcript_count > 3)
示例4: test_simple_genes_by_gene_annotation
# 需要导入模块: from oncotator.DatasourceFactory import DatasourceFactory [as 别名]
# 或者: from oncotator.DatasourceFactory.DatasourceFactory import createDatasources [as 别名]
def test_simple_genes_by_gene_annotation(self):
    """Check the backend lookup behind the web api /gene/ call.

    Retrieves the transcripts for MAPK1, annotates the genes they belong to
    and expects exactly one entry in the resulting mapping.
    """
    # http://www.broadinstitute.org/oncotator/gene/MAPK1/
    db_dir = self._determine_db_dir()
    datasources = DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False)
    annotator = Annotator()
    for datasource in datasources:
        annotator.addDatasource(datasource)

    transcripts = annotator.retrieve_transcripts_by_genes(["MAPK1"])
    self.assertTranscriptsFound(transcripts)

    gene_to_mut = annotator.annotate_genes_given_txs(transcripts)
    gene_count = len(gene_to_mut.keys())
    self.assertTrue(gene_count == 1)
示例5: testInitializingDatasources
# 需要导入模块: from oncotator.DatasourceFactory import DatasourceFactory [as 别名]
# 或者: from oncotator.DatasourceFactory.DatasourceFactory import createDatasources [as 别名]
def testInitializingDatasources(self):
    """Test initializing a database dir, both single and multicore.

    This test is RAM intensive and requires the default data corpus.  The
    multicore list is created, checked and deleted before the single core list
    is created, and the two lengths are compared at the end.
    """
    def check_datasources(ds_list, none_msg_prefix):
        # Shared checks for one datasource list; returns its length.
        # The loop variables are local to this helper, so no reference to an
        # individual datasource is left behind once it returns.
        self.assertTrue(ds_list is not None, "Datasource list was None")
        self.assertTrue(len(ds_list) != 0, "Datasource list was empty")
        for idx, datasource in enumerate(ds_list):
            self.assertTrue(datasource is not None, none_msg_prefix + str(idx))
            self.assertTrue(isinstance(datasource, Datasource))
        return len(ds_list)

    multiDS = DatasourceFactory.createDatasources(self.config.get("DEFAULT", "dbDir"), "hg19", isMulticore=True)
    # This test can be memory intensive, so get rid of the multiDS, but record how many datasources were created.
    numMultiDS = check_datasources(multiDS, "multi core datasource was None: ")
    del multiDS

    singleCoreDS = DatasourceFactory.createDatasources(self.config.get("DEFAULT", "dbDir"), "hg19", isMulticore=False)
    numSingleDS = check_datasources(singleCoreDS, "single core datasource was None: ")
    self.assertTrue(numMultiDS == numSingleDS, "Length of single core datasource list was not the same as multicore")
    del singleCoreDS
示例6: test_simple_genes_by_region_annotation
# 需要导入模块: from oncotator.DatasourceFactory import DatasourceFactory [as 别名]
# 或者: from oncotator.DatasourceFactory.DatasourceFactory import createDatasources [as 别名]
def test_simple_genes_by_region_annotation(self):
    """Test web api backend call /genes/

    Retrieves the transcripts overlapping a region of chromosome 22, annotates
    the corresponding genes and then reads, for every gene, the annotations
    that the web API would expose.  Most of the values read are not validated;
    they are looked up so that the test fails if one of the annotations cannot
    be read or split.
    """
    # http://www.broadinstitute.org/oncotator/genes/chr22_22112223_22312558/
    # Two genes: chr22:22,112,223-22,312,558
    datasource_list = DatasourceFactory.createDatasources(self._determine_db_dir(), "hg19", isMulticore=False)
    annotator = Annotator()
    for ds in datasource_list:
        annotator.addDatasource(ds)

    # Here is what the API would call....
    txs = annotator.retrieve_transcripts_by_region("22", 22112223, 22312558)
    self.assertTranscriptsFound(txs)
    mut_dict = annotator.annotate_genes_given_txs(txs)

    # Each mut will be for a separate gene
    for gene in mut_dict.keys():
        mut = mut_dict[gene]
        alt_accessions = mut["UniProt_alt_uniprot_accessions"].split("|")
        tcgascape_amp_peaks = mut["TCGAScape_Amplification_Peaks"].split("|")
        tcgascape_del_peaks = mut["TCGAScape_Deletion_Peaks"].split("|")
        tumorscape_amp_peaks = mut["TUMORScape_Amplification_Peaks"].split("|")
        tumorscape_del_peaks = mut["TUMORScape_Deletion_Peaks"].split("|")
        full_name = mut["HGNC_Approved Name"]

        # Each COSMIC field is read from its own annotation.  The original
        # filled both entries from "COSMIC_Tissue_tissue_types_affected".
        # NOTE(review): confirm "COSMIC_Tissue_total_alterations_in_gene" is the
        # annotation name produced by the COSMIC_Tissue datasource in use.
        cosmic = {
            "tissue_types_affected": mut["COSMIC_Tissue_tissue_types_affected"],
            "total_alterations_in_gene": mut["COSMIC_Tissue_total_alterations_in_gene"],
        }

        # chain() takes the iterables as separate arguments; wrapping them in a
        # single list would yield the two lists themselves instead of one flat
        # list of aliases.
        alt_aliases = list(
            itertools.chain(mut["HGNC_Previous Symbols"].split(", "), mut["HGNC_Synonyms"].split(", "))
        )
        location = mut["HGNC_Chromosome"]
        uniprot_accession = mut["UniProt_uniprot_accession"]

        transcripts = mut["transcripts"]
        self.assertTrue(transcripts is not None)
        self.assertTrue(len(transcripts) > 0)
        self.assertTrue(transcripts.startswith("ENST"))

        strand = mut["strand"]
        klass = mut["class"]
        uniprot_experimentals = mut["UniProt_AA_experimental_info"].split("|")
        self.assertTrue(uniprot_experimentals is not None)
        uniprot_natural_variations = mut["UniProt_AA_natural_variation"].split("|")
        uniprot_regions = mut["UniProt_AA_region"].split("|")
        uniprot_sites = mut["UniProt_AA_site"].split("|")
        uniprot_go_biological_processes = mut["UniProt_GO_Biological_Process"].split("|")
        uniprot_go_cellular_components = mut["UniProt_GO_Cellular_Component"].split("|")
        self.assertTrue(uniprot_go_cellular_components is not None)
        uniprot_go_molecular_functions = mut["UniProt_GO_Molecular_Function"].split("|")
示例7: create_run_spec
# 需要导入模块: from oncotator.DatasourceFactory import DatasourceFactory [as 别名]
# 或者: from oncotator.DatasourceFactory.DatasourceFactory import createDatasources [as 别名]
def create_run_spec(inputFormat, outputFormat, inputFilename, outputFilename, globalAnnotations=None,
                    datasourceDir=None, genomeBuild="hg19", isMulticore=False, numCores=4,
                    defaultAnnotations=None, cacheUrl=None, read_only_cache=True,
                    tx_mode=TranscriptProvider.TX_MODE_CANONICAL, is_skip_no_alts=False, other_opts=None):
    """Build and return an initialized RunSpecification for an Oncotator session.

    This is a very simple interface to start an Oncotator session.  As a
    warning, this interface may not be supported in future versions.

    datasourceDir is handed unchanged to DatasourceFactory.createDatasources.
    The original notes state that None should select a default location.
    TODO: Define default location.

    IMPORTANT: Current implementation attempts to annotate using a default set of datasources.
    TODO: Make sure that this note above is no longer the case.
    TODO: This method may get refactored into a separate class that handles RunConfiguration objects.

    Note that is_skip_no_alts is also written into other_opts, so a dict passed
    in by the caller is modified in place.
    """
    # TODO: Use dependency injection for list of name value pairs?  Otherwise, set it up as an attribute on this class.
    # TODO: Use dependency injection to return instance of the input/output classes
    # TODO: Support more than the default configs.
    # TODO: On error, list the supported formats (both input and output)
    # TODO: Make sure that we can pass in both a class and a config file, not just a class.
    if globalAnnotations is None:
        globalAnnotations = dict()
    if defaultAnnotations is None:
        defaultAnnotations = dict()
    if other_opts is None:
        other_opts = dict()
    other_opts[InputMutationCreatorOptions.IS_SKIP_ALTS] = is_skip_no_alts

    # Step 1 Initialize input and output
    inputCreator = OncotatorCLIUtils.create_input_creator(inputFilename, inputFormat, genomeBuild, other_opts)
    outputRenderer = OncotatorCLIUtils.create_output_renderer(outputFilename, outputFormat, other_opts)

    # Step 2 Datasources
    datasourceList = DatasourceFactory.createDatasources(
        datasourceDir, genomeBuild, isMulticore=isMulticore, numCores=numCores, tx_mode=tx_mode
    )

    # TODO: Refactoring needed here to specify tx-mode (or any option not in a config file) in a cleaner way.
    for datasource in datasourceList:
        if not isinstance(datasource, TranscriptProvider):
            continue
        message = "Setting %s %s to tx-mode of %s..." % (datasource.title, datasource.version, tx_mode)
        logging.getLogger(__name__).info(message)
        datasource.set_tx_mode(tx_mode)

    runSpec = RunSpecification()
    runSpec.initialize(
        inputCreator,
        outputRenderer,
        manualAnnotations=globalAnnotations,
        datasources=datasourceList,
        isMulticore=isMulticore,
        numCores=numCores,
        defaultAnnotations=defaultAnnotations,
        cacheUrl=cacheUrl,
        read_only_cache=read_only_cache,
        is_skip_no_alts=is_skip_no_alts,
    )
    return runSpec
示例8: test_querying_transcripts_by_region
# 需要导入模块: from oncotator.DatasourceFactory import DatasourceFactory [as 别名]
# 或者: from oncotator.DatasourceFactory.DatasourceFactory import createDatasources [as 别名]
def test_querying_transcripts_by_region(self):
    """Check the backend query behind the web api /transcripts/ call.

    Retrieves the transcripts in a region of chromosome 4 and, for each one,
    gathers the values a transcript json record would need, then annotates the
    transcript to obtain the refseq cross references.
    """
    db_dir = self._determine_db_dir()
    datasources = DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False)
    annotator = Annotator()
    for datasource in datasources:
        annotator.addDatasource(datasource)

    txs = annotator.retrieve_transcripts_by_region("4", 50164411, 60164411)
    self.assertTranscriptsFound(txs)

    # Here is an example of getting enough data to populate the json in doc/transcript_json_commented.json.txt
    # None of these values are validated.
    for tx in txs:
        transcript_id = tx.get_transcript_id()
        tx_start = tx.determine_transcript_start()
        tx_end = tx.determine_transcript_stop()
        gene = tx.get_gene()
        contig = tx.get_contig()
        exon_count = len(tx.get_exons())
        strand = tx.get_strand()
        footprint_start, footprint_end = tx.determine_cds_footprint()
        gene_type = tx.get_gene_type()
        cds_start = tx.determine_cds_start()
        cds_end = tx.determine_cds_stop()
        gene_id = tx.get_gene_id()

        genomic_coords = []
        for exon in tx.get_exons():
            genomic_coords.append([exon[0], exon[1]])

        # One single-element list per exon, matching the exon count checked below.
        transcript_coords = []
        for exon in tx.get_exons():
            exon_space = TranscriptProviderUtils.convert_genomic_space_to_exon_space(exon[0] + 1, exon[1], tx)
            transcript_coords.append([exon_space])

        code_len = int(cds_end) - int(cds_start) + 1

        # If refseq datasources are not available, this will fail.
        # Step 2 annotate the transcript, which produces a dummy mutation with the refseq annotations.
        dummy_mut = annotator.annotate_transcript(tx)
        refseq_mRNA_id = dummy_mut["gencode_xref_refseq_mRNA_id"]
        refseq_prot_id = dummy_mut["gencode_xref_refseq_prot_acc"]

        # Description is unavailable right now
        description = ""

        self.assertTrue(refseq_mRNA_id is not None)
        self.assertTrue(refseq_prot_id is not None)
        self.assertTrue(len(transcript_coords) == exon_count)
示例9: create_run_spec
# 需要导入模块: from oncotator.DatasourceFactory import DatasourceFactory [as 别名]
# 或者: from oncotator.DatasourceFactory.DatasourceFactory import createDatasources [as 别名]
def create_run_spec(input_format, output_format, input_filename, output_filename, global_annotations=None,
                    datasource_dir=None, genomeBuild="hg19", is_multicore=False, num_cores=4,
                    default_annotations=None, cache_url=None, read_only_cache=True,
                    tx_mode=TranscriptProvider.TX_MODE_CANONICAL, is_skip_no_alts=False, other_opts=None, annotating_type=None):
    """Create the datasources and delegate to create_run_spec_given_datasources.

    This is a very simple interface to start an Oncotator session.  As a
    warning, this interface may not be supported in future versions.

    If datasource_dir is None (or otherwise falsy), no datasources are used.

    Every TranscriptProvider datasource is switched to ``tx_mode``.  When
    other_opts contains OptionConstants.CUSTOM_CANONICAL_TX_LIST_FILE, that
    file is read (one transcript id per line, anything after the last "." is
    dropped) and the ids are handed to each TranscriptProvider.

    Returns whatever RunSpecificationFactory.create_run_spec_given_datasources
    returns.  An unreadable custom canonical transcript file raises the usual
    error from open().
    """
    if datasource_dir:
        datasource_list = DatasourceFactory.createDatasources(datasource_dir, genomeBuild, isMulticore=is_multicore, numCores=num_cores, tx_mode=tx_mode)
    else:
        datasource_list = []

    global_annotations = dict() if global_annotations is None else global_annotations
    default_annotations = dict() if default_annotations is None else default_annotations
    other_opts = dict() if other_opts is None else other_opts

    logger = logging.getLogger(__name__)
    cc_txs_filename = other_opts.get(OptionConstants.CUSTOM_CANONICAL_TX_LIST_FILE, None)

    # TODO: Refactoring needed here to specify tx-mode (or any option not in a config file) in a cleaner way.
    for ds in datasource_list:
        if not isinstance(ds, TranscriptProvider):
            continue

        logger.info("Setting %s %s to tx-mode of %s...", ds.title, ds.version, tx_mode)
        ds.set_tx_mode(tx_mode)

        if cc_txs_filename is None:
            logger.info("No custom canonical transcripts specified.")
            continue

        # open() exists on both Python 2 and 3 (file() is Python 2 only) and
        # the with-block closes the handle even if reading fails.
        with open(cc_txs_filename, 'r') as cc_txs_fp:
            # Strip the line ending before dropping the version suffix, so ids
            # without a version do not keep a trailing newline; skip blank lines.
            cc_txs = [line.strip().rsplit(".", 1)[0] for line in cc_txs_fp if line.strip()]
        ds.set_custom_canonical_txs(cc_txs)
        logger.info("%d custom canonical transcripts specified.", len(cc_txs))

    return RunSpecificationFactory.create_run_spec_given_datasources(input_format, output_format, input_filename, output_filename, global_annotations,
                                                                     datasource_list, genomeBuild, is_multicore, num_cores,
                                                                     default_annotations, cache_url, read_only_cache,
                                                                     tx_mode, is_skip_no_alts, other_opts, annotating_type)
示例10: _createDatasourceCorpus
# 需要导入模块: from oncotator.DatasourceFactory import DatasourceFactory [as 别名]
# 或者: from oncotator.DatasourceFactory.DatasourceFactory import createDatasources [as 别名]
def _createDatasourceCorpus(self):
    """Return the hg19 datasources created, without multicore, from the configured dbDir."""
    corpus_dir = self.config.get('DEFAULT', "dbDir")
    corpus = DatasourceFactory.createDatasources(corpus_dir, "hg19", isMulticore=False)
    return corpus
示例11: testMulticoreNoDatasources
# 需要导入模块: from oncotator.DatasourceFactory import DatasourceFactory [as 别名]
# 或者: from oncotator.DatasourceFactory.DatasourceFactory import createDatasources [as 别名]
def testMulticoreNoDatasources(self):
    """When using multicore, creation must not hang if the db dir holds no datasources.

    Expects an empty datasource list for a directory without datasources.
    """
    # The third positional argument is presumably isMulticore; verify against createDatasources.
    created = DatasourceFactory.createDatasources('testdata/maflite/', "hg19", True)
    created_count = len(created)
    self.assertTrue(created_count == 0, "Length of multiDS when there were no datasources was not zero.")