本文整理汇总了 Python 中 oncotator.Annotator.Annotator 类的典型用法代码示例。如果您正苦于以下问题：Python Annotator 类的具体用法？Python Annotator 怎么用？Python Annotator 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 Annotator 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: _simple_annotate
def _simple_annotate(self, is_skip_no_alts):
    """Annotate two identical chr1:12941796 T>G mutations and count the results.

    The annotator is initialized from a run specification that has no input,
    no output and an empty datasource list.  Only the first mutation carries
    an ``alt_allele_seen`` annotation (set to the string ``"False"``).

    :param is_skip_no_alts: forwarded unchanged to ``RunSpecification.initialize``.
    :return: the number of mutations produced by ``annotate_mutations``.
    """
    def _make_mutation():
        # All coordinates and alleles are assigned as strings.
        mut = MutationData()
        mut.chr = "1"
        mut.start = "12941796"
        mut.end = "12941796"
        mut.alt_allele = "G"
        mut.ref_allele = "T"
        return mut

    run_spec = RunSpecification()
    run_spec.initialize(None, None, datasources=[], is_skip_no_alts=is_skip_no_alts)

    # Initialize the annotator with the runspec
    annotator = Annotator()
    annotator.initialize(run_spec)

    flagged = _make_mutation()
    flagged.createAnnotation("alt_allele_seen", "False")
    unflagged = _make_mutation()

    annotated = annotator.annotate_mutations([flagged, unflagged])
    # Consume the result and count how many mutations came back.
    return sum(1 for _ in annotated)
示例2: test_basic_rendering
def test_basic_rendering(self):
    """Test that we can render a basic seg file as a gene list.

    Annotates ``testdata/seg/Patient0.seg.txt`` in segment-annotating mode
    with the GENE_LIST output format, then checks every output row for
    non-blank segment boundaries, a non-empty ``gene`` value, a
    ``segment_num_probes`` value and the sample name ``Patient0``.
    """
    inputFilename = "testdata/seg/Patient0.seg.txt"
    output_filename = "out/test_basic_rendering.gene_list.tsv"
    db_dir = self.config.get('DEFAULT', "dbDir")

    # Remove any output file that already exists before annotating.
    if os.path.exists(output_filename):
        os.remove(output_filename)

    annotator = Annotator()
    run_spec = RunSpecificationFactory.create_run_spec(
        "SEG_FILE", "GENE_LIST", inputFilename, output_filename,
        datasourceDir=db_dir, annotating_type=RunSpecification.ANNOTATE_SEGMENTS)
    annotator.initialize(run_spec)
    annotator.annotate()

    # Now check the output.  (The field-name list is not needed by any check
    # below, so it is no longer fetched into an unused local.)
    output_reader = GenericTsvReader(output_filename)
    for line_dict in output_reader:
        self.assertTrue(line_dict['segment_start'] is not None)
        self.assertTrue(line_dict['segment_start'].strip() != "")
        self.assertTrue(line_dict['segment_end'] is not None)
        self.assertTrue(line_dict['segment_end'].strip() != "")
        self.assertTrue("gene" in line_dict.keys())
        self.assertTrue(len(line_dict["gene"]) > 0)
        # NOTE(review): besides requiring a parseable float, this also fails
        # when the value parses to 0.0 -- confirm that is intended.
        self.assertTrue(float(line_dict["segment_num_probes"]))
        self.assertTrue(line_dict['sample'] == "Patient0")
示例3: test_no_overwriting_muts
def test_no_overwriting_muts(self):
    """Ensure annotating raises DuplicateAnnotationException when overwriting is disallowed.

    Per the original author's note, the maflite input carries a "Who"
    annotation that the gene datasource will also try to write.  The run is
    configured with ALLOW_ANNOTATION_OVERWRITING set to False.
    """
    gene_ds = DatasourceFactory.createDatasource(
        "testdata/thaga_janakari_gene_ds/hg19/tj_data.config",
        "testdata/thaga_janakari_gene_ds/hg19/",
    )
    run_spec = RunSpecificationFactory.create_run_spec_given_datasources(
        "MAFLITE",
        "TCGAMAF",
        "testdata/maflite/who_alt1_vs_alt2.maflite",
        "out/who_alt1_vs_alt2.maf.annotated",
        datasource_list=[gene_ds],
        other_opts={
            OptionConstants.ALLOW_ANNOTATION_OVERWRITING: False,
            OptionConstants.NO_PREPEND: True,
        },
    )

    annotator = Annotator()
    annotator.initialize(run_spec)

    # The exception is expected from the annotate() call itself.
    self.assertRaises(DuplicateAnnotationException, annotator.annotate)
示例4: testAnnotateListOfMutations
def testAnnotateListOfMutations(self):
    """Test that we can initialize an Annotator, without an input or output and then feed mutations,
    one at a time... using a runspec

    Datasources are created from the ``dbDir`` entry of the ``DEFAULT``
    config section.  A single chr1:12941796 T>G mutation is fed in, and the
    first annotated result must carry a non-None ``gene`` annotation.
    """
    # Locate the datasource directory and create a runspec
    dbDir = self.config.get("DEFAULT", "dbDir")
    ds = DatasourceFactory.createDatasources(dbDir)
    runSpec = RunSpecification()
    runSpec.initialize(None, None, datasources=ds)

    # Initialize the annotator with the runspec
    annotator = Annotator()
    annotator.initialize(runSpec)

    m = MutationData()
    m.chr = "1"
    m.start = "12941796"
    m.end = "12941796"
    m.alt_allele = "G"
    m.ref_allele = "T"

    muts = annotator.annotate_mutations([m])

    # Use the next() builtin instead of the Python-2-only ``.next()`` method:
    # it behaves the same on Python 2.6+ and also works on Python 3 iterators.
    m2 = next(muts)
    self.assertTrue(m2.get("gene", None) is not None)
示例5: test_full_seg_file_annotations
def test_full_seg_file_annotations(self):
    """Test that a seg file can be read, fully annotated and written as SIMPLE_TSV.

    After annotating, the output header must contain the Sample, Num_Probes
    and Segment_Mean columns, and every row must have non-blank ``start`` and
    ``end`` values plus a ``genes`` entry.
    """
    seg_path = "testdata/seg/Patient0.seg.txt"
    tsv_path = "out/test_full_seg_file_annotations.tsv"
    db_dir = self.config.get('DEFAULT', "dbDir")

    # Remove any output file that already exists before annotating.
    if os.path.exists(tsv_path):
        os.remove(tsv_path)

    annotator = Annotator()
    annotator.initialize(
        RunSpecificationFactory.create_run_spec(
            "SEG_FILE", "SIMPLE_TSV", seg_path, tsv_path,
            datasourceDir=db_dir, annotating_type=RunSpecification.ANNOTATE_SEGMENTS))
    annotator.annotate()

    # Now check the output
    reader = GenericTsvReader(tsv_path)
    field_names = reader.getFieldNames()
    for column in ["Sample", "Num_Probes", "Segment_Mean"]:
        self.assertTrue(column in field_names)

    for row in reader:
        for boundary in ("start", "end"):
            self.assertTrue(row[boundary] is not None)
            self.assertTrue(row[boundary].strip() != "")
        self.assertTrue("genes" in row)
        # NOTE(review): str.split always returns at least one element, so
        # this assertion cannot fail for any string value -- confirm whether
        # a non-empty gene list was the intended check.
        self.assertTrue(len(row["genes"].split(",")) > 0)
示例6: test_overwriting_muts
def test_overwriting_muts(self):
    """Ensure annotation succeeds when the datasource is allowed to overwrite an existing annotation.

    Per the original author's note, the maflite input carries a "Who"
    annotation that the gene datasource will also try to write.  With
    ALLOW_ANNOTATION_OVERWRITING set to True the run must complete, and no
    output row may have ``TJ_Data_Who`` equal to "Tromokratis".
    """
    gene_ds = DatasourceFactory.createDatasource(
        "testdata/thaga_janakari_gene_ds/hg19/tj_data.config",
        "testdata/thaga_janakari_gene_ds/hg19/",
    )
    annotated_path = "out/who_alt1_vs_alt2.maf.annotated"
    run_spec = RunSpecificationFactory.create_run_spec_given_datasources(
        "MAFLITE",
        "TCGAMAF",
        "testdata/maflite/who_alt1_vs_alt2.maflite",
        annotated_path,
        datasource_list=[gene_ds],
        other_opts={
            OptionConstants.ALLOW_ANNOTATION_OVERWRITING: True,
            OptionConstants.NO_PREPEND: True,
        },
    )

    annotator = Annotator()
    annotator.initialize(run_spec)
    annotator.annotate()

    # A row without the column counts as "" and therefore passes the check.
    for row in GenericTsvReader(annotated_path):
        self.assertTrue(row.get("TJ_Data_Who", "") != "Tromokratis")
示例7: test_rendering_combined_to_tsv
def test_rendering_combined_to_tsv(self):
    """Test that a merged ONP simple tsv file is produced without crashing.

    Runs a MAFLITE -> SIMPLE_TSV annotation with INFER_ONPS enabled; the test
    passes as long as no exception is raised.
    """
    maflite_path = os.path.join("testdata", "maflite", "onp_combination.maf.txt")
    tsv_path = os.path.join("out", "onp_combination.tsv")
    onp_opts = {OptionConstants.INFER_ONPS: True}

    spec = RunSpecificationFactory.create_run_spec(
        "MAFLITE", "SIMPLE_TSV", maflite_path, tsv_path, other_opts=onp_opts)

    annotator = Annotator()
    annotator.initialize(spec)
    annotator.annotate()
示例8: testVersionHeader
def testVersionHeader(self):
    """Test that building the annotator's header string does not raise.

    Only minimal checking that the returned string is actually correct: it
    must mention either "Gaf " or "GENCODE", and the word "Oncotator".
    No input or output is initialized; only a transcript-provider datasource
    is added.
    """
    annotator = Annotator()
    annotator.addDatasource(TestUtils.createTranscriptProviderDatasource(self.config))
    header = annotator.createHeaderString()

    # str.find returns -1 when the substring is absent, an index >= 0 otherwise.
    has_tx_version = any(header.find(marker) >= 0 for marker in ("Gaf ", "GENCODE"))
    self.assertTrue(has_tx_version, "Could not find Gaf or GENCODE version in header string.")

    has_tool_name = header.find("Oncotator") >= 0
    self.assertTrue(has_tool_name, "Could not find the word Oncotator in header string.")
示例9: testTCGAMAFAsInputAndQuickAnnotate
def testTCGAMAFAsInputAndQuickAnnotate(self):
    """Test that a TCGA MAF read through MAFLITE can be annotated and rendered properly.

    The output must be non-empty, contain a ``#version`` comment, expose the
    two new ``i_TJ_Data_*`` columns, and have exactly two more lines (data
    rows plus comments) than the input.
    """
    inputFilename = "testdata/maf/Patient0.maf.annotated"
    outputFilename = "out/testTCGAMAFAsInputAndQuickAnnotate.tsv"

    input_creator = MafliteInputMutationCreator(inputFilename, 'configs/maflite_input.config')
    output_renderer = TcgaMafOutputRenderer(outputFilename, 'configs/tcgaMAF2.4_output.config')

    # Wire the annotator by hand instead of going through a run specification.
    annotator = Annotator()
    annotator.setInputCreator(input_creator)
    annotator.setOutputRenderer(output_renderer)
    annotator.addDatasource(
        DatasourceFactory.createDatasource(
            "testdata/thaga_janakari_gene_ds/hg19/tj_data.config",
            "testdata/thaga_janakari_gene_ds/hg19/"))
    annotator.annotate()

    self.assertTrue(os.stat(outputFilename).st_size > 0, "Generated MAF file (" + outputFilename + ") is empty.")

    reader_in = GenericTsvReader(inputFilename)
    reader_out = GenericTsvReader(outputFilename)

    self.assertTrue(reader_out.getComments().find('#version') >= 0, "First line did not specify a version number")
    self.assertTrue("i_TJ_Data_Why" in reader_out.getFieldNames(), "New field missing (i_TJ_Data_Why) from header")
    self.assertTrue("i_TJ_Data_Who" in reader_out.getFieldNames(), "New field missing (i_TJ_Data_Who) from header")

    # Count data rows (output first, then input), then add the comment lines.
    rows_out = sum(1 for _ in reader_out)
    rows_in = sum(1 for _ in reader_in)
    lines_in = rows_in + len(reader_in.getCommentsAsList())
    lines_out = rows_out + len(reader_out.getCommentsAsList())

    self.assertTrue(lines_out == (lines_in + 2), "Output file should have same number of lines plus two (for maf version and Oncotator version comments) as input file. (In,Out): " + str(lines_in) + ", " + str(lines_out))
示例10: test_querying_transcripts_by_genes
def test_querying_transcripts_by_genes(self):
    """Test that all transcripts for a given set of genes can be retrieved.

    Loads the hg19 datasources (single core) into a bare Annotator and
    expects more than three transcripts for MAPK1 and PIK3CA combined.
    """
    hg19_datasources = DatasourceFactory.createDatasources(
        self._determine_db_dir(), "hg19", isMulticore=False)

    annotator = Annotator()
    for datasource in hg19_datasources:
        annotator.addDatasource(datasource)

    # Step 1 get all of the relevant transcripts
    genes = ["MAPK1", "PIK3CA"]
    transcripts = annotator.retrieve_transcripts_by_genes(genes)
    self.assertTrue(len(transcripts) > 3)
示例11: _annotateTest
def _annotateTest(self, inputFilename, outputFilename, datasource_dir, inputFormat="MAFLITE", outputFormat="TCGAMAF", default_annotations=TCGA_MAF_DEFAULTS, override_annotations=None, is_skip_no_alts=False):
    """Build a run specification from the arguments, run the annotator and return its result.

    :param inputFilename: input file passed to ``create_run_spec``.
    :param outputFilename: output file passed to ``create_run_spec``.
    :param datasource_dir: passed as ``datasourceDir``.
    :param inputFormat: input format name (default "MAFLITE").
    :param outputFormat: output format name (default "TCGAMAF").
    :param default_annotations: passed as ``defaultAnnotations``.
    :param override_annotations: passed as ``globalAnnotations``; an empty
        dict is substituted when None.
    :param is_skip_no_alts: passed through unchanged.
    :return: whatever ``Annotator.annotate`` returns.
    """
    self.logger.info("Initializing Annotator...")

    # A fresh dict per call avoids sharing one mutable default between calls.
    global_annotations = {} if override_annotations is None else override_annotations

    annotator = Annotator()
    run_spec = RunSpecificationFactory.create_run_spec(
        inputFormat,
        outputFormat,
        inputFilename,
        outputFilename,
        defaultAnnotations=default_annotations,
        datasourceDir=datasource_dir,
        globalAnnotations=global_annotations,
        is_skip_no_alts=is_skip_no_alts,
    )
    annotator.initialize(run_spec)

    self.logger.info("Annotation starting...")
    result = annotator.annotate()
    return result
示例12: test_simple_transcript_annotation
def test_simple_transcript_annotation(self):
    """Test the web api backend call /transcript/.

    Looks up transcript ENST00000215832.6 through an Annotator loaded with
    the hg19 datasources and expects it to exist and belong to gene MAPK1.
    """
    # http://www.broadinstitute.org/oncotator/transcript/ENST00000215832.6/
    hg19_datasources = DatasourceFactory.createDatasources(
        self._determine_db_dir(), "hg19", isMulticore=False)

    annotator = Annotator()
    for datasource in hg19_datasources:
        annotator.addDatasource(datasource)

    transcript = annotator.retrieve_transcript_by_id("ENST00000215832.6")
    self.assertTrue(transcript is not None)
    self.assertTrue(transcript.get_gene() == "MAPK1")
def test_single_sample_onp_combiner(self):
"""test that we can create an onp combined TCGA maf without crashing"""
input_filename = 'testdata/maflite/onp.singlesample.maf.txt'
output_filename = 'out/testSingleSampleOnpCombiner.maf'
config = TestUtils.createUnitTestConfig()
defaultdb = config.get('DEFAULT',"dbDir")
spec = RunSpecificationFactory.create_run_spec("MAFLITE","TCGAMAF", input_filename, output_filename,
datasource_dir=defaultdb,
other_opts={OptionConstants.INFER_ONPS: True})
annotator = Annotator()
annotator.initialize(spec)
annotator.annotate()
示例14: testAnnotationWithMafliteWithTrailingSpaces
def testAnnotationWithMafliteWithTrailingSpaces(self):
    """Test annotating a maflite file whose ref and alt alleles contain trailing spaces.

    Runs a MAFLITE -> TCGAMAF annotation in mutation-annotating mode; passes
    if no exception is raised.
    """
    db_dir = self.config.get('DEFAULT', "dbDir")
    maflite_path = os.path.join("testdata", "maflite", "example.trailing_whitespace_in_alleles.maflite")
    maf_path = os.path.join("out", "example.trailing_whitespace_in_alleles.maf.txt")

    annotator = Annotator()
    annotator.initialize(
        RunSpecificationFactory.create_run_spec(
            "MAFLITE", "TCGAMAF", maflite_path, maf_path,
            datasource_dir=db_dir, annotating_type=RunSpecification.ANNOTATE_MUTATIONS))
    annotator.annotate()
示例15: _annotate_m2_vcf
def _annotate_m2_vcf(self, input_vcf_file, output_tcgamaf_file):
    """Annotate a VCF file and render it as a TCGA MAF.

    :param input_vcf_file: path of the VCF to read.
    :param output_tcgamaf_file: path of the TCGA MAF to write.
    """
    # For this conversion, you must specify the barcodes manually
    barcodes = {'tumor_barcode': 'Patient0-Tumor', 'normal_barcode': 'Patient0-Normal'}

    vcf_opts = {
        OptionConstants.COLLAPSE_FILTER_COLS: True,
        OptionConstants.NO_PREPEND: True,
        OptionConstants.SPLIT_ALLELIC_DEPTH: False,
        OptionConstants.INFER_ONPS: True,
    }

    annotator = Annotator()
    # "." is passed as the datasource dir; the original author notes it is
    # meant to be an empty datasource dir, in order to speed this up.
    run_spec = RunSpecificationFactory.create_run_spec(
        "VCF", "TCGAMAF", input_vcf_file, output_tcgamaf_file,
        datasource_dir=".",
        global_annotations=barcodes,
        is_skip_no_alts=True,
        other_opts=vcf_opts)
    annotator.initialize(run_spec)
    annotator.annotate()