本文整理汇总了Python中CGATPipelines.PipelineLncRNA类的典型用法代码示例。如果您正苦于以下问题：Python PipelineLncRNA类的具体用法？Python PipelineLncRNA怎么用？Python PipelineLncRNA使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了PipelineLncRNA类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: classifyLncRNA
def classifyLncRNA(infiles, outfile):
    """Classify lncRNA relative to protein coding loci.

    Each lincRNA is classified according to its relationship to protein
    coding genes. Indices for the intervals are created on the fly (the
    original author noted that additional annotations should maybe be
    created instead).

    Classes assigned:

    antisense
        transcript overlapping protein coding exons on opposite strand
    antisense_upstream
        transcript < 2kb from tss on opposite strand
    antisense_downstream
        transcript < 2kb from gene end on opposite strand
    sense_upstream
        transcript < 2kb from tss on same strand
    sense_downstream
        transcript < 2kb from gene end on same strand
    intergenic
        transcript > 2kb from any protein coding gene
    intronic
        overlaps protein coding gene intron on same strand
    antisense_intronic
        overlaps protein coding intron on opposite strand

    The first two entries of ``infiles`` and ``outfile`` are forwarded to
    ``PipelineLncRNA.classifyLncRNAGenes``; its ``dist`` argument is read
    from ``PARAMS["lncrna_dist"]``.
    """
    first_input = infiles[0]
    second_input = infiles[1]
    dist_setting = PARAMS["lncrna_dist"]
    PipelineLncRNA.classifyLncRNAGenes(
        first_input, second_input, outfile, dist=dist_setting)
示例2: classifyFilteredLncRNA
def classifyFilteredLncRNA(infiles, outfile):
    """Classify all lincRNA before cpc filtering.

    This defines any classes that are represented in the coding set
    that is filtered out.

    NOTE: this task is not included when running the full pipeline.

    The first two entries of ``infiles`` and ``outfile`` are forwarded to
    ``PipelineLncRNA.classifyLncRNAGenes`` with ``dist`` taken from
    ``PARAMS["lncrna_dist"]``.
    """
    positional = (infiles[0], infiles[1], outfile)
    keywords = {"dist": PARAMS["lncrna_dist"]}
    PipelineLncRNA.classifyLncRNAGenes(*positional, **keywords)
示例3: buildFilteredLncRNAGeneSet
def buildFilteredLncRNAGeneSet(infiles, outfile):
    """Create a filtered lncRNA geneset.

    The geneset will not include any single exon lncRNA unless it has
    been seen previously, i.e. it overlaps a previously identified
    lncRNA.

    ``infiles[0]`` is passed as the first argument and every remaining
    entry of ``infiles`` is passed on together as the third argument of
    ``PipelineLncRNA.buildFilteredLncRNAGeneSet``.
    """
    primary_input = infiles[0]
    remaining_inputs = infiles[1:]
    PipelineLncRNA.buildFilteredLncRNAGeneSet(
        primary_input, outfile, remaining_inputs)
示例4: flagExonStatus
def flagExonStatus(infile, outfile):
    """Add two exon-status attributes to each gtf entry.

    exon_status_locus
        specifies whether the gene model is multi- or single exon
    exon_status
        specifies whether the transcript is multi- or single exon

    The work is delegated to ``PipelineLncRNA.flagExonStatus``.
    """
    flag_exon_status = PipelineLncRNA.flagExonStatus
    flag_exon_status(infile, outfile)
示例5: buildFinalLncRNAGeneSet
def buildFinalLncRNAGeneSet(infile, outfile):
    """Build the final lncRNA gene set.

    The final set consists of transcripts that pass the initial
    filtering stage, i.e. they are:

    * multi-exonic, or previously seen single exon transcripts
    * displaying low evidence for coding potential

    ``PARAMS["filtering_cpc"]`` is forwarded as the last argument of
    ``PipelineLncRNA.buildFinalLncRNAGeneSet``.
    """
    # filtering is based on coding potential
    cpc_result_name = "lncrna_filtered_cpc_result"
    cpc_setting = PARAMS["filtering_cpc"]
    PipelineLncRNA.buildFinalLncRNAGeneSet(
        infile, cpc_result_name, outfile, cpc_setting)
示例6: buildRefnoncodingGeneSet
def buildRefnoncodingGeneSet(infile, outfile):
    """Filter the refnoncoding geneset by ensembl description.

    The filter targets entries that are described in ensembl as being:

    * Ambiguous_orf
    * Retained_intron
    * Sense_intronic
    * antisense
    * Sense_overlapping
    * Processed transcript

    The work is delegated to ``PipelineLncRNA.buildRefnoncodingGeneSet``.
    """
    build_geneset = PipelineLncRNA.buildRefnoncodingGeneSet
    build_geneset(infile, outfile)
示例7: buildLncRNAGeneSet
def buildLncRNAGeneSet(infiles, outfile):
    """Build the lncRNA gene set.

    This is a set of transcripts in the abinitio set that do not
    overlap any protein coding or pseudogene transcripts (exons) in a
    reference gene set, nor additional unwanted biotypes from ensembl.

    Transcripts need to have a length of at least 200 bp.

    The first five entries of ``infiles``, then ``outfile`` and
    ``PARAMS["lncrna_min_length"]`` are passed positionally to
    ``PipelineLncRNA.buildLncRNAGeneSet``.
    """
    # index explicitly so that a short ``infiles`` still raises IndexError
    call_args = [infiles[position] for position in range(5)]
    call_args.append(outfile)
    call_args.append(PARAMS["lncrna_min_length"])
    PipelineLncRNA.buildLncRNAGeneSet(*call_args)
示例8: splitLncRNAFasta
def splitLncRNAFasta(infile, outfiles):
    """Split an aligned fasta file, renaming entries via a name mapping.

    ``PARAMS["phyloCSF_map_species_names"]`` is parsed as a
    comma-separated list of ``old:new`` pairs. Both sides are prefixed
    with ``>`` and collected into a dictionary, which is logged and then
    handed to ``PipelineLncRNA.splitAlignedFasta`` together with
    ``infile`` and the output directory ``./phyloCSF/lncrna_fasta``.

    ``outfiles`` is not used inside this function.
    """
    raw_pairs = (entry.split(":")
                 for entry in PARAMS["phyloCSF_map_species_names"].split(","))
    name_dict = dict((">" + parts[0], ">" + parts[1]) for parts in raw_pairs)
    E.info("Name mapping: %s" % name_dict)

    out_dir = "./phyloCSF/lncrna_fasta"
    PipelineLncRNA.splitAlignedFasta(infile, out_dir, name_dict)
示例9: buildFilteredLncRNAGeneSet
def buildFilteredLncRNAGeneSet(infiles, outfile):
    """Create a filtered lncRNA geneset.

    According to the original author, the result contains previously
    identified gene models supplied in a contig file.

    ``infiles[0]`` is passed as the first argument and all remaining
    entries of ``infiles`` as the third argument of
    ``PipelineLncRNA.buildFilteredLncRNAGeneSet``. The value of
    ``PARAMS["filtering_remove_single_exon"]`` is forwarded as
    ``filter_se``.

    Raises:
        ValueError: if ``PARAMS["filtering_remove_single_exon"]`` is not
            one of ``"loci"``, ``"transcripts"`` or ``None``.
    """
    filter_se = PARAMS["filtering_remove_single_exon"]
    # Validate explicitly: an ``assert`` would be stripped when python
    # runs with -O, letting a bad configuration value through.
    if filter_se not in ("loci", "transcripts", None):
        raise ValueError(
            "filtering_remove_single_exon must be 'loci', 'transcripts' "
            "or None, got %r" % (filter_se,))

    PipelineLncRNA.buildFilteredLncRNAGeneSet(
        infiles[0],
        outfile,
        infiles[1:],
        filter_se=filter_se)
示例10: extractLncRNAFastaAlignments
def extractLncRNAFastaAlignments(infiles, outfile):
    """Extract aligned sequence for each interval into one fasta file.

    Receives a MAF file containing pairwise alignments and a file
    containing intervals (described as gtf12 by the original author;
    unpacked here as ``bed_file``). Outputs a single fasta file
    containing aligned sequence for each interval.

    Args:
        infiles: a pair ``(bed_file, maf_file)``. ``maf_file`` is
            decompressed with ``gunzip -c`` into a temporary file
            created under ``./phyloCSF``.
        outfile: passed through to
            ``PipelineLncRNA.extractMAFGeneBlocks``.

    The temporary file is removed even if one of the steps fails.
    """
    bed_file, maf_file = infiles
    maf_tmp = P.getTempFilename("./phyloCSF")
    try:
        # NOTE(review): P.run() is called without arguments, so it
        # presumably picks up ``statement``, ``to_cluster`` and the names
        # interpolated in the statement from this frame's locals - do not
        # rename these locals without confirming.
        to_cluster = False
        statement = ("gunzip -c %(maf_file)s > %(maf_tmp)s")
        P.run()

        target_genome = PARAMS["genome"]
        query_genome = PARAMS["phyloCSF_query_genome"]
        genome_file = os.path.join(PARAMS["genomedir"], PARAMS["genome"])

        gene_models = PipelineLncRNA.extractMAFGeneBlocks(bed_file,
                                                          maf_tmp,
                                                          genome_file,
                                                          outfile,
                                                          target_genome,
                                                          query_genome,
                                                          keep_gaps=False)
        E.info("%i gene_models extracted" % gene_models)
    finally:
        # previously the temporary file was left behind on failure
        if os.path.exists(maf_tmp):
            os.unlink(maf_tmp)
示例11: createMAFAlignment
def createMAFAlignment(infiles, outfile):
    """Create a filtered, gzipped MAF alignment from a directory of axt files.

    Takes all ``net.axt.gz`` files in the directory given by
    ``PARAMS["phyloCSF_location_axt"]``, drops those whose name matches
    the regular expression ``PARAMS["phyloCSF_ignore"]``, converts them
    to a single maf file using axtToMaf and filters the maf alignments
    with ``PipelineLncRNA.filterMAF`` using
    ``PARAMS["phyloCSF_filter_alignments"]`` (the original author
    describes this as removing alignments under a specified length).

    Args:
        infiles: not used inside this function.
        outfile: name of the output; a trailing ``.gz`` is snipped off
            for the intermediate steps and the result is gzipped at the
            end, together with a ``*_removed.maf`` file.

    Raises:
        ValueError: if no axt file is left after filtering. Without any
            file operand ``zcat`` would read from standard input.

    The two temporary files are removed even if a step fails.
    """
    outfile = P.snip(outfile, ".gz")
    axt_dir = PARAMS["phyloCSF_location_axt"]
    to_ignore = re.compile(PARAMS["phyloCSF_ignore"])

    axt_paths = sorted(
        os.path.join(axt_dir, axt_file)
        for axt_file in os.listdir(axt_dir)
        if axt_file.endswith("net.axt.gz") and not to_ignore.search(axt_file))
    if not axt_paths:
        raise ValueError(
            "no net.axt.gz files to convert found in %s" % axt_dir)
    axt_files = " ".join(axt_paths)
    E.info("axt files from which MAF alignment will be created: %s" %
           axt_files)

    target_genome = PARAMS["phyloCSF_target_genome"]
    target_contigs = os.path.join(PARAMS["annotations_annotations_dir"],
                                  PARAMS_ANNOTATIONS["interface_contigs"])
    query_genome = PARAMS["phyloCSF_query_genome"]
    query_contigs = os.path.join(PARAMS["phyloCSF_query_assembly"],
                                 PARAMS_ANNOTATIONS["interface_contigs"])

    # NOTE(review): P.run() is called without arguments, so it presumably
    # picks up ``statement``, ``to_cluster`` and the names interpolated in
    # the statements from this frame's locals - do not rename these
    # locals without confirming.
    to_cluster = False
    removed = P.snip(outfile, ".maf") + "_removed.maf"

    tmpf1 = P.getTempFilename("./phyloCSF")
    tmpf2 = P.getTempFilename("./phyloCSF")
    try:
        # concatenate the axt files, then convert them to maf
        statement = ("zcat %(axt_files)s"
                     " > %(tmpf1)s;"
                     " axtToMaf "
                     " -tPrefix=%(target_genome)s."
                     " -qPrefix=%(query_genome)s."
                     " %(tmpf1)s"
                     " %(target_contigs)s"
                     " %(query_contigs)s"
                     " %(tmpf2)s")
        P.run()

        E.info("Temporary axt file created %s" % os.path.abspath(tmpf1))
        E.info("Temporary maf file created %s" % os.path.abspath(tmpf2))

        filtered = PipelineLncRNA.filterMAF(
            tmpf2,
            outfile,
            removed,
            PARAMS["phyloCSF_filter_alignments"])
        E.info("%s blocks were ignored in MAF alignment"
               " because length of target alignment was too short" %
               filtered[0])
        E.info("%s blocks were output to filtered MAF alignment" %
               filtered[1])
    finally:
        # previously the temporary files were left behind on failure
        for tmp_path in (tmpf1, tmpf2):
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)

    statement = ("gzip %(outfile)s;"
                 " gzip %(removed)s")
    P.run()
示例12: buildCodingGeneSet
def buildCodingGeneSet(infiles, outfile):
    """Filter a cuffcompare result for annotated protein coding genes.

    Takes the output from cuffcompare of a transcript assembly and
    filters for annotated protein coding genes.

    NB "pruned" refers to nomenclature in the transcript building
    pipeline - transcripts that appear in at least two samples.

    Because an abinitio assembly will often contain fragments of known
    transcripts and describe them as novel, the default behaviour is to
    produce a set that is composed of 'complete' or 'contained'
    transcripts, i.e. nothing novel. This may underestimate the number
    of transcripts that are actually expressed.

    The first two entries of ``infiles`` and ``outfile`` are forwarded
    to ``PipelineLncRNA.buildCodingGeneSet``.
    """
    first_input = infiles[0]
    second_input = infiles[1]
    PipelineLncRNA.buildCodingGeneSet(first_input, second_input, outfile)
示例13: buildRefcodingGeneSetStats
def buildRefcodingGeneSetStats(infile, outfile):
    """Write summary counts for a gene set as a two-line table.

    Counts (according to the original author, in the coding and lncRNA
    genesets):

    * no. of transcripts
    * no. genes
    * average number of exons per transcript
    * average number of exons per gene
    * no. single exon transcripts
    * no. multi-exon transcripts
    * no. single exon genes
    * no. multi-exon genes

    Args:
        infile: input passed to ``PipelineLncRNA.flagExonStatus``.
        outfile: path of the tab-separated output: one header line
            followed by one line of values (written without a trailing
            newline, as before).

    All counts are computed before ``outfile`` is opened, so a failing
    counter no longer leaves a header-only file behind. The output
    handle is closed explicitly and the temporary files are removed
    even if a step fails.
    """
    # calculate exon status for refcoding genes.
    tmpf = P.getTempFilename(".") + ".gz"
    temp_paths = (tmpf, tmpf + ".log", P.snip(tmpf, ".gz"))
    try:
        PipelineLncRNA.flagExonStatus(infile, tmpf)

        # keep each column name next to its counter so that header and
        # values cannot drift apart
        columns = [
            ("no_transcripts", PipelineLncRNA.CounterTranscripts),
            ("no_genes", PipelineLncRNA.CounterGenes),
            ("no_exons_per_transcript",
             PipelineLncRNA.CounterExonsPerTranscript),
            ("no_exons_per_gene", PipelineLncRNA.CounterExonsPerGene),
            ("no_single_exon_transcripts",
             PipelineLncRNA.CounterSingleExonTranscripts),
            ("no_multi_exon_transcripts",
             PipelineLncRNA.CounterMultiExonTranscripts),
            ("no_single_exon_genes", PipelineLncRNA.CounterSingleExonGenes),
            ("no_multi_exon_genes", PipelineLncRNA.CounterMultiExonGenes),
        ]
        header = [column_name for column_name, _ in columns]
        counts = [counter(tmpf).count() for _, counter in columns]

        with open(outfile, "w") as outf:
            outf.write("\t".join(header) + "\n")
            outf.write("\t".join(map(str, counts)))
    finally:
        # previously the temporary files were left behind on failure
        for temp_path in temp_paths:
            if os.path.exists(temp_path):
                os.unlink(temp_path)
示例14: extractControllLncRNAFastaAlignments
def extractControllLncRNAFastaAlignments(infiles, outfile):
    """Extract gene blocks from a MAF alignment for a set of intervals.

    Args:
        infiles: a pair ``(bed_file, maf_file)``. ``maf_file`` is
            decompressed with ``gunzip -c`` into a temporary file.
        outfile: passed through to
            ``PipelineLncRNA.extractMAFGeneBlocks``.

    The temporary file is removed even if one of the steps fails.
    """
    bed_file, maf_file = infiles
    # NOTE(review): the temporary directory is hard-coded to a
    # site-specific path; consider making it configurable.
    maf_tmp = P.getTempFilename("/ifs/scratch")
    try:
        # NOTE(review): P.run() is called without arguments, so it
        # presumably picks up ``statement``, ``to_cluster`` and the names
        # interpolated in the statement from this frame's locals - do not
        # rename these locals without confirming.
        to_cluster = False
        statement = ("gunzip -c %(maf_file)s > %(maf_tmp)s")
        P.run()

        target_genome = PARAMS["genome"]
        query_genome = PARAMS["phyloCSF_query_genome"]
        genome_file = os.path.join(PARAMS["genomedir"], PARAMS["genome"])

        gene_models = PipelineLncRNA.extractMAFGeneBlocks(bed_file,
                                                          maf_tmp,
                                                          genome_file,
                                                          outfile,
                                                          target_genome,
                                                          query_genome,
                                                          keep_gaps=False)
        E.info("%i gene_models extracted" % gene_models)
    finally:
        # previously the temporary file was left behind on failure
        if os.path.exists(maf_tmp):
            os.unlink(maf_tmp)
示例15: convertGTFToBed12
def convertGTFToBed12(infile, outfile):
    """Convert a gtf file to bed12.

    Used to transform lncrna_final.gtf.gz into lncrna_final.bed. The
    conversion is delegated to ``PipelineLncRNA.gtfToBed12``, which is
    called with ``"transcript"`` as its third argument.
    """
    level = "transcript"
    PipelineLncRNA.gtfToBed12(infile, outfile, level)