本文整理汇总了Python中CGAT.Bed.setName方法的典型用法代码示例。如果您正苦于以下问题:Python Bed.setName方法的具体用法?Python Bed.setName怎么用?Python Bed.setName使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块CGAT.Bed的用法示例。
在下文中一共展示了Bed.setName方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: exportSequencesFromBedFile
# 需要导入模块: from CGAT import Bed [as 别名]
# 或者: from CGAT.Bed import setName [as 别名]
def exportSequencesFromBedFile( infile, outfile, masker = None, mode = "intervals" ):
    '''export sequences for intervals in :term:`bed`-formatted *infile*
    to :term:`fasta` formatted *outfile*

    The genome is read from the indexed fasta file at
    ``PARAMS["genome_dir"]/PARAMS["genome"]``. Sequences are always
    fetched from the forward ("+") strand and passed through
    ``maskSequences(seqs, masker)`` before being written.

    *mode* selects what is exported for each interval:

    ``intervals``
        the interval itself, one record per bed entry.

    ``leftright``
        two records per bed entry (suffixes ``_l`` and ``_r``), each
        spanning one interval length next to the interval. The left
        flank is clipped at position 0 and the right flank at the
        contig length.

    Record identifiers are prefixed with the track name, i.e. *infile*
    with the ``.bed.gz`` suffix removed.

    Raises ValueError if *mode* is not one of the above.
    '''
    # An unrecognised mode used to fall through both branches and
    # silently produce an empty output file.
    if mode not in ("intervals", "leftright"):
        raise ValueError("unknown mode '%s'" % mode)

    track = P.snip( infile, ".bed.gz" )

    fasta = IndexedFasta.IndexedFasta( os.path.join( PARAMS["genome_dir"], PARAMS["genome"] ) )

    ids, seqs = [], []

    # Keep a handle on the input so that it is closed even if reading
    # or sequence retrieval fails.
    inf = IOTools.openFile( infile )
    try:
        for bed in Bed.setName( Bed.iterator( inf ) ):
            lcontig = fasta.getLength( bed.contig )

            if mode == "intervals":
                seqs.append( fasta.getSequence( bed.contig, "+", bed.start, bed.end ) )
                ids.append( "%s_%s %s:%i..%i" % (track, bed.name, bed.contig, bed.start, bed.end) )

            elif mode == "leftright":
                length = bed.end - bed.start

                # left flank: ends where the interval starts
                start, end = max( 0, bed.start - length ), bed.end - length
                ids.append( "%s_%s_l %s:%i..%i" % (track, bed.name, bed.contig, start, end) )
                seqs.append( fasta.getSequence( bed.contig, "+", start, end ) )

                # right flank: starts where the interval ends
                start, end = bed.start + length, min( lcontig, bed.end + length )
                ids.append( "%s_%s_r %s:%i..%i" % (track, bed.name, bed.contig, start, end) )
                seqs.append( fasta.getSequence( bed.contig, "+", start, end ) )
    finally:
        inf.close()

    masked = maskSequences( seqs, masker )

    # Open the output only once all records are ready, so that a failure
    # above does not leave an empty file behind, and always close it.
    outs = IOTools.openFile( outfile, "w" )
    try:
        # Every record is newline-terminated so the file ends with a
        # newline; with no records nothing is written.
        outs.write( "".join( [ ">%s\n%s\n" % (x, y) for x, y in zip( ids, masked ) ] ) )
    finally:
        outs.close()
示例2: main
# 需要导入模块: from CGAT import Bed [as 别名]
# 或者: from CGAT.Bed import setName [as 别名]
# Command-line entry point: reads bed records from options.stdin and collects
# an identifier and a genomic sequence for each of them.
# NOTE(review): the listing is truncated (see the marker at the end), so the
# output stage is not visible here.
def main(argv=None):
if argv is None:
argv = sys.argv
# Build the option parser; the usage text is taken from the module docstring.
parser = E.OptionParser(
version="%prog version: $Id$",
usage=globals()["__doc__"])
parser.add_option("-g", "--genome-file", dest="genome_file", type="string",
help="filename with genomic sequence to retrieve "
"sequences from.")
parser.add_option("-m", "--masker", dest="masker", type="choice",
choices=("dust", "dustmasker", "softmask", "none"),
help="apply masker to mask output sequences "
"[%default].")
parser.add_option("--output-mode", dest="output_mode", type="choice",
choices=("intervals", "leftright", "segments"),
help="what to output. "
"'intervals' generates a single sequence for "
"each bed interval. 'leftright' generates two "
"sequences, one in each direction, for each bed "
"interval. 'segments' can be used to output "
"sequence from bed12 files so that sequence only covers "
"the segements [%default]")
parser.add_option("--min-sequence-length", dest="min_length", type="int",
help="require a minimum sequence length [%default]")
parser.add_option("--max-sequence-length", dest="max_length", type="int",
help="require a maximum sequence length [%default]")
parser.add_option(
"--extend-at", dest="extend_at", type="choice",
choices=("none", "3", "5", "both", "3only", "5only"),
help="extend at 3', 5' or both or no ends. If 3only or 5only "
"are set, only the added sequence is returned [default=%default]")
parser.add_option(
"--extend-by", dest="extend_by", type="int",
help="extend by # bases [default=%default]")
# --use-strand is a store_false flag on ignore_strand: strand information is
# ignored unless the flag is given.
parser.add_option(
"--use-strand", dest="ignore_strand",
action="store_false",
help="use strand information and return reverse complement "
"on intervals located on the negative strand. "
"[default=%default]")
parser.set_defaults(
genome_file=None,
masker=None,
output_mode="intervals",
min_length=0,
max_length=0,
extend_at=None,
extend_by=100,
ignore_strand=True,
)
# NOTE(review): argv is not forwarded to E.Start here -- confirm that E.Start
# reads the command line itself.
(options, args) = E.Start(parser)
# NOTE(review): fasta appears to be bound only when --genome-file is given,
# yet it is used unconditionally in the loop below -- confirm against the
# original indentation.
if options.genome_file:
fasta = IndexedFasta.IndexedFasta(options.genome_file)
contigs = fasta.getContigSizes()
fasta.setConverter(IndexedFasta.getConverter("zero-both-open"))
counter = E.Counter()
# ids and seqs are filled in parallel: one identifier per collected sequence.
ids, seqs = [], []
E.info("collecting sequences")
# Bed.setName wraps the record iterator (presumably supplying a name for
# unnamed records -- TODO confirm).
for bed in Bed.setName(Bed.iterator(options.stdin)):
counter.input += 1
lcontig = fasta.getLength(bed.contig)
# Without --use-strand every sequence is fetched from the "+" strand.
if options.ignore_strand:
strand = "+"
else:
strand = bed.strand
# 'segments' mode on 12-column input: fetch each interval returned by
# bed.toIntervals() and join them into a single sequence.
if options.output_mode == "segments" and bed.columns == 12:
ids.append("%s %s:%i..%i (%s) %s %s" %
(bed.name, bed.contig, bed.start, bed.end, strand,
bed["blockSizes"], bed["blockStarts"]))
seg_seqs = [fasta.getSequence(bed.contig, strand, start, end)
for start, end in bed.toIntervals()]
seqs.append("".join(seg_seqs))
# 'intervals' mode, or 'segments' mode on input without 12 columns: one
# sequence spanning the whole record.
elif (options.output_mode == "intervals" or
options.output_mode == "segments"):
ids.append("%s %s:%i..%i (%s)" %
(bed.name, bed.contig, bed.start, bed.end, strand))
seqs.append(
fasta.getSequence(bed.contig, strand, bed.start, bed.end))
elif options.output_mode == "leftright":
l = bed.end - bed.start
#......... part of the code is omitted here .........
示例3: main
# 需要导入模块: from CGAT import Bed [as 别名]
# 或者: from CGAT.Bed import setName [as 别名]
def main(argv=None):
    '''write :term:`fasta` formatted sequences for :term:`bed` intervals.

    Bed records are read from ``options.stdin`` and sequences are
    retrieved from the indexed genome given by ``--genome-file``. The
    sequences are passed through ``Masker.maskSequences`` and written
    to ``options.stdout``, one fasta record per sequence.

    ``--mode`` selects what is written for each interval:

    ``intervals``
        the interval itself.

    ``leftright``
        two records (suffixes ``_l`` and ``_r``), each spanning one
        interval length next to the interval. The left flank is clipped
        at position 0 and the right flank at the contig length.

    Strand information is ignored (sequences are fetched from "+")
    unless ``--use-strand`` is given.

    The options ``--min-length``, ``--max-length``, ``--extend-at`` and
    ``--extend-by`` are parsed but not read anywhere in this function.

    Raises ValueError if no genome file has been supplied.
    '''
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: gff2fasta.py 2861 2010-02-23 17:36:32Z andreas $")

    parser.add_option("-g", "--genome-file", dest="genome_file", type="string",
                      help="filename with genome.")

    parser.add_option("-m", "--masker", dest="masker", type="choice",
                      choices=("dust", "dustmasker", "softmask", "none"),
                      help="apply masker [%default].")

    parser.add_option("-o", "--mode", dest="mode", type="choice",
                      choices=("intervals", "leftright"),
                      help="what to output [%default]")

    parser.add_option("--min-length", dest="min_length", type="int",
                      help="require a minimum sequence length [%default]")

    parser.add_option("--max-length", dest="max_length", type="int",
                      help="require a maximum sequence length [%default]")

    parser.add_option("--extend-at", dest="extend_at", type="choice",
                      choices=("none", "3", "5", "both", "3only", "5only"),
                      help="extend at no, 3', 5' or both ends. If 3only or 5only are set, only the added sequence is returned [default=%default]")

    parser.add_option("--extend-by", dest="extend_by", type="int",
                      help="extend by # bases [default=%default]")

    # store_false: strand is ignored unless --use-strand is given
    parser.add_option("--use-strand", dest="ignore_strand", action="store_false",
                      help="use strand information and return reverse complement [default=%default]")

    parser.set_defaults(
        genome_file=None,
        masker=None,
        mode="intervals",
        min_length=0,
        max_length=0,
        extend_at=None,
        extend_by=100,
        ignore_strand=True,
    )

    (options, args) = E.Start(parser)

    # Every record needs the genome. Fail early with a clear message
    # instead of a NameError on the first record.
    if not options.genome_file:
        raise ValueError("please supply a genome file (--genome-file)")

    fasta = IndexedFasta.IndexedFasta(options.genome_file)
    # NOTE(review): the return value is not used in this function. The
    # call is kept in case it has loading side effects; confirm and drop.
    fasta.getContigSizes()
    fasta.setConverter(IndexedFasta.getConverter("zero-both-open"))

    counter = E.Counter()
    # ids and seqs are filled in parallel, one identifier per sequence
    ids, seqs = [], []

    E.info("collecting sequences")
    for bed in Bed.setName(Bed.iterator(options.stdin)):
        counter.input += 1

        lcontig = fasta.getLength(bed.contig)

        if options.ignore_strand:
            strand = "+"
        else:
            strand = bed.strand

        if options.mode == "intervals":
            ids.append("%s %s:%i..%i (%s)" %
                       (bed.name, bed.contig, bed.start, bed.end, strand))
            seqs.append(
                fasta.getSequence(bed.contig, strand, bed.start, bed.end))

        elif options.mode == "leftright":
            length = bed.end - bed.start

            # left flank: ends where the interval starts
            start, end = max(0, bed.start - length), bed.end - length
            ids.append("%s_l %s:%i..%i (%s)" %
                       (bed.name, bed.contig, start, end, strand))
            seqs.append(fasta.getSequence(bed.contig, strand, start, end))

            # right flank: starts where the interval ends
            start, end = bed.start + length, min(lcontig, bed.end + length)
            ids.append("%s_r %s:%i..%i (%s)" %
                       (bed.name, bed.contig, start, end, strand))
            seqs.append(fasta.getSequence(bed.contig, strand, start, end))

    E.info("collected %i sequences" % len(seqs))

    masked = Masker.maskSequences(seqs, options.masker)
    options.stdout.write(
        "\n".join([">%s\n%s" % (x, y) for x, y in zip(ids, masked)]) + "\n")

    E.info("masked %i sequences" % len(seqs))

    counter.output = len(seqs)

    E.info("%s" % counter)

    E.Stop()