本文整理汇总了 Python 中 Bio.SeqUtils.GC 属性的典型用法代码示例。如果您正苦于以下问题：Python SeqUtils.GC 属性的具体用法？Python SeqUtils.GC 怎么用？Python SeqUtils.GC 使用的例子？那么，这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在模块 Bio.SeqUtils 的用法示例。
在下文中一共展示了SeqUtils.GC属性的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: computeGCDiffs
# 需要导入模块: from Bio import SeqUtils [as 别名]
# 或者: from Bio.SeqUtils import GC [as 别名]
def computeGCDiffs(self, diff):
    """Apply a change in the GC count and keep the initiation terms in sync.

    ``diff`` is added to ``self.numGC``. If the window previously had no
    G/C and now has at least one, the 'init_allA/T' terms are swapped for
    the 'init_oneG/C' terms in ``self.currdH`` / ``self.currdS``. If the
    count falls back to exactly zero the swap is reversed. ``self.noGC`` is
    updated to match; in every other situation nothing else changes.
    """
    self.numGC += diff

    gained_gc = self.numGC > 0 and self.noGC
    lost_gc = self.numGC == 0 and not self.noGC
    if not (gained_gc or lost_gc):
        # No transition between the "no GC" and "some GC" states.
        return

    if gained_gc:
        removed, added = 'init_allA/T', 'init_oneG/C'
    else:
        removed, added = 'init_oneG/C', 'init_allA/T'

    # Take out the initiation term of the old state ...
    self.currdH -= self.stackTable[removed][self.dH]
    self.currdS -= self.stackTable[removed][self.dS]
    # ... and put in the one of the new state.
    self.currdH += self.stackTable[added][self.dH]
    self.currdS += self.stackTable[added][self.dS]
    self.noGC = not gained_gc
示例2: gene_feature
# 需要导入模块: from Bio import SeqUtils [as 别名]
# 或者: from Bio.SeqUtils import GC [as 别名]
def gene_feature(Y, X, learn_options):
    '''
    Things like the sequence of the gene, the DNA Tm of the gene, etc.

    For every distinct name in ``Y['Target gene']`` the sequence is fetched
    once with ``util.get_gene_sequence`` and four values are derived from
    it: ``len(seq)``, ``SeqUtil.GC(seq)``, ``Tm.Tm_staluc(seq, rna=False)``
    and ``SeqUtil.molecular_weight(seq, 'DNA')``. Each row targeting that
    gene receives those same four values.

    Args:
        Y: object whose 'Target gene' entry offers ``.values``, ``.shape``,
            ``.unique()`` and ``.index`` (presumably a pandas DataFrame;
            confirm against the caller).
        X: not used by this function.
        learn_options: not used by this function.

    Returns:
        A pandas.DataFrame indexed like ``Y['Target gene']`` with the
        columns 'gene length', 'gene GC content', 'gene temperature' and
        'gene molecular weight'. Rows whose gene never matches keep 0.0.
    '''
    gene_names = Y['Target gene']
    names = gene_names.values
    n_rows = gene_names.shape[0]

    gene_length = np.zeros((n_rows, 1))
    gc_content = np.zeros((n_rows, 1))
    temperature = np.zeros((n_rows, 1))
    molecular_weight = np.zeros((n_rows, 1))

    for gene in gene_names.unique():
        seq = util.get_gene_sequence(gene)
        # Boolean row mask for this gene, built once and reused for all columns.
        rows = names == gene
        gene_length[rows] = len(seq)
        gc_content[rows] = SeqUtil.GC(seq)
        temperature[rows] = Tm.Tm_staluc(seq, rna=False)
        molecular_weight[rows] = SeqUtil.molecular_weight(seq, 'DNA')

    # Named `features` rather than `all` so the builtin is not shadowed.
    features = np.concatenate(
        (gene_length, gc_content, temperature, molecular_weight), axis=1)
    df = pandas.DataFrame(data=features,
                          index=gene_names.index,
                          columns=['gene length',
                                   'gene GC content',
                                   'gene temperature',
                                   'gene molecular weight'])
    return df
示例3: resetTmVals
# 需要导入模块: from Bio import SeqUtils [as 别名]
# 或者: from Bio.SeqUtils import GC [as 别名]
def resetTmVals(self, startInd, startLen):
    """Rebuild the Tm bookkeeping from scratch for a window at ``startInd``.

    Used when the crawler jumps ahead by more than a single base, so the
    queues have to be repopulated instead of updated incrementally.
    ``startLen`` is the length of the window the totals are computed for.
    """
    block = self.block
    table = self.stackTable

    self.currInd = startInd
    self.numGC = 0

    # Terminal contributions of the first and the last base of the window.
    self.frontH, self.frontS = self.getFrontVals(block[startInd])
    self.backH, self.backS = self.getBackVals(block[startInd + startLen - 1])

    # Walk the block, storing the nearest-neighbor contributions to deltaH
    # and deltaS and counting G/C bases inside the window.
    # NOTE(review): the "- 2" bound means the last bases of the block are
    # never visited here - confirm that this is intended.
    for offset in range(min(self.L, len(block) - startInd - 2)):
        pos = startInd + offset
        if offset < startLen and block[pos] in 'GCgc':
            self.numGC += 1
        pair = block[pos:pos + 2]
        if pair in table:
            self.hQueue[offset] = table[pair][self.dH]
            self.sQueue[offset] = table[pair][self.dS]

    # Nearest-neighbor sum over the window plus 'init' and the two ends.
    n_pairs = startLen - 1
    self.currdH = (sum(self.hQueue[:n_pairs]) + table['init'][self.dH]
                   + self.frontH + self.backH)
    self.currdS = (sum(self.sQueue[:n_pairs]) + table['init'][self.dS]
                   + self.frontS + self.backS)

    # The initiation term depends on whether the window holds any G/C.
    self.noGC = self.numGC == 0
    init_key = 'init_allA/T' if self.noGC else 'init_oneG/C'
    self.currdH += table[init_key][self.dH]
    self.currdS += table[init_key][self.dS]

    self.currLen = startLen
    self.queueInd = 0
示例4: gcCheck
# 需要导入模块: from Bio import SeqUtils [as 别名]
# 或者: from Bio.SeqUtils import GC [as 别名]
def gcCheck(self, seq3):
    """Tell whether the candidate's GC percentage lies within the bounds.

    The percentage is ``self.numGC * 100.0 / len(seq3)`` and has to fall in
    the inclusive range from ``self.gcPercent`` to ``self.GCPercent``.
    """
    lower = float(self.gcPercent)
    gc_pct = self.numGC * 100.0 / len(seq3)
    if not lower <= gc_pct:
        return False
    return gc_pct <= float(self.GCPercent)
示例5: test_defaults
# 需要导入模块: from Bio import SeqUtils [as 别名]
# 或者: from Bio.SeqUtils import GC [as 别名]
def test_defaults():
    """Run the program without arguments and validate the default output.

    Checks the exit status and message, the number of records written to
    'out.fa', the sequence lengths (50 to 75), the alphabet used and the
    mean GC fraction. The output file is removed before and after the run.
    """
    out_file = 'out.fa'
    try:
        if os.path.isfile(out_file):
            os.remove(out_file)

        rv, out = getstatusoutput(prg)
        assert rv == 0
        assert out == f'Done, wrote 10 DNA sequences to "{out_file}".'
        assert os.path.isfile(out_file)

        # correct number of seqs
        seqs = list(SeqIO.parse(out_file, 'fasta'))
        assert len(seqs) == 10

        # the lengths are in the correct range
        seq_lens = [len(rec.seq) for rec in seqs]
        assert max(seq_lens) <= 75
        assert min(seq_lens) >= 50

        # bases are correct: take the union of characters over ALL records,
        # so one record that happens to lack a base cannot fail the check
        bases = ''.join(
            sorted({base for rec in seqs for base in str(rec.seq)}))
        assert bases == 'ACGT'

        # the pct GC is about right
        # NOTE(review): Bio.SeqUtils.GC is deprecated in recent Biopython
        # releases in favor of gc_fraction - confirm the pinned version.
        gc = [GC(rec.seq) / 100 for rec in seqs]
        assert .47 <= mean(gc) <= .53
    finally:
        if os.path.isfile(out_file):
            os.remove(out_file)
# --------------------------------------------------
示例6: target_genes_stats
# 需要导入模块: from Bio import SeqUtils [as 别名]
# 或者: from Bio.SeqUtils import GC [as 别名]
def target_genes_stats(genes=('HPRT1', 'TADA1', 'NF2', 'TADA2B', 'NF1', 'CUL3', 'MED12', 'CCDC101')):
    """Print one line of sequence statistics for each gene in ``genes``.

    For every gene whose sequence ``get_gene_sequence`` returns (genes for
    which it returns None are skipped) the line shows the sequence length,
    ``SeqUtil.GC``, ``Tm.Tm_staluc(seq, rna=False)`` and
    ``SeqUtil.molecular_weight(seq, 'DNA')``.

    Args:
        genes: iterable of gene names; defaults to a fixed tuple of eight.

    Returns:
        None; the statistics are written to standard output.
    """
    row_format = '%s \t\t\t\t len: %d \t GCcont: %.3f \t Temp: %.4f \t molweight: %.4f'
    for gene in genes:
        seq = get_gene_sequence(gene)
        if seq is None:
            continue
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print(row_format % (gene, len(seq), SeqUtil.GC(seq), Tm.Tm_staluc(seq, rna=False), SeqUtil.molecular_weight(seq, 'DNA')))
示例7: countGC
# 需要导入模块: from Bio import SeqUtils [as 别名]
# 或者: from Bio.SeqUtils import GC [as 别名]
def countGC(s, length_audit=True):
    '''
    GC count of the 20mer at positions 4..23 of s, as per the Doench
    paper/code: every character other than 'A' or 'T' is counted.
    With length_audit set, s must be exactly 30 characters long.
    '''
    if length_audit:
        assert len(s) == 30, "seems to assume 30mer"
    core = s[4:24]
    return len(core) - core.count('A') - core.count('T')
示例8: gc_features
# 需要导入模块: from Bio import SeqUtils [as 别名]
# 或者: from Bio.SeqUtils import GC [as 别名]
def gc_features(data, audit=True):
    """Derive GC-count features from the '30mer' column of ``data``.

    ``countGC`` is applied to every entry of ``data['30mer']`` with
    ``audit`` passed as its second argument.

    Returns:
        A tuple ``(gc_above_10, gc_below_10, gc_count)``: two 0/1 indicator
        series named 'GC > 10' and 'GC < 10', and the raw counts named
        'GC count'. A count of exactly 10 sets neither indicator.
    """
    gc_count = data['30mer'].apply(countGC, args=(audit,))
    gc_count.name = 'GC count'

    # Multiplying a boolean series by 1 turns it into 0/1 integers.
    gc_above_10 = ((gc_count > 10) * 1).rename('GC > 10')
    gc_below_10 = ((gc_count < 10) * 1).rename('GC < 10')
    return gc_above_10, gc_below_10, gc_count
示例9: randomSeq
# 需要导入模块: from Bio import SeqUtils [as 别名]
# 或者: from Bio.SeqUtils import GC [as 别名]
def randomSeq(length, GC):
    """Return a shuffled DNA string with roughly the requested GC fraction.

    The sequence is built from ``trunc(length * GC / 2)`` copies each of
    'G' and 'C' and ``length // 2`` minus that many copies each of 'A' and
    'T', then shuffled with ``random.shuffle``. Because the counts come in
    pairs and are truncated, the result can be shorter than ``length``
    (for example when ``length`` is odd).

    Args:
        length: requested sequence length (an integer).
        GC: GC content as a fraction, e.g. 0.5.

    Returns:
        The shuffled sequence, which is also stored in the module-level
        global ``RandomSeq``.
    """
    global RandomSeq
    n_gc = math.trunc(length * GC / 2)  # copies of 'G' and of 'C'
    # Plain floor division; no need for the py2/3 `old_div` shim here.
    n_at = length // 2 - n_gc  # copies of 'A' and of 'T'
    bases = list("A" * n_at + "T" * n_at + "G" * n_gc + "C" * n_gc)
    random.shuffle(bases)
    RandomSeq = "".join(bases)
    return RandomSeq
示例10: test_options
# 需要导入模块: from Bio import SeqUtils [as 别名]
# 或者: from Bio.SeqUtils import GC [as 别名]
def test_options():
    """Run the program with randomized options and validate the output.

    Random length bounds, sequence count and GC fraction are passed on the
    command line together with ``-t rna`` and ``-s 1``. The test checks
    the exit status and message, the number of records, the sequence
    lengths, the alphabet used and the mean GC fraction. The output file
    is removed before and after the run.
    """
    out_file = random_string() + '.fasta'
    try:
        if os.path.isfile(out_file):
            os.remove(out_file)

        min_len = random.randint(50, 99)
        max_len = random.randint(100, 150)
        num_seqs = random.randint(100, 150)
        pct_gc = random.random()
        cmd = (f'{prg} -m {min_len} -x {max_len} -o {out_file} '
               f'-n {num_seqs} -t rna -p {pct_gc:.02f} -s 1')
        rv, out = getstatusoutput(cmd)

        assert rv == 0
        assert out == f'Done, wrote {num_seqs} RNA sequences to "{out_file}".'
        assert os.path.isfile(out_file)

        # correct number of seqs
        seqs = list(SeqIO.parse(out_file, 'fasta'))
        assert len(seqs) == num_seqs

        # the lengths are in the correct range
        seq_lens = [len(rec.seq) for rec in seqs]
        assert max(seq_lens) <= max_len
        assert min(seq_lens) >= min_len

        # bases are correct: take the union of characters over ALL records,
        # so one record that happens to lack a base (likely when pct_gc is
        # near 0 or 1) cannot fail the check
        bases = ''.join(
            sorted({base for rec in seqs for base in str(rec.seq)}))
        assert bases == 'ACGU'

        # the pct GC is about right
        # NOTE(review): Bio.SeqUtils.GC is deprecated in recent Biopython
        # releases in favor of gc_fraction - confirm the pinned version.
        gc = [GC(rec.seq) / 100 for rec in seqs]
        assert pct_gc - .3 <= mean(gc) <= pct_gc + .3
    finally:
        if os.path.isfile(out_file):
            os.remove(out_file)