当前位置: 首页>>代码示例>>Python>>正文


Python UI.getSpeciesInfo方法代码示例

本文整理汇总了Python中UI.getSpeciesInfo方法的典型用法代码示例。如果您正苦于以下问题:Python UI.getSpeciesInfo方法的具体用法?Python UI.getSpeciesInfo怎么用?Python UI.getSpeciesInfo使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在UI的用法示例。


在下文中一共展示了UI.getSpeciesInfo方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: importUCSCTranscriptSequences

# 需要导入模块: import UI [as 别名]
# 或者: from UI import getSpeciesInfo [as 别名]
def importUCSCTranscriptSequences(species,array_type,probeset_seq_db):
    start_time = time.time()

    if force == 'yes':
        ### Download mRNA sequence file from website
        import UI; species_names = UI.getSpeciesInfo()
        species_full = species_names[species]
        species_full = string.replace(species_full,' ','_')
        ucsc_mRNA_dir = update.getFTPData('hgdownload.cse.ucsc.edu','/goldenPath/currentGenomes/'+species_full+'/bigZips','mrna.fa.gz')
        output_dir = 'AltDatabase/'+species+'/SequenceData/'
        try:
            gz_filepath, status = update.download(ucsc_mRNA_dir,output_dir,'')        
            if status == 'not-removed':
                try: os.remove(gz_filepath) ### Not sure why this works now and not before
                except OSError: status = status
        except Exception: null=[] ### Occurs when file is not available for this species
            
    filename = 'AltDatabase/'+species+'/SequenceData/mrna.fa'
    output_file = 'AltDatabase/'+species+'/SequenceData/output/'+array_type+'_UCSC-mRNA_alignments.txt'
    dataw = export.ExportFile(output_file)      
    output_file = 'AltDatabase/'+species+'/SequenceData/output/sequences/'+array_type+'_UCSC_mRNA_seqmatches.txt'
    datar = export.ExportFile(output_file)

    ucsc_mrna_to_gene = importUCSCTranscriptAssociations(species)    
    
    print "Begining generic fasta import of",filename
    #'>gnl|ENS|Mm#S10859962 Mus musculus 12 days embryo spinal ganglion cDNA /gb=AK051143 /gi=26094349 /ens=Mm.1 /len=2289']
    #'ATCGTGGTGTGCCCAGCTCTTCCAAGGACTGCTGCGCTTCGGGGCCCAGGTGAGTCCCGC'
    fn=filepath(filename); sequence = '|'; ucsc_mRNA_hit_len={}; ucsc_probeset_null_hits={}; k=0
    fn=filepath(filename); sequence = '|'; ucsc_mRNA_hit_len={}; ucsc_probeset_null_hits={}; k=0
    for line in open(fn,'rU').xreadlines():
        try: data, newline= string.split(line,'\n')
        except ValueError: continue
        if len(data)>0:
            if data[0] != '#':
                try:
                    if data[0] == '>':
                        if len(sequence) > 1:
                            if accession in ucsc_mrna_to_gene:
                                gene_found = 'no'
                                for ens_gene in ucsc_mrna_to_gene[accession]:
                                    if ens_gene in probeset_seq_db:
                                        sequence = string.upper(sequence); gene_found = 'yes'
                                        mRNA_seq = sequence[1:]; mRNA_length = len(mRNA_seq)    
                                        k+=1; probeset_seq_data = probeset_seq_db[ens_gene]
                                        results = simpleSeqMatchProtocol(probeset_seq_data,mRNA_seq)
                                        for (call,probeset) in results:
                                            dataw.write(string.join([probeset,str(call),accession],'\t')+'\n')
                                if gene_found == 'yes':
                                    values = [accession,mRNA_seq]; values = string.join(values,'\t')+'\n'
                                    datar.write(values)
                        values = string.split(data,' '); accession = values[0][1:]
                        sequence = '|'; continue
                except IndexError: null = []       
                try:
                    if data[0] != '>': sequence = sequence + data
                except IndexError: print kill; continue
    datar.close()
    end_time = time.time(); time_diff = int(end_time-start_time)
    print "UCSC mRNA sequences analyzed in %d seconds" % time_diff
开发者ID:venkatmi,项目名称:altanalyze,代码行数:62,代码来源:mRNASeqAlign.py

示例2: runExtractUniProt

# 需要导入模块: import UI [as 别名]
# 或者: from UI import getSpeciesInfo [as 别名]
def runExtractUniProt(species,species_full,uniprot_filename_url,trembl_filename_url,force):
    global uniprot_ensembl_db;uniprot_ensembl_db={}
    global uniprot_db;uniprot_db={}; global species_name; global uniprot_fildir
    global secondary_to_primary_db; secondary_to_primary_db={}
    import update; reload(update)
    
    species_name = species_full
    
    import UI; species_names = UI.getSpeciesInfo()
    species_full = species_names[species]
    species_full = string.replace(species_full,' ','_')

    uniprot_file = string.split(uniprot_filename_url,'/')[-1]; uniprot_file = string.replace(uniprot_file,'.gz','')
    trembl_file = string.split(trembl_filename_url,'/')[-1]; trembl_file = string.replace(trembl_file,'.gz','')
    uniprot_fildir = 'AltDatabase/uniprot/'+species+'/'
    uniprot_download_fildir = 'AltDatabase/uniprot/'
    uniprot_ens_file = species+'_Ensembl-UniProt.txt'; uniprot_ens_location = uniprot_fildir+uniprot_ens_file
    uniprot_location = uniprot_download_fildir+uniprot_file
    trembl_location = uniprot_download_fildir+trembl_file

    add_trembl_annotations = 'no' ### Currently we don't need these annotations    
    try: importEnsemblUniprot(uniprot_ens_location)
    except IOError:
        try:
            ### Download the data from the AltAnalyze website (if there)
            update.downloadCurrentVersion(uniprot_ens_location,species,'txt')
            importEnsemblUniprot(uniprot_ens_location)
        except Exception: null=[]
    try:
        uniprot_ens_location_built = string.replace(uniprot_ens_location,'UniProt','Uniprot-SWISSPROT')
        uniprot_ens_location_built = string.replace(uniprot_ens_location_built,'uniprot','Uniprot-SWISSPROT')
        importEnsemblUniprot(uniprot_ens_location_built)
    except Exception: null=[]
    
    ### Import UniProt annotations
    counts = update.verifyFile(uniprot_location,'counts')
    if force == 'no' or counts > 8: import_uniprot_db(uniprot_location)
    else:
        ### Directly download the data from UniProt
        gz_filepath, status = update.download(uniprot_filename_url,uniprot_download_fildir,'')

        if status == 'not-removed':
            try: os.remove(gz_filepath) ### Not sure why this works now and not before
            except OSError: status = status     
        import_uniprot_db(uniprot_location)
        
    if add_trembl_annotations == 'yes':
        ### Import TreMBL annotations
        try:
            if force == 'yes': uniprot_location += '!!!!!' ### Force an IOError
            import_uniprot_db(trembl_location)
        except IOError:
            ### Directly download the data from UniProt
            update.download(trembl_filename_url,uniprot_download_fildir,'')
            import_uniprot_db(trembl_location)        
    export()
    exportEnsemblUniprot(uniprot_ens_location)
开发者ID:wuxue,项目名称:altanalyze,代码行数:59,代码来源:ExtractUniProtFunctAnnot.py

示例3: importmiRNAMap

# 需要导入模块: import UI [as 别名]
# 或者: from UI import getSpeciesInfo [as 别名]
def importmiRNAMap(parse_sequences,force):
    """ Added in AltAnalyze version 2.0, this database provides target sequences for several species and different databases, 
    including miRanda, RNAhybrid and TargetScan. For more information see: http://mirnamap.mbc.nctu.edu.tw/html/about.html"""
    gz_filepath = verifyFileAdvanced('miRNA_targets_',species)
    if force == 'yes' or len(gz_filepath)==0:
        import UI; species_names = UI.getSpeciesInfo()
        species_full = species_names[species]
        species_full = string.replace(species_full,' ','_')
        miRNAMap_dir = update.getFTPData('mirnamap.mbc.nctu.edu.tw','/miRNAMap2/miRNA_Targets/'+species_full,'.txt.tar.gz')
        output_dir = 'AltDatabase/miRBS/'+species+'/'
        gz_filepath, status = update.download(miRNAMap_dir,output_dir,'')
        if status == 'not-removed':
            try: os.remove(gz_filepath) ### Not sure why this works now and not before
            except OSError: status = status 

    fn=filepath(string.replace(gz_filepath,'.tar.gz','')); x=0; count=0
    for line in open(fn,'rU').readlines():             
        data = cleanUpLine(line)
        t = string.split(data,'\t')
        if x==0: x=1
        else:
            try:
                miRNA, ensembl_transcript_id, target_start, target_end, miRNA_seq, alignment, target_seq, algorithm, c1, c2, c3 = t
                #if 'GGCTCCTGTCACCTGGGTCCGT'in target_seq:
                #print 'a'; sys.exit()
                #if 'TCF7L1' in symbol or 'TCF3' in symbol:
                #if '-422a' in miRNA:
                #print miRNA;sys.exit()
                #print symbol, mir; sys.exit()
                if ensembl_transcript_id in ens_gene_to_transcript:
                    geneids = ens_gene_to_transcript[ensembl_transcript_id]     
                    target_seq = string.upper(string.replace(target_seq,'-',''))
                    target_seq = string.replace(target_seq,'U','T')
                    for ensembl_geneid in geneids:
                        if parse_sequences == 'yes':
                            if (miRNA,ensembl_geneid) in combined_results:
                                combined_results[(miRNA,ensembl_geneid)].append(target_seq)
                        else:
                            y = MicroRNATargetData(ensembl_geneid,'',miRNA,target_seq,algorithm); count+=1
                            try: microRNA_target_db[miRNA].append(y)
                            except KeyError: microRNA_target_db[miRNA] = [y]
            except Exception: x=1 ### Bad formatting
                       
    print count, 'miRNA-target relationships added for mirnamap'
    return count
开发者ID:venkatmi,项目名称:altanalyze,代码行数:47,代码来源:MatchMiRTargetPredictions.py

示例4: importUCSCTranscriptSequences

# 需要导入模块: import UI [as 别名]
# 或者: from UI import getSpeciesInfo [as 别名]
def importUCSCTranscriptSequences(species, array_type, probeset_seq_db):
    start_time = time.time()

    if force == "yes":
        ### Download mRNA sequence file from website
        import UI

        species_names = UI.getSpeciesInfo()
        species_full = species_names[species]
        species_full = string.replace(species_full, " ", "_")
        ucsc_mRNA_dir = update.getFTPData(
            "hgdownload.cse.ucsc.edu", "/goldenPath/currentGenomes/" + species_full + "/bigZips", "mrna.fa.gz"
        )
        output_dir = "AltDatabase/" + species + "/SequenceData/"
        try:
            gz_filepath, status = update.download(ucsc_mRNA_dir, output_dir, "")
            if status == "not-removed":
                try:
                    os.remove(gz_filepath)  ### Not sure why this works now and not before
                except OSError:
                    status = status
        except Exception:
            null = []  ### Occurs when file is not available for this species

    filename = "AltDatabase/" + species + "/SequenceData/mrna.fa"
    output_file = "AltDatabase/" + species + "/SequenceData/output/" + array_type + "_UCSC-mRNA_alignments.txt"
    dataw = export.ExportFile(output_file)
    output_file = (
        "AltDatabase/" + species + "/SequenceData/output/sequences/" + array_type + "_UCSC_mRNA_seqmatches.txt"
    )
    datar = export.ExportFile(output_file)

    ucsc_mrna_to_gene = importUCSCTranscriptAssociations(species)

    print "Begining generic fasta import of", filename
    #'>gnl|ENS|Mm#S10859962 Mus musculus 12 days embryo spinal ganglion cDNA /gb=AK051143 /gi=26094349 /ens=Mm.1 /len=2289']
    #'ATCGTGGTGTGCCCAGCTCTTCCAAGGACTGCTGCGCTTCGGGGCCCAGGTGAGTCCCGC'
    fn = filepath(filename)
    sequence = "|"
    ucsc_mRNA_hit_len = {}
    ucsc_probeset_null_hits = {}
    k = 0
    fn = filepath(filename)
    sequence = "|"
    ucsc_mRNA_hit_len = {}
    ucsc_probeset_null_hits = {}
    k = 0
    for line in open(fn, "rU").xreadlines():
        try:
            data, newline = string.split(line, "\n")
        except ValueError:
            continue
        if len(data) > 0:
            if data[0] != "#":
                try:
                    if data[0] == ">":
                        if len(sequence) > 1:
                            if accession in ucsc_mrna_to_gene:
                                gene_found = "no"
                                for ens_gene in ucsc_mrna_to_gene[accession]:
                                    if ens_gene in probeset_seq_db:
                                        sequence = string.upper(sequence)
                                        gene_found = "yes"
                                        mRNA_seq = sequence[1:]
                                        mRNA_length = len(mRNA_seq)
                                        k += 1
                                        probeset_seq_data = probeset_seq_db[ens_gene]
                                        results = simpleSeqMatchProtocol(probeset_seq_data, mRNA_seq)
                                        for (call, probeset) in results:
                                            dataw.write(string.join([probeset, str(call), accession], "\t") + "\n")
                                if gene_found == "yes":
                                    values = [accession, mRNA_seq]
                                    values = string.join(values, "\t") + "\n"
                                    datar.write(values)
                        values = string.split(data, " ")
                        accession = values[0][1:]
                        sequence = "|"
                        continue
                except IndexError:
                    null = []
                try:
                    if data[0] != ">":
                        sequence = sequence + data
                except IndexError:
                    print kill
                    continue
    datar.close()
    end_time = time.time()
    time_diff = int(end_time - start_time)
    print "UCSC mRNA sequences analyzed in %d seconds" % time_diff
开发者ID:venkatmi,项目名称:altanalyze,代码行数:92,代码来源:mRNASeqAlign2.py


注:本文中的UI.getSpeciesInfo方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。