This article collects typical usage examples of the Python unique.unique function. If you are wondering how the Python unique function works, how to call it, or what real uses of unique look like, the curated code examples below should help.
The following shows 15 code examples of the unique function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
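Before the examples: unique.unique is used throughout as a simple list-deduplication helper from the project's own unique module (the AltDatabase paths suggest the AltAnalyze codebase). The sketch below is an assumption about its behavior rather than the module's actual source; it only illustrates the contract the examples rely on: duplicates are removed, a list is returned, and callers sort the result themselves when order matters.

### Minimal sketch of the assumed behavior of unique.unique (not the real module source).
### Works for lists of hashable items such as strings and tuples.
def unique(ls):
    seen = {}                      # dictionary keys collapse duplicate entries
    for item in ls:
        seen[item] = None
    return list(seen.keys())       # order is not guaranteed in this sketch

### Hypothetical usage:
### unique(['ENSG0001', 'ENSG0002', 'ENSG0001'])  ->  ['ENSG0001', 'ENSG0002'] (order may vary)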
Example 1: compareProteinFeatures
def compareProteinFeatures(protein_ft,neg_coding_seq,pos_coding_seq):
    ###Parse out ft-information. Generate ft-fragment sequences for querying
    ###This is a modification of the original script from FeatureAlignment but simplified for exon analysis
    protein_ft_unique=[]; new_ft_list = []
    for ft_data in protein_ft:
        ft_name = ft_data.PrimaryAnnot(); domain_seq = ft_data.DomainSeq(); annotation = ft_data.SecondaryAnnot()
        protein_ft_unique.append((ft_name,annotation,domain_seq))
    ###Redundant entries that are class objects can't be eliminated, so save to a new list and eliminate redundant entries
    protein_ft_unique = unique.unique(protein_ft_unique)
    for (ft_name,annotation,domain_seq) in protein_ft_unique:
        ft_length = len(domain_seq)
        new_ft_data = 'null',domain_seq,ft_name,annotation
        new_ft_list.append(new_ft_data)
    new_ft_list = unique.unique(new_ft_list)
    pos_ft = []; neg_ft = []; all_fts = []
    for (pos,seq,ft_name,annot) in new_ft_list:
        if seq in pos_coding_seq:
            pos_ft.append([pos,seq,ft_name,annot]); all_fts.append([pos,seq,ft_name,annot])
        if seq in neg_coding_seq:
            neg_ft.append([pos,seq,ft_name,annot]); all_fts.append([pos,seq,ft_name,annot])
    all_fts = unique.unique(all_fts)
    pos_ft_missing=[]; neg_ft_missing=[]
    for entry in all_fts:
        if entry not in pos_ft: pos_ft_missing.append(entry)
        if entry not in neg_ft: neg_ft_missing.append(entry)
    pos_ft_missing2=[]; neg_ft_missing2=[]
    for entry in pos_ft_missing: entry[1] = ''; pos_ft_missing2.append(entry)
    for entry in neg_ft_missing: entry[1] = ''; neg_ft_missing2.append(entry)
    pos_ft_missing2 = unique.unique(pos_ft_missing2)
    neg_ft_missing2 = unique.unique(neg_ft_missing2)
    return neg_ft_missing2,pos_ft_missing2
Example 2: grabRNAIdentifiers
def grabRNAIdentifiers(mrna_assignment):
    ensembl_ids=[]; mRNA_ids=[]
    mRNA_entries = string.split(mrna_assignment,' /// ')
    for entry in mRNA_entries:
        mRNA_info = string.split(entry,' // '); mrna_ac = mRNA_info[0]
        if 'ENS' in mrna_ac: ensembl_ids.append(mrna_ac)
        else:
            try: int(mrna_ac[-3:]); mRNA_ids.append(mrna_ac)
            except ValueError: continue
    ensembl_ids = unique.unique(ensembl_ids)
    mRNA_ids = unique.unique(mRNA_ids)
    return ensembl_ids, mRNA_ids
Example 3: reformatHeatmapFile
def reformatHeatmapFile(input_file):
    import unique
    export_file=string.replace(input_file,'Clustering-','Input-')
    eo = export.ExportFile(export_file)
    first_row = True
    fn=filepath(input_file)
    for line in open(fn,'rU').xreadlines():
        data = cleanUpLine(line)
        t = string.split(data,'\t')
        if first_row == True:
            if 'column_clusters-flat' not in t:
                array_names = []
                for i in t[2:]:
                    array_names.append(string.replace(i,':','-'))
                    #array_names.append(i)
            elif 'column_clusters-flat' in t:
                array_clusters = t[2:]
                unique_clusters = unique.unique(array_clusters)
                ind=0; headers=[]
                for c in array_clusters:
                    headers.append(c+'::'+array_names[ind])
                    ind+=1
                headers = string.join(['uid']+headers,'\t')+'\n'
                eo.write(headers)
                first_row = False
        else:
            values = string.join([t[0]]+t[2:],'\t')+'\n'
            eo.write(values)
    return export_file, len(unique_clusters)
Example 4: reformatHeatmapFile
def reformatHeatmapFile(input_file):
    import unique
    export_file = string.replace(input_file, "Clustering-", "Input-")
    eo = export.ExportFile(export_file)
    first_row = True
    fn = filepath(input_file)
    for line in open(fn, "rU").xreadlines():
        data = cleanUpLine(line)
        t = string.split(data, "\t")
        if first_row == True:
            if "column_clusters-flat" not in t:
                array_names = []
                for i in t[2:]:
                    array_names.append(string.replace(i, ":", "-"))
                    # array_names.append(i)
            elif "column_clusters-flat" in t:
                array_clusters = t[2:]
                unique_clusters = unique.unique(array_clusters)
                ind = 0
                headers = []
                for c in array_clusters:
                    headers.append(c + "::" + array_names[ind])
                    ind += 1
                headers = string.join(["uid"] + headers, "\t") + "\n"
                eo.write(headers)
                first_row = False
        else:
            values = string.join([t[0]] + t[2:], "\t") + "\n"
            eo.write(values)
    return export_file, len(unique_clusters)
Example 5: getFeatureIsoformGenomePositions
def getFeatureIsoformGenomePositions(species,protein_ft_db,mRNA_protein_seq_db,gene_transcript_db,coordinate_type):
    """ Adapted from compareProteinFeatures but for one isoform and returns genomic coordinates for each feature
    This function is designed to export all unique isoforms rather than just comparison isoforms """
    import export
    export_file = 'AltDatabase/ensembl/'+species+'/ProteinFeatureIsoform_complete.txt'
    export_data = export.ExportFile(export_file)
    failed = 0
    worked = 0
    failed_ac=[]
    for gene in protein_ft_db:
        transcript_feature_db={}
        for ft in protein_ft_db[gene]:
            try:
                ft_name = ft.PrimaryAnnot(); annotation = ft.SecondaryAnnot()
                for (mRNA,type) in gene_transcript_db[gene]:
                    try:
                        protein,protein_seq = mRNA_protein_seq_db[mRNA]
                        error = False
                    except Exception:
                        failed_ac.append(mRNA)
                        error = True
                    if error == False:
                        if ft.DomainSeq() in protein_seq:
                            if coordinate_type == 'genomic':
                                pos1 = ft.GenomicStart(); pos2 = ft.GenomicStop()
                            else:
                                pos1 = str(ft.DomainStart()); pos2 = str(ft.DomainEnd())
                            ### There are often many features that overlap within a transcript, so consistently pick just one
                            if mRNA in transcript_feature_db:
                                db = transcript_feature_db[mRNA]
                                if (pos1,pos2) in db:
                                    db[pos1, pos2].append([protein,ft_name,annotation])
                                else:
                                    db[pos1, pos2]=[[protein,ft_name,annotation]]
                            else:
                                db={}
                                db[pos1, pos2]=[[protein,ft_name,annotation]]
                                transcript_feature_db[mRNA] = db
                            #values = [mRNA, protein, pos1, pos2,ft_name,annotation]; unique_entries.append(values)
                            worked+=1
            except IOError:
                failed+=1
        for transcript in transcript_feature_db:
            db = transcript_feature_db[transcript]
            for (pos1,pos2) in db:
                db[pos1,pos2].sort() ### Pick the alphabetically listed first feature
                protein,ft_name,annotation = db[pos1,pos2][0]
                values = [transcript, protein, pos1, pos2,ft_name,annotation]
                export_data.write(string.join(values,'\t')+'\n')
    export_data.close()
    print failed,'features failed to have corresponding aligned genomic locations out of', worked+failed
    failed_ac = unique.unique(failed_ac)
    print len(failed_ac),'mRNAs without identified/in silico derived proteins' ### Appear to be ncRNAs without ATGs
    print failed_ac[:20]
Example 6: eliminate_redundant_dict_values
def eliminate_redundant_dict_values(database):
    db1={}
    for key in database:
        list = unique.unique(database[key])
        list.sort()
        db1[key] = list
    return db1
Example 7: eliminateRedundant
def eliminateRedundant(database):
    db1 = {}
    for key in database:
        list = unique.unique(database[key])
        list.sort()
        db1[key] = list
    return db1
Example 8: importSplicingEventsToVisualize
def importSplicingEventsToVisualize(eventsToVisualizeFilename):
    splicing_events = []
    ### Import the splicing events to visualize from an external text file (multiple formats supported)
    type = None
    expandedSearch = False
    firstLine = True
    for line in open(eventsToVisualizeFilename, "rU").xreadlines():
        line = cleanUpLine(line)
        t = string.split(line, "\t")
        if firstLine:
            if "junctionID-1" in t:
                j1i = t.index("junctionID-1")
                j2i = t.index("junctionID-2")
                type = "ASPIRE"
                expandedSearch = True
            if "ANOVA" in t:
                type = "PSI"
            elif "independent confirmation" in t:
                type = "confirmed"
                expandedSearch = True
            elif "ANOVA" in eventsToVisualizeFilename:
                type = "ANOVA"
            firstLine = False
            if "|" in t[0]:
                type = "ANOVA"
        if " " in t[0] and ":" in t[0]:
            splicing_events.append(t[0])
        elif type == "ASPIRE":
            splicing_events.append(t[j1i] + " " + t[j2i])
            splicing_events.append(t[j2i] + " " + t[j1i])
        elif type == "ANOVA":
            try:
                a, b = string.split(t[0], "|")
                a = string.split(a, ":")
                a = string.join(a[1:], ":")
                splicing_events.append(a + " " + b)
                splicing_events.append(b + " " + a)
            except Exception:
                pass
        elif type == "PSI":
            try:
                j1, j2 = string.split(t[0], "|")
                a, b, c = string.split(j1, ":")
                j1 = b + ":" + c
                splicing_events.append(j1 + " " + j2)
                splicing_events.append(j2 + " " + j1)
            except Exception:
                # print traceback.format_exc();sys.exit()
                pass
        elif type == "confirmed":
            try:
                event_pair1 = string.split(t[1], "|")[0]
                a, b, c, d = string.split(event_pair1, "-")
                splicing_events.append(a + "-" + b + " " + c + "-" + d)
                splicing_events.append(c + "-" + d + " " + a + "-" + b)
            except Exception:
                pass
    splicing_events = unique.unique(splicing_events)
    return splicing_events, expandedSearch
Example 9: sangerImport
def sangerImport(parse_sequences):
    """Sanger center (miRBase) sequence was provided as a custom (requested) dump of their v5 target predictions
    (http://microrna.sanger.ac.uk/targets/v5/), containing Ensembl gene IDs, microRNA names, and putative target
    sequences, specific for either mouse or human. Mouse was requested in late 2005 whereas human in late 2007.
    These same annotation files, missing the actual target sequence but containing an ENS transcript and coordinate
    locations for that build (allowing sequence extraction with the appropriate Ensembl build) exist at:
    http://microrna.sanger.ac.uk/cgi-bin/targets/v5/download.pl"""
    if species == 'Hs': filename = 'AltDatabase/miRBS/'+species+'/'+'mirbase-v5_homo_sapiens.mirna.txt'; prefix = 'hsa-'
    if species == 'Rn': filename = 'AltDatabase/miRBS/'+species+'/'+'sanger_miR_target_predictions.txt'; prefix = 'rno-'
    if species == 'Mm': filename = 'AltDatabase/miRBS/'+species+'/'+'sanger_miR_target_predictions.txt'; prefix = 'mmu-'
    print 'parsing', filename; count=0
    fn=filepath(filename); x=1; mir_sequences=[]
    verifyFile(filename,species) ### Makes sure file is local and if not downloads.
    for line in open(fn,'rU').xreadlines():
        data = cleanUpLine(line)
        t = string.split(data,'\t')
        if x==0: x=1
        else:
            ensembl_geneids=[]
            if species == 'Hs':
                try:
                    mir = t[1]; ens_transcript = t[2]; ensembl_geneid = t[17]; mir_sequences = string.upper(t[14])
                    ensembl_geneids.append(ensembl_geneid)
                except IndexError: print line;kill
            elif species == 'Mm':
                ens_transcript,mir,mir_sequences = t
                if ens_transcript in ens_gene_to_transcript:
                    ensembl_geneids = ens_gene_to_transcript[ens_transcript]; ensembl_geneid = ensembl_geneids[0]
            elif species == 'Rn':
                ensembl_geneid,mir,mir_sequences = t
                mir_sequences = string.lower(mir_sequences); mir = string.replace(mir,'hsa','rno'); mir = string.replace(mir,'mmu','rno')
                ensembl_geneids=[ensembl_geneid]
            geneid_ls=[]
            #mir_sequences = string.replace(mir_sequences,'-',''); mir_sequences = string.replace(mir_sequences,'=','')
            #mir_sequences = string.upper(mir_sequences)
            #if 'GGCTCCTGTCACCTGGGTCCGT' in mir_sequences:
            #print ensembl_geneid, mir; sys.exit()
            for ensembl_geneid in ensembl_geneids:
                if ensembl_geneid in redundant_ensembl_by_build: ###Thus there are redundant geneids
                    geneid_ls += redundant_ensembl_by_build[ensembl_geneid]+[ensembl_geneid]
                else: geneid_ls += [ensembl_geneid]
                if species == 'Hs':
                    if ens_transcript in ens_gene_to_transcript: geneid_ls+= ens_gene_to_transcript[ens_transcript]
            geneid_ls = unique.unique(geneid_ls)
            if len(geneid_ls) == 1 and geneid_ls[0]=='': null =[] ###not a valid gene
            elif prefix in mir:
                for ensembl_geneid in geneid_ls:
                    if parse_sequences == 'yes':
                        if (mir,ensembl_geneid) in combined_results:
                            mir_sequences = string.replace(mir_sequences,'-',''); mir_sequences = string.replace(mir_sequences,'=',''); count+=1
                            combined_results[(mir,ensembl_geneid)].append(string.upper(mir_sequences))
                    else:
                        if prefix in mir:
                            y = MicroRNATargetData(ensembl_geneid,'',mir,mir_sequences,'mirbase'); count+=1
                            try: microRNA_target_db[mir].append(y)
                            except KeyError: microRNA_target_db[mir] = [y]
    print count, 'miRNA-target relationships added for mirbase'
Example 10: importSplicingEventsToVisualize
def importSplicingEventsToVisualize(eventsToVisualizeFilename):
    splicing_events=[]
    ### Import the splicing events to visualize from an external text file (multiple formats supported)
    type = None
    expandedSearch = False
    firstLine = True
    for line in open(eventsToVisualizeFilename,'rU').xreadlines():
        line = cleanUpLine(line)
        t = string.split(line,'\t')
        if firstLine:
            if 'junctionID-1' in t:
                j1i = t.index('junctionID-1')
                j2i = t.index('junctionID-2')
                type='ASPIRE'
                expandedSearch = True
            if 'ANOVA' in t:
                type='PSI'
            elif 'independent confirmation' in t:
                type='confirmed'
                expandedSearch = True
            elif 'ANOVA' in eventsToVisualizeFilename:
                type = 'ANOVA'
            firstLine=False
            if '|' in t[0]:
                type = 'ANOVA'
        if ' ' in t[0] and ':' in t[0]:
            splicing_events.append(t[0])
        elif type=='ASPIRE':
            splicing_events.append(t[j1i] +' '+ t[j2i])
            splicing_events.append(t[j2i] +' '+ t[j1i])
        elif type=='ANOVA':
            try:
                a,b = string.split(t[0],'|')
                a = string.split(a,':')
                a = string.join(a[1:],':')
                splicing_events.append(a +' '+ b)
                splicing_events.append(b +' '+ a)
            except Exception: pass
        elif type=='PSI':
            try:
                j1,j2 = string.split(t[0],'|')
                a,b,c = string.split(j1,':')
                j1 = b+':'+c
                splicing_events.append(j1 +' '+ j2)
                splicing_events.append(j2 +' '+ j1)
            except Exception:
                #print traceback.format_exc();sys.exit()
                pass
        elif type=='confirmed':
            try:
                event_pair1 = string.split(t[1],'|')[0]
                a,b,c,d = string.split(event_pair1,'-')
                splicing_events.append(a+'-'+b +' '+ c+'-'+d)
                splicing_events.append(c+'-'+d +' '+ a+'-'+b)
            except Exception: pass
        else:
            splicing_events.append(t[0])
    splicing_events = unique.unique(splicing_events)
    return splicing_events,expandedSearch
Example 11: writeIsoformFile
def writeIsoformFile(isoform_junctions,o):
    for coord in isoform_junctions:
        isoform_junctions[coord] = unique.unique(isoform_junctions[coord])
        if '+' in coord:
            print coord, isoform_junctions[coord]
        if '+' in coord:
            sys.exit()
Example 12: eliminateRedundant
def eliminateRedundant(database):
    for key in database:
        try:
            list = makeUnique(database[key])
            list.sort()
        except Exception: list = unique.unique(database[key])
        database[key] = list
    return database
Example 13: Coordinates
def Coordinates(self):
    x=0; coords=[]
    for i in self.start_set:
        coord = self.Chr()+':'+str(i)+'-'+str(self.end_set[x])
        coords.append(coord)
        x+=1
    coords = unique.unique(coords)
    coords = string.join(coords,'|') ###If multiple coordinates
    return coords
Example 14: grabNestedOntologyIDs
def grabNestedOntologyIDs():
    nested_ontology_tree={}
    for path in path_dictionary:
        parent_ontology_id = path_ontology_db[path]
        child_ontology_list=[]
        for child_path in path_dictionary[path]:
            child_ontology_id = path_ontology_db[child_path]; child_ontology_list.append(child_ontology_id)
        child_ontology_list = unique.unique(child_ontology_list)
        nested_ontology_tree[parent_ontology_id] = child_ontology_list
    return nested_ontology_tree
Example 15: findAvailableOntologies
def findAvailableOntologies(species,mod_types):
    program_type,database_dir = unique.whatProgramIsThis()
    c = GrabFiles(); c.setdirectory('/'+database_dir+'/'+species+'/gene-go'); file_dirs=[]
    for mod in mod_types:
        file_dirs+= c.searchdirectory(mod+'-')
    avaialble_ontologies=[]
    for filedir in file_dirs:
        ontology_type = string.split(filedir,'-')[-1][:-4] ### remove the .txt
        avaialble_ontologies.append(ontology_type)
    avaialble_ontologies = unique.unique(avaialble_ontologies)
    return avaialble_ontologies