本文整理汇总了Python中dipper.utils.GraphUtils.GraphUtils.addClassToGraph方法的典型用法代码示例。如果您正苦于以下问题:Python GraphUtils.addClassToGraph方法的具体用法?Python GraphUtils.addClassToGraph怎么用?Python GraphUtils.addClassToGraph使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.utils.GraphUtils.GraphUtils的用法示例。
在下文中一共展示了GraphUtils.addClassToGraph方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _map_eom_terms
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addClassToGraph [as 别名]
def _map_eom_terms(self, raw, limit=None):
    """
    Read the local tab-separated mapping file and link each morphology
    term to its HP term.

    The first line of the file is consumed as a header.  Every following
    line is split on tabs into exactly five fields (term id, term label,
    HP id, HP label, notes).  Underscores in the HP id are replaced with
    colons; when the result contains ``HP:`` the HP term is added as a
    class and declared equivalent to the morphology term, otherwise a
    warning is logged.

    Triples:
        <eom id> owl:equivalentClass <hp id>

    :param raw: path of the tab-separated mapping file
    :param limit: when not None, reading stops after the first data row
        whose 1-based position exceeds this value
    :return: None
    """
    graph_utils = GraphUtils(curie_map.get())
    with open(raw, 'r') as mapping_file:
        # the header row carries no data; discard it
        mapping_file.readline()
        for rows_read, record in enumerate(mapping_file, start=1):
            (morphology_term_id, morphology_term_label,
             hp_id, hp_label, notes) = record.split('\t')

            # identifiers arrive as HP_nnnn; turn them into HP:nnnn
            hp_id = hp_id.replace('_', ':')

            if re.match(".*HP:.*", hp_id) is None:
                logger.warning(
                    'No matching HP term for %s', morphology_term_label)
            else:
                # declare the HP term, then tie the morphology term to it
                graph_utils.addClassToGraph(self.graph, hp_id, None)
                graph_utils.addEquivalentClass(
                    self.graph, morphology_term_id, hp_id)

            if limit is not None and rows_read > limit:
                break
    return
示例2: _process_phenotypicseries
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addClassToGraph [as 别名]
def _process_phenotypicseries(self, limit):
    """
    Create classes from the OMIM phenotypic series list.

    These are grouping classes to hook the more granular OMIM diseases.
    Every line up to and including the first one that begins with
    'Phenotypic Series' is treated as header and ignored.  Each later
    line that survives the skip filter is stripped, split on a tab into
    a (label, number) pair, and added as the class ``OMIM:<number>``
    with that label.

    :param limit: accepted for signature parity with the other parsers.
        NOTE(review): this method never reads it, so every row is
        loaded regardless of its value - confirm that is intended.
    :return: None
    """
    g = self.testgraph if self.testMode else self.graph
    logger.info("getting phenotypic series titles")
    gu = GraphUtils(curie_map.get())
    series_path = '/'.join(
        (self.rawdir, self.files['phenotypicSeries']['file']))
    start = False
    with open(series_path) as f:
        for line in f:
            # there are several lines of header in the file;
            # skip everything through the 'Phenotypic Series' line
            if not start:
                if re.match('Phenotypic Series', line):
                    start = True
                continue
            # Skip blank lines.  The pattern is now a raw string: '\w' in
            # a plain literal is an invalid escape sequence.  Note that it
            # also matches lines made up solely of word characters.
            if re.match(r'\w*$', line):
                continue
            (ps_label, ps_num) = line.strip().split('\t')
            omim_id = 'OMIM:'+ps_num
            gu.addClassToGraph(g, omim_id, ps_label)
    return
示例3: _get_gene_history
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addClassToGraph [as 别名]
def _get_gene_history(self, limit):
    """
    Loop through the gene_history file and add the old gene ids as
    deprecated classes, where the new gene id is the replacement for it.
    The old gene symbol is added as a synonym to the new gene.

    Lines starting with '#' are skipped, as are rows where the current
    or the discontinued gene number is '-', rows whose taxon is not in
    ``self.tax_ids`` and, in test mode, rows whose gene is not in
    ``self.gene_ids``.

    :param limit: when not None and not in test mode, reading stops
        after the first accepted row whose running count exceeds it
    :return: None
    """
    gu = GraphUtils(curie_map.get())
    g = self.testgraph if self.testMode else self.graph
    logger.info("Processing Gene records")
    line_counter = 0
    myfile = '/'.join((self.rawdir, self.files['gene_history']['file']))
    logger.info("FILE: %s", myfile)
    with gzip.open(myfile, 'rb') as f:
        for line in f:
            line = line.decode().strip()
            # skip comments
            if line.startswith('#'):
                continue
            (tax_num, gene_num, discontinued_num, discontinued_symbol,
             discontinued_date) = line.split('\t')

            # '-' marks a missing id; test it before any int() conversion
            if gene_num == '-' or discontinued_num == '-':
                continue
            if self.testMode and int(gene_num) not in self.gene_ids:
                continue
            if int(tax_num) not in self.tax_ids:
                continue

            line_counter += 1
            gene_id = ':'.join(('NCBIGene', gene_num))
            discontinued_gene_id = ':'.join(('NCBIGene', discontinued_num))

            # add the two genes
            gu.addClassToGraph(g, gene_id, None)
            gu.addClassToGraph(
                g, discontinued_gene_id, discontinued_symbol)
            # add the new gene id to replace the old gene id
            gu.addDeprecatedClass(g, discontinued_gene_id, [gene_id])
            # also add the old symbol as a synonym of the new gene
            gu.addSynonym(g, gene_id, discontinued_symbol)

            if (not self.testMode) and (
                    limit is not None and line_counter > limit):
                break
    return
示例4: _parse_curated_chem_disease
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addClassToGraph [as 别名]
def _parse_curated_chem_disease(self, limit):
    """
    Parse the curated chemical-disease publications file and create one
    association per row.

    Comment rows (starting with '#') are skipped.  Each remaining row is
    length-checked for ten columns, the chemical is added as a
    ``MESH:``-prefixed class, the disease id is added as a class, and,
    when a publication id is present, it is added as a ``PMID:``
    journal-article reference.  The association is then made between the
    chemical and the disease.

    :param limit: when not None and not in test mode, reading stops once
        the number of non-comment rows reaches this value
    :return: None
    """
    line_counter = 0
    file_path = '/'.join(
        (self.rawdir, self.static_files['publications']['file']))
    gu = GraphUtils(curie_map.get())
    with open(file_path, 'r') as tsvfile:
        reader = csv.reader(tsvfile, delimiter="\t")
        for row in reader:
            # catch comment lines
            if re.match('^#', ' '.join(row)):
                continue
            line_counter += 1
            self._check_list_len(row, 10)
            (pub_id, disease_label, disease_id, disease_cat, evidence,
             chem_label, chem_id, cas_rn, gene_symbol, gene_acc) = row

            rel_id = self._get_relationship_id(evidence)
            chem_id = 'MESH:'+chem_id
            gu.addClassToGraph(self.g, chem_id, chem_label)
            gu.addClassToGraph(self.g, disease_id, None)

            if pub_id != '':
                pub_id = 'PMID:'+pub_id
                r = Reference(
                    pub_id, Reference.ref_types['journal_article'])
                r.addRefToGraph(self.g)
                pub_ids = [pub_id]
            else:
                # No publication on this row.  An empty list keeps the
                # argument a list, as before.
                # NOTE(review): confirm _make_association accepts an
                # empty list of publications.
                pub_ids = []

            # chem_id and pub_id already carry their prefixes; adding
            # them again produced 'MESH:MESH:...' / 'PMID:PMID:...' and
            # raised TypeError when the publication was missing.
            self._make_association(chem_id, disease_id, rel_id, pub_ids)

            if not self.testMode and limit is not None \
                    and line_counter >= limit:
                break
    return
示例5: _get_phenotypicseries_parents
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addClassToGraph [as 别名]
def _get_phenotypicseries_parents(entry, g):
    """
    Extract the phenotypic series parent relationship out of the entry.

    When ``entry['phenotypicSeriesExists']`` is True, series numbers are
    collected from ``entry['phenotypeMapList']`` and from
    ``entry['geneMap']['phenotypeMapList']`` (whichever are present).
    For each collected number, ``OMIM:<number>`` is added as a class and
    passed to ``addSubclass`` together with the entry's own OMIM id.

    :param entry: dict for one OMIM record; must contain 'mimNumber'
    :param g: graph the classes and subclass links are added to
    :return: None
    """
    gu = GraphUtils(curie_map.get())
    omimid = 'OMIM:'+str(entry['mimNumber'])

    # gather the phenotype maps from both locations, in the original order
    phenotype_maps = []
    if entry.get('phenotypicSeriesExists') is True:
        if 'phenotypeMapList' in entry:
            phenotype_maps.extend(entry['phenotypeMapList'])
        if 'geneMap' in entry and 'phenotypeMapList' in entry['geneMap']:
            phenotype_maps.extend(entry['geneMap']['phenotypeMapList'])

    # the phenotypic series mappings
    serieslist = []
    for pheno in phenotype_maps:
        pheno_map = pheno['phenotypeMap']
        # Not every phenotype map has to belong to a series.  The top-level
        # list used to be read without this guard and raised KeyError;
        # it now gets the same guard as the geneMap list.
        if 'phenotypicSeriesNumber' in pheno_map:
            serieslist.append(pheno_map['phenotypicSeriesNumber'])

    # add this entry as a subclass of the series entry
    for ser in serieslist:
        series_id = 'OMIM:'+ser
        gu.addClassToGraph(g, series_id, None)
        gu.addSubclass(g, series_id, omimid)
    return
示例6: _process_genes
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addClassToGraph [as 别名]
def _process_genes(self, taxid, limit=None):
    """
    Load the Ensembl genes of one taxon from its tab-separated file.

    Each row supplies the Ensembl gene id, external gene name,
    description, biotype and Entrez gene id; for human (taxid '9606') a
    sixth column holds the HGNC id.  The gene is added as a class under
    an ``ENSEMBL:`` id, declared equivalent to its ``NCBIGene:`` and HGNC
    ids when those are non-empty, and attached to ``NCBITaxon:<taxid>``.

    A row with too few columns logs an error and ends the method
    immediately (the property-loading calls at the end are skipped in
    that case, as before).

    :param taxid: taxon number as a string; also the key into self.files
    :param limit: when not None and not in test mode, reading stops
        after the first accepted row whose running count exceeds it
    :return: None
    """
    gu = GraphUtils(curie_map.get())
    g = self.testgraph if self.testMode else self.graph
    geno = Genotype(g)
    raw = '/'.join((self.rawdir, self.files[taxid]['file']))
    line_counter = 0

    # in the case of human genes, we also get the hgnc id,
    # and it is the last col
    is_human = taxid == '9606'
    # Five columns are always unpacked and human rows read a sixth.  The
    # old '< 4' guard let short rows through to a ValueError/IndexError.
    min_cols = 6 if is_human else 5

    logger.info("Processing Ensembl genes for tax %s", taxid)
    with open(raw, 'r', encoding="utf8") as csvfile:
        filereader = csv.reader(csvfile, delimiter='\t')
        for row in filereader:
            if len(row) < min_cols:
                logger.error("Data error for file %s", raw)
                return
            (ensembl_gene_id, external_gene_name, description,
             gene_biotype, entrezgene) = row[0:5]
            hgnc_id = row[5] if is_human else None

            if self.testMode and entrezgene != '' \
                    and int(entrezgene) not in self.gene_ids:
                continue

            line_counter += 1
            gene_id = 'ENSEMBL:'+ensembl_gene_id
            if description == '':
                description = None

            # NOTE(review): the looked-up gene type is discarded right
            # away, so genes are added untyped.  Both statements are kept
            # because _get_gene_type may have side effects - confirm
            # whether the override is still wanted.
            gene_type_id = self._get_gene_type(gene_biotype)
            gene_type_id = None
            gu.addClassToGraph(
                g, gene_id, external_gene_name, gene_type_id, description)

            if entrezgene != '':
                gu.addEquivalentClass(g, gene_id, 'NCBIGene:'+entrezgene)
            if hgnc_id is not None and hgnc_id != '':
                gu.addEquivalentClass(g, gene_id, hgnc_id)
            geno.addTaxon('NCBITaxon:'+taxid, gene_id)

            if not self.testMode \
                    and limit is not None and line_counter > limit:
                break

    gu.loadProperties(g, Feature.object_properties, gu.OBJPROP)
    gu.loadProperties(g, Feature.data_properties, gu.DATAPROP)
    gu.loadProperties(g, Genotype.object_properties, gu.OBJPROP)
    gu.loadAllProperties(g)
    return
示例7: _get_gene2pubmed
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addClassToGraph [as 别名]
def _get_gene2pubmed(self, limit):
    """
    Loop through the gene2pubmed file and add a simple triple to say
    that a given publication is_about a gene.
    Publications are added through ``addIndividualToGraph`` and genes
    through ``addClassToGraph``.

    Lines starting with '#' are skipped, as are rows where the gene or
    pubmed number is '-', rows whose taxon is not in ``self.tax_ids``
    and, in test mode, rows whose gene is not in ``self.gene_ids``.

    :param limit: when not None and not in test mode, reading stops
        after the first accepted row whose running count exceeds it
    :return: None
    """
    gu = GraphUtils(curie_map.get())
    g = self.testgraph if self.testMode else self.graph
    is_about = gu.getNode(gu.object_properties['is_about'])
    logger.info("Processing Gene records")
    line_counter = 0
    myfile = '/'.join((self.rawdir, self.files['gene2pubmed']['file']))
    logger.info("FILE: %s", myfile)
    with gzip.open(myfile, 'rb') as f:
        for line in f:
            line = line.decode().strip()
            # skip comments
            if line.startswith('#'):
                continue
            (tax_num, gene_num, pubmed_num) = line.split('\t')

            # '-' marks a missing id.  Test it before the int() calls
            # below; previously a '-' gene raised ValueError in test mode.
            if gene_num == '-' or pubmed_num == '-':
                continue
            if self.testMode and int(gene_num) not in self.gene_ids:
                continue
            if int(tax_num) not in self.tax_ids:
                continue

            line_counter += 1
            gene_id = ':'.join(('NCBIGene', gene_num))
            pubmed_id = ':'.join(('PMID', pubmed_num))

            # add the gene, in case it hasn't before
            gu.addClassToGraph(g, gene_id, None)
            # add the publication as a NamedIndividual
            # (no type is given yet; TODO add type publication)
            gu.addIndividualToGraph(g, pubmed_id, None, None)
            # Write to the selected graph.  This used to go to self.graph
            # unconditionally, so test mode wrote into the main graph.
            g.add((gu.getNode(pubmed_id), is_about, gu.getNode(gene_id)))

            if not self.testMode and limit is not None \
                    and line_counter > limit:
                break
    return
示例8: _process_ortholog_classes
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addClassToGraph [as 别名]
def _process_ortholog_classes(self, limit=None):
    """
    Add the KEGG orthology classes to the graph.

    Each row of the tab-separated file is an (id, name) pair.  The name
    is split on ';': the first piece becomes the class label and, when
    there is more than one piece, every piece is added as a synonym and
    the last piece is also added as the description.

    Triples created:
    <orthology_class_id> is a class
    <orthology_class_id> has label <orthology_symbols>
    <orthology_class_id> has description <orthology_description>

    :param limit: when not None and not in test mode, reading stops
        after the first processed row whose 1-based position exceeds it
    :return: None
    """
    logger.info("Processing ortholog classes")
    graph = self.testgraph if self.testMode else self.graph
    graph_utils = GraphUtils(curie_map.get())
    source_path = '/'.join(
        (self.rawdir, self.files['ortholog_classes']['file']))

    with open(source_path, 'r', encoding="iso-8859-1") as tsv_handle:
        reader = csv.reader(tsv_handle, delimiter='\t', quotechar='\"')
        for rows_seen, row in enumerate(reader, start=1):
            (orthology_class_id, orthology_class_name) = row

            # in test mode only the whitelisted classes are loaded
            if self.testMode and \
                    orthology_class_id not in \
                    self.test_ids['ortholog_classes']:
                continue

            # FIXME: What's the proper route for this?
            # The orthology class is essentially a KEGG gene ID
            # that is species agnostic.  Add the ID and label as a class.
            # Would it be considered a gene as well?
            labels = orthology_class_name.split(';')
            primary_label = labels[0]  # the first one is the label

            orthology_class_id = 'KEGG-'+orthology_class_id.strip()
            graph_utils.addClassToGraph(
                graph, orthology_class_id, primary_label,
                OrthologyAssoc.terms['gene_family'])

            if len(labels) > 1:
                # every piece becomes a synonym
                # todo skip the first
                for synonym in labels:
                    graph_utils.addSynonym(
                        graph, orthology_class_id, synonym)
                # the final piece doubles as the description
                graph_utils.addDescription(
                    graph, orthology_class_id, labels[-1])

            if self.testMode or limit is None:
                continue
            if rows_seen > limit:
                break

    logger.info("Done with ortholog classes")
    return
示例9: _process_orthologs
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addClassToGraph [as 别名]
def _process_orthologs(self, raw, limit=None):
    """
    Map the orthologs of one species onto the KEGG orthology classes.

    Each row of the tab-separated file ``raw`` is a
    (gene id, orthology class id) pair; both get a ``KEGG:`` prefix.
    The orthology class is registered as a gene family through
    ``OrthologyAssoc.add_gene_family_to_graph`` and both ids are added
    as unlabelled classes.

    Triples created (as listed by the original author):
    <gene_id> is a class
    <orthology_class_id> is a class
    <assoc_id> has subject <gene_id>
    <assoc_id> has object <orthology_class_id>

    :param raw: path of the tab-separated ortholog file
    :param limit: when not None and not in test mode, reading stops
        after the first row whose 1-based position exceeds it
    :return: None
    """
    logger.info("Processing orthologs")
    graph = self.testgraph if self.testMode else self.graph
    graph_utils = GraphUtils(curie_map.get())
    graph_utils.loadAllProperties(graph)

    with open(raw, 'r', encoding="iso-8859-1") as tsv_handle:
        reader = csv.reader(tsv_handle, delimiter='\t', quotechar='\"')
        for rows_seen, row in enumerate(reader, start=1):
            (gene_id, orthology_class_id) = row
            orthology_class_id = 'KEGG:'+orthology_class_id.strip()
            gene_id = 'KEGG:'+gene_id.strip()

            # the KO id acts as a gene-family grouping class
            family_assoc = OrthologyAssoc(self.name, gene_id, None)
            family_assoc.add_gene_family_to_graph(
                graph, orthology_class_id)

            # labels are assumed to be supplied elsewhere
            for class_id in (gene_id, orthology_class_id):
                graph_utils.addClassToGraph(graph, class_id, None)

            if self.testMode or limit is None:
                continue
            if rows_seen > limit:
                break

    logger.info("Done with orthologs")
    return
示例10: _process_genes_kegg2ncbi
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addClassToGraph [as 别名]
def _process_genes_kegg2ncbi(self, limit=None):
    """
    Map the KEGG human gene IDs to the corresponding NCBI Gene IDs.

    Each row of the tab-separated file is a
    (KEGG gene id, NCBI gene id, link type) triple.  The KEGG id gets a
    ``KEGG-`` prefix and ``ncbi-geneid`` in the NCBI id is rewritten to
    ``NCBIGene``.

    Triples created:
    <kegg_gene_id> is a class
    <ncbi_gene_id> is a class
    <kegg_gene_id> equivalentClass <ncbi_gene_id>

    :param limit: when not None and not in test mode, reading stops
        after the first processed row whose 1-based position exceeds it
    :return: None
    """
    logger.info("Processing KEGG gene IDs to NCBI gene IDs")
    graph = self.testgraph if self.testMode else self.graph
    graph_utils = GraphUtils(curie_map.get())
    source_path = '/'.join((self.rawdir, self.files['ncbi']['file']))

    with open(source_path, 'r', encoding="iso-8859-1") as tsv_handle:
        reader = csv.reader(tsv_handle, delimiter='\t', quotechar='\"')
        for rows_seen, row in enumerate(reader, start=1):
            (kegg_gene_id, ncbi_gene_id, link_type) = row

            # in test mode only the whitelisted genes are loaded
            if self.testMode and \
                    kegg_gene_id not in self.test_ids['genes']:
                continue

            # switch the NCBI id over to the NCBIGene prefix
            ncbi_gene_id = re.sub(
                r'ncbi-geneid', 'NCBIGene', ncbi_gene_id)
            kegg_gene_id = 'KEGG-'+kegg_gene_id

            # Re-adding the KEGG gene here is redundant unless this table
            # has gene IDs that are absent from the genes table.
            for class_id in (kegg_gene_id, ncbi_gene_id):
                graph_utils.addClassToGraph(graph, class_id, None)
            graph_utils.addEquivalentClass(
                graph, kegg_gene_id, ncbi_gene_id)

            if self.testMode or limit is None:
                continue
            if rows_seen > limit:
                break

    logger.info("Done with KEGG gene IDs to NCBI gene IDs")
    return
示例11: _process_diseases
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addClassToGraph [as 别名]
def _process_diseases(self, limit=None):
    """
    Process the KEGG disease IDs.

    Each row of the tab-separated file is an (id, name) pair.  The id
    gets a ``KEGG-`` prefix; the name is recorded in ``self.label_hash``
    if that id has no entry yet (this happens before the test-mode
    filter), and the disease is then added as a labelled class.

    Triples created:
    <disease_id> is a class
    <disease_id> rdfs:label <disease_name>

    :param limit: when not None and not in test mode, reading stops
        after the first processed row whose 1-based position exceeds it
    :return: None
    """
    logger.info("Processing diseases")
    graph = self.testgraph if self.testMode else self.graph
    graph_utils = GraphUtils(curie_map.get())
    source_path = '/'.join((self.rawdir, self.files['disease']['file']))

    with open(source_path, 'r', encoding="iso-8859-1") as tsv_handle:
        reader = csv.reader(tsv_handle, delimiter='\t', quotechar='\"')
        for rows_seen, row in enumerate(reader, start=1):
            (disease_id, disease_name) = row
            disease_id = 'KEGG-'+disease_id.strip()

            # remember the first label seen for each disease id
            if disease_id not in self.label_hash:
                self.label_hash[disease_id] = disease_name

            # in test mode only the whitelisted diseases are loaded
            if self.testMode and \
                    disease_id not in self.test_ids['disease']:
                continue

            # Add the disease as a class.
            # we don't get all of these from MONDO yet see:
            # https://github.com/monarch-initiative/human-disease-ontology/issues/3
            # The diseases are not typed as DOID:4 yet, to avoid
            # bulking up the graph unnecessarily.
            graph_utils.addClassToGraph(graph, disease_id, disease_name)

            if self.testMode or limit is None:
                continue
            if rows_seen > limit:
                break

    logger.info("Done with diseases")
    return
示例12: process_gene_ids
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addClassToGraph [as 别名]
def process_gene_ids(self, limit):
    """
    Load gene identifiers from the gzipped, comma-separated gene id file.

    Each row is unpacked as
    (taxon number, gene number, symbol, synonym, live status), e.g.
    ``6239,WBGene00000001,aap-1,Y110A7A.10,Live``.  The gene is added as
    a ``WormBase:`` class typed as a gene, labelled with the symbol
    (falling back to the synonym, then to no label), marked deprecated
    when the status is 'Dead', attached to its ``NCBITaxon:`` id, and
    given the synonym when one is present.

    :param limit: when not None and not in test mode, reading stops
        after the first processed row whose 1-based position exceeds it
    :return: None
    """
    source_path = '/'.join((self.rawdir, self.files['gene_ids']['file']))
    graph = self.testgraph if self.testMode else self.graph
    graph_utils = GraphUtils(curie_map.get())
    logger.info("Processing Gene IDs")
    geno = Genotype(graph)

    with gzip.open(source_path, 'rb') as gz_handle:
        text_handle = io.TextIOWrapper(gz_handle, newline="")
        reader = csv.reader(text_handle, delimiter=',', quotechar='\"')
        for rows_seen, row in enumerate(reader, start=1):
            (taxon_num, gene_num, gene_symbol, gene_synonym, live) = row

            # in test mode only the whitelisted genes are loaded
            if self.testMode and gene_num not in self.test_ids['gene']:
                continue

            taxon_id = 'NCBITaxon:'+taxon_num
            gene_id = 'WormBase:'+gene_num

            # label preference: symbol, then synonym, then nothing
            if gene_symbol == '':
                gene_symbol = gene_synonym if gene_synonym != '' else None

            graph_utils.addClassToGraph(
                graph, gene_id, gene_symbol, Genotype.genoparts['gene'])
            if live == 'Dead':
                graph_utils.addDeprecatedClass(graph, gene_id)
            geno.addTaxon(taxon_id, gene_id)
            if gene_synonym != '':
                graph_utils.addSynonym(graph, gene_id, gene_synonym)

            if self.testMode or limit is None:
                continue
            if rows_seen > limit:
                break
    return
示例13: _get_titles
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addClassToGraph [as 别名]
def _get_titles(self, limit):
    """
    Read the titles file and add one GeneReviews class per book.

    The file is tab-separated, latin-1 encoded, and its first row is a
    header.  Every other row is unpacked into exactly three columns, in
    this order: short name, title, NBK number.

    For every data row the NBK number is recorded in ``self.book_ids``.
    While the limit allows, ``GeneReviews:<NBK number>`` is also added to
    ``self.graph`` as a class labelled with the title, with the short
    name as a synonym.  The loop never stops early, so ``self.book_ids``
    always ends up holding every book number in the file.

    NOTE(review): the previous docstring described an NBK-to-OMIM
    mapping file (columns NBK_id, GR_shortname, OMIM), which does not
    match what this method parses; it presumably belongs to a sibling
    method.

    :param limit: when not None, classes are only added for rows whose
        1-based line number (header included) is below this value
    :return: None
    """
    raw = '/'.join((self.rawdir, self.files['titles']['file']))
    gu = GraphUtils(curie_map.get())
    with open(raw, 'r', encoding='latin-1') as csvfile:
        filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for line_counter, row in enumerate(filereader, start=1):
            if line_counter == 1:  # skip header
                continue
            (shortname, title, nbk_num) = row
            gr_id = 'GeneReviews:'+nbk_num

            # the set of book numbers is collected regardless of limit
            self.book_ids.add(nbk_num)

            if limit is None or line_counter < limit:
                gu.addClassToGraph(self.graph, gr_id, title)
                gu.addSynonym(self.graph, gr_id, shortname)
    return
示例14: _get_mappedids
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addClassToGraph [as 别名]
def _get_mappedids(self, entry, g):
    """
    Extract the Orphanet, UMLS and NCBI Gene mappings from the entry.

    Everything is read from ``entry['externalLinks']`` when present:

    * 'orphanetDiseases': each Orphanet id is added as a labelled class
      and as an xref of the OMIM id.
    * 'umlsIDs': each UMLS id is added as a class and as an xref of the
      OMIM id.
    * 'geneIDs': only when ``self._get_omimtype(entry)`` is the gene
      type, each id is declared an equivalent class of the OMIM id
      under the ``NCBIGene:`` prefix.

    :param entry: dict for one OMIM record; must contain 'mimNumber'
    :param g: graph the classes, xrefs and equivalences are added to
    :return: None
    """
    gu = GraphUtils(curie_map.get())
    omimid = 'OMIM:'+str(entry['mimNumber'])

    if 'externalLinks' in entry:
        links = entry['externalLinks']

        if 'orphanetDiseases' in links:
            # triple semi-colon delimited list of
            # double semi-colon delimited orphanet ID/disease pairs
            # 2970;;566;;Prune belly syndrome
            for item in links['orphanetDiseases'].split(';;;'):
                (orpha_num, internal_num, orpha_label) = item.split(';;')
                orpha_id = 'Orphanet:'+orpha_num.strip()
                gu.addClassToGraph(g, orpha_id, orpha_label.strip())
                gu.addXref(g, omimid, orpha_id)

        if 'umlsIDs' in links:
            for umls_num in links['umlsIDs'].split(','):
                umls_id = 'UMLS:'+umls_num
                gu.addClassToGraph(g, umls_id, None)
                gu.addXref(g, omimid, umls_id)

        if self._get_omimtype(entry) == Genotype.genoparts['gene'] \
                and 'geneIDs' in links:
            for gene_num in links['geneIDs'].split(','):
                gu.addEquivalentClass(g, omimid, 'NCBIGene:'+gene_num)
    return
示例15: _get_identifiers
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addClassToGraph [as 别名]
def _get_identifiers(self, limit):
    """
    Process the id mapping file provided by Biogrid.

    The file has a very large header, which we scan past (everything up
    to and including the line starting with 'BIOGRID_ID'), then pull the
    identifiers and make equivalence axioms.  Data rows are
    tab-separated:

        BIOGRID_ID  IDENTIFIER_VALUE  IDENTIFIER_TYPE  ORGANISM_OFFICIAL_NAME
        1           814566            ENTREZ_GENE      Arabidopsis thaliana

    Only rows for the organisms in ``speciesfilters`` are used.  For
    those, an identifier whose mapped prefix is in ``geneidtypefilters``
    becomes an equivalent class of ``BIOGRID:<id>``; otherwise an
    OFFICIAL_SYMBOL row adds the BIOGRID class with that symbol as its
    label.

    :param limit: when not None and not in test mode, reading stops
        once the count of species-matching rows exceeds it
    :return: None
    """
    logger.info("getting identifier mapping")
    line_counter = 0
    f = '/'.join((self.rawdir, self.files['identifiers']['file']))
    foundheader = False
    gu = GraphUtils(curie_map.get())
    g = self.testgraph if self.testMode else self.graph

    # TODO align this species filter with the one above
    # speciesfilters = 'Homo sapiens,Mus musculus,Drosophila melanogaster,
    # Danio rerio, Caenorhabditis elegans,Xenopus laevis'.split(',')
    speciesfilters = 'Homo sapiens,Mus musculus'.split(',')

    # TODO make these filters available as commandline options
    # geneidtypefilters='NCBIGene,OMIM,MGI,FlyBase,ZFIN,MGI,HGNC,
    # WormBase,XenBase,ENSEMBL,miRBase'.split(',')
    # (loop-invariant, so built once here instead of once per line)
    geneidtypefilters = 'NCBIGene,MGI,ENSEMBL,ZFIN,HGNC'.split(',')
    # proteinidtypefilters='HPRD,Swiss-Prot,NCBIProtein'

    # The context manager closes the archive even if a row fails to
    # parse; the former trailing close() was skipped on any exception.
    with ZipFile(f, 'r') as myzip:
        # assume that the first entry is the item
        fname = myzip.namelist()[0]
        with myzip.open(fname, 'r') as csvfile:
            for line in csvfile:
                line = line.decode()
                # skip header lines
                if not foundheader:
                    if line.startswith('BIOGRID_ID'):
                        foundheader = True
                    continue

                (biogrid_num, id_num, id_type,
                 organism_label) = line.strip().split('\t')

                # in test mode, skip any genes that
                # don't match our test set
                if self.testMode \
                        and int(biogrid_num) not in self.biogrid_ids:
                    continue

                # for each one of these,
                # create the node and add equivalent classes
                biogrid_id = 'BIOGRID:'+biogrid_num
                prefix = self._map_idtype_to_prefix(id_type)

                if organism_label.strip() in speciesfilters:
                    line_counter += 1
                    if prefix in geneidtypefilters:
                        mapped_id = ':'.join((prefix, id_num))
                        gu.addEquivalentClass(g, biogrid_id, mapped_id)
                    # this symbol will only get attached
                    # to the biogrid class
                    elif id_type == 'OFFICIAL_SYMBOL':
                        gu.addClassToGraph(g, biogrid_id, id_num)
                    # FIXME - unclear whether SYNONYM rows are synonyms
                    # or alternate ids, so they are not loaded.

                if not self.testMode and limit is not None \
                        and line_counter > limit:
                    break
    return