本文整理汇总了Python中Bio.Medline.parse方法的典型用法代码示例。如果您正苦于以下问题:Python Medline.parse方法的具体用法?Python Medline.parse怎么用?Python Medline.parse使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块Bio.Medline的用法示例。
在下文中一共展示了Medline.parse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: createTable
# 需要导入模块: from Bio import Medline [as 别名]
# 或者: from Bio.Medline import parse [as 别名]
def createTable(query):
    """Build the HTML table rows for the PubMed articles matching ``query``.

    Searches PubMed for ``query`` (at most 100 hits), downloads the hits in
    MEDLINE text format and renders one ``<tr>`` per record: title,
    publication date, authors, journal, the query itself, and a PubMed link
    followed by the abstract.  Missing record fields are rendered as the
    text ``None``.

    Args:
        query: Search term as entered by the user.

    Returns:
        ``"<h3> No query </h3>"`` when ``query`` is empty, otherwise the
        concatenated ``<tr>...</tr>`` rows (an empty string when the search
        has no hits).
    """
    if not query:
        return "<h3> No query </h3>"

    # Local import so the block does not depend on the file's import header.
    from html import escape

    MAX_COUNT = 100
    # Same replacement text as the original "\-", spelled without the
    # invalid escape sequence that newer Python versions warn about.
    pubmedquery = query.replace("-", "\\-")
    Entrez.email = "[email protected]"

    h = Entrez.esearch(db="pubmed", term=pubmedquery, retmax=MAX_COUNT)
    try:
        result = Entrez.read(h)
    finally:
        h.close()
    ids = result["IdList"]
    if not ids:
        # Nothing to fetch; do not send an empty id list to efetch.
        return ""

    def cell(value):
        # Query and record text are not trusted markup: escape them before
        # embedding them in the page.
        return escape(str(value))

    rows = []
    h = Entrez.efetch(db="pubmed", id=ids, rettype="medline", retmode="text")
    try:
        for record in Medline.parse(h):
            try:
                # NOTE(review): writers() is defined elsewhere; its output is
                # inserted unescaped in case it already returns markup.
                # Confirm and escape it if it returns plain text.
                authors = str(writers(record.get("FAU")))
            except TypeError:
                # Same best-effort policy as before: skip records whose
                # author list cannot be rendered.
                continue
            rows.append(
                "<tr><td width='22%'>" + cell(record.get("TI")) + "</td>"
                "<td width='5%'>" + cell(record.get("DP")) + "</td>"
                "<td width='5%'>" + authors + "</td>"
                "<td width='5%'>" + cell(record.get("JT")) + "</td>"
                "<td width='5%'>" + cell(query) + "</td>"
                "<td>"
                "<a href='http://www.ncbi.nlm.nih.gov/pubmed/"
                + cell(record.get("PMID"))
                + "'><img src='PubMed.png' height='75px' width='75px' alt='PubMed' align='right'/></a>"
                + cell(record.get("AB"))
                + "</td></tr>"
            )
    finally:
        h.close()
    return "".join(rows)
示例2: retrive_record
# 需要导入模块: from Bio import Medline [as 别名]
# 或者: from Bio.Medline import parse [as 别名]
def retrive_record(row):
    """Look up PubMed records for the author named in ``row``.

    ``row[1]`` is used as an author search term (``<name>[AUTH]``) and the
    hits are downloaded in MEDLINE text format.

    Args:
        row: Indexable whose first two items are copied into the result and
            whose second item is the author name to search for.

    Returns:
        ``[row[0], row[1], TI, AU, AD, DP, PMID]`` built from the last record
        returned, with ``"?"`` for any missing field, or ``None`` when the
        search yields no record.
    """
    # NOTE(review): the source lost its indentation, so it is not certain
    # whether the original returned inside the loop (first record) or after
    # it (last record).  This keeps the "after the loop" reading - confirm.
    name = row[1] + "[AUTH]"
    handle = Entrez.esearch(db="pubmed", term=name)
    try:
        search_result = Entrez.read(handle)
    finally:
        handle.close()
    idlist = search_result["IdList"]
    if not idlist:
        # No hits: nothing to fetch, and avoids returning an unbound name.
        return None

    temp = None
    handle = Entrez.efetch(db="pubmed", id=idlist, rettype="medline",
                           retmode="text")
    try:
        for record in Medline.parse(handle):
            temp = [
                row[0],
                row[1],
                record.get("TI", "?"),    # title
                record.get("AU", "?"),    # authors
                record.get("AD", "?"),    # AD field (affiliation in MEDLINE)
                record.get("DP", "?"),    # DP field (date of publication)
                record.get("PMID", "?"),  # pubmed id for url
            ]
    finally:
        handle.close()
    return temp
示例3: store_abstracts_for_query
# 需要导入模块: from Bio import Medline [as 别名]
# 或者: from Bio.Medline import parse [as 别名]
def store_abstracts_for_query(query,query_tag,maxN=None,preview_only=False):
    """Download the PubMed abstracts matching ``query`` and store them.

    The search is limited to publication dates within the last ten years
    (``reldate=10*365``) and uses the Entrez history server, so the records
    can be fetched in batches of 50.  Each parsed record is handed to
    ``pubmed_to_pg.store_medline_entry`` together with ``query_tag``.

    Args:
        query: PubMed search expression.
        query_tag: Tag passed through to ``store_medline_entry``.
        maxN: Optional upper bound on the number of records to download.
        preview_only: When true, only print the hit counts and return.

    Returns:
        None.
    """
    # Single-argument print calls behave the same on Python 2 and 3.
    print("will search %s" % (query,))
    Entrez.email = "[email protected]"
    search_results = Entrez.read(Entrez.esearch(db="pubmed",
                                                term=query,
                                                reldate=10*365, datetype="pdat",
                                                usehistory="y"))
    count = int(search_results["Count"])
    print("Found %i results" % count)
    if maxN is not None and maxN < count:
        count = maxN
        print("Only keeping first %s abstracts" % (count,))
    if preview_only:
        return
    sys.stdout.flush()

    batch_size = 50
    for start in range(0, count, batch_size):
        end = min(count, start + batch_size)
        print("Going to download record %i to %i" % (start + 1, end))
        sys.stdout.flush()
        fetch_handle = Entrez.efetch(db="pubmed",
                                     rettype="medline", retmode="text",
                                     # Ask only for what is left, so the last
                                     # batch cannot run past the maxN cap.
                                     retstart=start, retmax=end - start,
                                     webenv=search_results["WebEnv"],
                                     query_key=search_results["QueryKey"])
        try:
            for r in Medline.parse(fetch_handle):
                pubmed_to_pg.store_medline_entry(r, query_tag)
        finally:
            fetch_handle.close()
示例4: __init__
# 需要导入模块: from Bio import Medline [as 别名]
# 或者: from Bio.Medline import parse [as 别名]
def __init__(self, pmids):
    """Request the MEDLINE records for ``pmids`` and keep the parser.

    ``pmids`` is a list of PubMed ids.  ``self.records`` is whatever
    ``Medline.parse`` returns for the efetch handle.
    """
    Entrez.email = '[email protected]'
    fetch_options = {
        'db': 'pubmed',
        'id': pmids,
        'rettype': 'medline',
        'retmode': 'text',
    }
    self.records = Medline.parse(Entrez.efetch(**fetch_options))
示例5: getMedlineList
# 需要导入模块: from Bio import Medline [as 别名]
# 或者: from Bio.Medline import parse [as 别名]
def getMedlineList(pmids):
    """
    Download MEDLINE records for ``pmids`` and keep those with an abstract.

    The ids are fetched in slices of 650.  When parsing a slice fails, the
    first id of that slice is written to the ``errordir_medline_records``
    location under ``_mainFolder`` and the download continues with the next
    slice.

    Args:
        pmids: Sliceable sequence of article ids.

    Returns:
        List of MEDLINE records that contain an ``AB`` (abstract) field.
    """
    records = []
    listLength = len(pmids)
    Entrez.email = '[email protected]'
    for i in range(0, listLength, 650):
        tempList = pmids[i:i + 650]
        handle = Entrez.efetch(db='pubmed', id=tempList, rettype='medline', retmode='text')
        try:
            # Materialise first so a failing slice contributes no partial data.
            records.extend(list(Medline.parse(handle)))
        except Exception:
            # Best effort, as before, but KeyboardInterrupt and SystemExit
            # are no longer swallowed.
            IOmodule.writeOutTxt(_mainFolder+'/'+'errordir_medline_records', pmids[i], '')
        finally:
            handle.close()
        # Single-argument print calls behave the same on Python 2 and 3.
        print('Downloaded %s MedLine articles. %s remaining...'
              % (len(records), listLength - len(records)))
    cleaned_records = [article for article in records if 'AB' in article]
    print('Returned %s MedLine articles containing an abstract.' % len(cleaned_records))
    return cleaned_records
示例6: get_wikiref
# 需要导入模块: from Bio import Medline [as 别名]
# 或者: from Bio.Medline import parse [as 别名]
def get_wikiref(pmid):
    """Return the wiki ``{{cite journal}}`` entry for a given PubMed ID.

    Args:
        pmid: PubMed id to look up.

    Returns:
        The ``{{cite journal|...}}`` string built from the last record
        returned for ``pmid``.  Fields absent from the record are left
        empty.  The year is the first ``/``-separated part of the first
        ``CRDT`` entry, and ``accessdate`` is today's date.

    Raises:
        ValueError: If PubMed returns no record for ``pmid``.
    """
    import datetime

    handle = Entrez.efetch(db="pubmed", id=pmid, rettype="medline", retmode="text")
    try:
        records = list(Medline.parse(handle))
    finally:
        handle.close()
    if not records:
        # The original failed here with an unbound local; fail explicitly.
        raise ValueError("no PubMed record found for %s" % (pmid,))

    jetzt = datetime.datetime.now().strftime("%Y-%m-%d")
    rec = records[-1]

    # Work on a copy so the parsed record keeps its full author list.
    authors = list(rec.get("AU", []))
    firstauthor = authors[0] if authors else ""
    coauthors = ", ".join(authors[1:])

    # Year of publication, taken from the CRDT field.
    crdt = rec.get("CRDT")
    datee = crdt[0].split('/')[0] if crdt else ""

    # example:
    # {{cite journal|title=|journal=|date=2008/07/31/|first=Cyril|last=Herry|coauthors=i|volume=454|issue=7204|pages=600-606|id=PMID 18615015 {{doi|10.1038/nature07166}}|url=http://www.fmi.ch/downloads/news/2008.07.11.01.luthi.nature.press.release.pdf|format=|accessdate=2009-09-12 }}
    outstring = "{{cite journal|title=%s|journal=%s|year=%s|author=%s|coauthors=%s|volume=%s|pages=%s|id=PMID %s|accessdate=%s}}" % \
        (rec.get("TI", ""), rec.get("JT", ""), datee, firstauthor, coauthors,
         rec.get("VI", ""), rec.get("PG", ""), pmid, jetzt)
    return outstring
示例7: get_pubmed_document
# 需要导入模块: from Bio import Medline [as 别名]
# 或者: from Bio.Medline import parse [as 别名]
def get_pubmed_document(pubmed_ids, source='pubmed', include_pubtator=True):
    """Fetch PubMed records and store title and abstract as documents.

    For every fetched record that has a title (TI), abstract (AB), PMID and
    CRDT field, a ``Document`` keyed by the PMID is fetched or created, its
    title and source are saved, and two ``Section`` rows are written: kind
    ``'t'`` with the title and kind ``'a'`` with the abstract.

    Args:
        pubmed_ids: A single id or a list, tuple or set of ids.
        source: Value stored in ``Document.source``.
        include_pubtator: When true, ``doc.init_pubtator()`` is called for
            every stored document.

    Returns:
        None.
    """
    Entrez.email = settings.ENTREZ_EMAIL
    # isinstance also accepts list subclasses.  Tuples and sets are expanded
    # too, instead of being stringified into one meaningless id.
    if isinstance(pubmed_ids, (list, tuple, set)):
        ids = [str(doc_id) for doc_id in pubmed_ids]
    else:
        ids = [str(pubmed_ids)]

    h = Entrez.efetch(db='pubmed', id=ids, rettype='medline', retmode='text')
    try:
        # Reference to abbreviations: http://www.nlm.nih.gov/bsd/mms/medlineelements.html
        for record in Medline.parse(h):
            if not (record.get('TI') and record.get('AB')
                    and record.get('PMID') and record.get('CRDT')):
                continue

            # pad_split is defined elsewhere; its tokens are re-joined with
            # single spaces.
            title = ' '.join(pad_split(record.get('TI')))
            abstract = ' '.join(pad_split(record.get('AB')))

            doc, doc_c = Document.objects.get_or_create(document_id=record.get('PMID'))
            doc.title = title
            doc.source = source
            doc.save()

            sec, sec_c = Section.objects.get_or_create(kind='t', document=doc)
            sec.text = title
            sec.save()

            sec, sec_c = Section.objects.get_or_create(kind='a', document=doc)
            sec.text = abstract
            sec.save()

            if include_pubtator:
                doc.init_pubtator()
    finally:
        h.close()
示例8: fetch
# 需要导入模块: from Bio import Medline [as 别名]
# 或者: from Bio.Medline import parse [as 别名]
def fetch(t, s):
    """Return a text listing of the PubMed publications matching ``t``.

    Args:
        t: PubMed search term.
        s: Sort order passed to ``Entrez.esearch`` as ``sort``.

    Returns:
        A string starting with the total hit count, followed by one
        ``|``-prefixed block per record (title, authors, source, PubMed
        URL).  Entries are numbered downwards from the number of ids found.

    Raises:
        KeyError: If a record lacks TI, AU, SO or PMID.
    """
    h = Entrez.esearch(db="pubmed", term=t, retmax=10000, sort=s)
    try:
        idList = Entrez.read(h)["IdList"]
    finally:
        h.close()

    parts = [
        "Total publications for SA Beatson: **" + str(len(idList)) + "**\n\n",
        "Chronologically sorted:\n\n",
    ]
    if idList:
        handle = Entrez.efetch(db="pubmed", id=idList, rettype="medline", retmode="text")
        try:
            # Countdown numbering; named so it does not shadow builtin max().
            number = len(idList)
            for record in Medline.parse(handle):
                # The former pub_date = strptime(record["DA"], ...) was never
                # used and failed on any record without a DA field, so it is
                # gone.
                parts.append(
                    "| **%i.** %s\n| %s\n| %s\n| http://www.ncbi.nlm.nih.gov/pubmed/%s\n|\n" % (
                        number,
                        record["TI"],
                        ", ".join(record["AU"]),
                        record["SO"],
                        record["PMID"],
                    )
                )
                number -= 1
        finally:
            handle.close()
    return "".join(parts)
示例9: main
# 需要导入模块: from Bio import Medline [as 别名]
# 或者: from Bio.Medline import parse [as 别名]
def main(Substance, Organism, Gene):
    """Search PubMed for the built-in term pair and print the PMIDs found.

    The three parameters are accepted but not read by the body; the search
    terms are the constants defined below.  Prints the combined search term
    and a dict mapping it to the set of PMIDs found (at most 50 hits), then
    returns the string ``"Jay"``.
    """
    first_term = "Cocaine"
    second_term = "Elegans"
    max_hits = 50

    if second_term != "":
        search_term = first_term + " and " + second_term
    else:
        search_term = first_term
        if first_term == "":
            print("vul een zoekterm in")
            sys.exit()
    print(search_term)

    search_handle = Entrez.esearch(db="pubmed", term=search_term, retmax=max_hits)
    found_ids = Entrez.read(search_handle)["IdList"]
    fetch_handle = Entrez.efetch(db="pubmed", id=found_ids, rettype="medline",
                                 retmode="text")
    parsed = list(Medline.parse(fetch_handle))
    pmids = {entry.get("PMID", "?") for entry in parsed}

    print({search_term: pmids})
    return "Jay"
示例10: fetch_pubmed_data
# 需要导入模块: from Bio import Medline [as 别名]
# 或者: from Bio.Medline import parse [as 别名]
def fetch_pubmed_data(pmid):
    """Fetch the MEDLINE record for ``pmid`` from PubMed.

    Args:
        pmid: PubMed id to download.

    Returns:
        The first parsed MEDLINE record, or ``None`` when the record is
        missing or has no PMID (``response.flash`` is set) or when the remote
        service cannot be reached (``session.flash`` is set).

    Raises:
        Exception: If ``settings.author_email`` cannot be read.
    """
    from Bio import Medline,Entrez

    # The original nested two identical lookups; one is enough.
    try:
        ncbiemail = settings.author_email
    except Exception:
        raise Exception('Please set an email to use ncbi services')
    Entrez.email = ncbiemail
    Entrez.tool = 'mybiodb'

    try:
        handle = Entrez.efetch(db="pubmed", id=pmid, rettype="medline", retmode="text")
        try:
            # next(iterator, default) works on Python 2.6+ and 3, and turns
            # an empty result into None instead of raising StopIteration.
            entrez_response = next(Medline.parse(handle), None)
        finally:
            handle.close()
    except IOError:
        session.flash = 'Remote service not available, please try again.'
        return None

    if entrez_response is None or 'PMID' not in entrez_response:
        response.flash = 'pubmed ID error'
        return None
    return entrez_response
示例11: search_pubmed
# 需要导入模块: from Bio import Medline [as 别名]
# 或者: from Bio.Medline import parse [as 别名]
def search_pubmed(term):
    """Search PubMed for ``term`` and return the transformed records.

    Returns an empty ``BibDatabase`` when the global query reports zero
    PubMed hits, the result of ``transform_pubmed`` on the parsed records
    otherwise, and an empty list when any step raises.
    """
    print("Searching for", term)
    try:
        # First query: how many PubMed entries match?
        summary = Entrez.read(Entrez.egquery(term=term))
        nb_entries = 0
        pubmed_rows = (
            row for row in summary["eGQueryResult"] if row["DbName"] == "pubmed"
        )
        for row in pubmed_rows:
            nb_entries = row["Count"]
            print(row["Count"], 'results found.')
        if int(nb_entries) == 0:
            return BibDatabase()

        # Second query: the ids, capped at MAX_RESULTS.
        wanted = min(int(nb_entries), MAX_RESULTS)
        search_result = Entrez.read(
            Entrez.esearch(db="pubmed", term=term, retmax=wanted)
        )
        idlist = search_result["IdList"]

        # Third query: the MEDLINE descriptions.
        fetch_handle = Entrez.efetch(
            db="pubmed", id=idlist, rettype="medline", retmode="text"
        )
        return transform_pubmed(list(Medline.parse(fetch_handle)))
    except Exception as e:
        print('The search failed.')
        print(e)
        return []
示例12: fetch
# 需要导入模块: from Bio import Medline [as 别名]
# 或者: from Bio.Medline import parse [as 别名]
def fetch(self, batchSize=100):
    """Return the next batch of results.

    Args:
        batchSize: Maximum number of records to request.

    Returns:
        A list of ``paper.Paper`` objects built from the fetched records
        that have a PMID, or an empty list once the search is exhausted.
    """
    if self._done:
        return []
    end = min(self._searchCount, self._searchPosition + batchSize)
    log.info("Downloading from %i to %i..." % (self._searchPosition+1, end))
    fetchHandle = Entrez.efetch(db="pubmed", rettype="medline", retmode="text", retstart=self._searchPosition, retmax=batchSize, webenv=self._searchSession, query_key=self._queryKey)
    try:
        papers = [paper.Paper(r) for r in Medline.parse(fetchHandle) if r.get("PMID") is not None]
    finally:
        # Close the handle even when parsing or Paper construction fails.
        fetchHandle.close()
    log.info("... downloading done")
    self._searchPosition = self._searchPosition + batchSize
    if self._searchPosition >= self._searchCount:
        self._done = True
        log.info("Search ended.")
    return papers
示例13: test_pubmed_16381885_and_19850725
# 需要导入模块: from Bio import Medline [as 别名]
# 或者: from Bio.Medline import parse [as 别名]
def test_pubmed_16381885_and_19850725(self):
    """Bio.TogoWS.entry("pubmed", "16381885,19850725")

    Both entries are fetched in one call and parsed as MEDLINE; the test
    then checks the record count and each record's title and author list.
    """
    handle = TogoWS.entry("pubmed", "16381885,19850725")
    records = list(Medline.parse(handle))
    handle.close()

    expected = [
        (
            "From genomics to chemical genomics: new developments in KEGG.",
            [
                "Kanehisa M",
                "Goto S",
                "Hattori M",
                "Aoki-Kinoshita KF",
                "Itoh M",
                "Kawashima S",
                "Katayama T",
                "Araki M",
                "Hirakawa M",
            ],
        ),
        (
            "DDBJ launches a new archive database with analytical tools "
            "for next-generation sequence data.",
            [
                "Kaminuma E",
                "Mashima J",
                "Kodama Y",
                "Gojobori T",
                "Ogasawara O",
                "Okubo K",
                "Takagi T",
                "Nakamura Y",
            ],
        ),
    ]

    self.assertEqual(len(records), 2)
    for record, (title, authors) in zip(records, expected):
        self.assertEqual(record["TI"], title)
        self.assertEqual(record["AU"], authors)
示例14: top_papers
# 需要导入模块: from Bio import Medline [as 别名]
# 或者: from Bio.Medline import parse [as 别名]
def top_papers(papers,outpath=None,delim="\t", top=20):
    """Fetch PubMed info for the most-annotated PMIDs in ``papers``.

    Can be used with SP & GOA data.

    Args:
        papers: Mapping of PMID to a sized collection of annotations.
        outpath: Optional path of a delimited text file to write.
        delim: Field separator for that file (tab by default).
        top: Number of PMIDs with the most annotations to look up.

    Returns:
        List of ``(number of annotations, PMID, title, year, journal)``
        tuples, ordered by ascending annotation count.  ``"?"`` stands in for
        fields a record does not provide.
    """
    papers_annots = sorted((len(papers[p]), p) for p in papers)
    top_annots = papers_annots[-top:]
    idlist = [p[1] for p in top_annots]

    medrecs = []
    if idlist:
        # With no ids there is nothing to fetch; skip the request instead of
        # sending an empty id string.
        Entrez.email = "[email protected]"
        h = Entrez.efetch(db="pubmed", id=",".join(idlist),
                          rettype="medline", retmode="text")
        try:
            medrecs = list(Medline.parse(h))
        finally:
            h.close()

    # Records are paired with the requested ids by position, which assumes
    # efetch returns exactly one record per id in request order.
    papers_annots2 = []
    for p, medrec in zip(top_annots, medrecs):
        year_parts = medrec.get("DP", "?").split()
        papers_annots2.append((
            p[0],
            p[1],
            medrec.get("TI", "?"),
            year_parts[0] if year_parts else "?",  # empty DP no longer raises
            medrec.get("JT", "?"),
        ))

    if outpath:
        # The context manager closes the file even if a write fails.  write()
        # replaces the Python-2-only "print >>" form.
        with open(outpath, "w") as fout:
            header = ["num proteins", "pubmed ID", "Title", "Year", "Journal"]
            fout.write(delim.join(header) + "\n")
            for p in papers_annots2:
                fields = ["%d" % p[0]] + ["%s" % field for field in p[1:]]
                fout.write(delim.join(fields) + "\n")
    # papers_annots2 = [(# all annotations, PMID, Title, Year, Journal)]
    return papers_annots2
示例15: top_papers_dict
# 需要导入模块: from Bio import Medline [as 别名]
# 或者: from Bio.Medline import parse [as 别名]
def top_papers_dict(papers, outpath=None,delim="\t", top=None):
    """Fetch PubMed info for the PMIDs in ``papers`` and return it as a dict.

    Can be used with SP & GOA data.

    Args:
        papers: Mapping of PMID to a sized collection of annotations.
        outpath: Accepted but not used by this function.
        delim: Accepted but not used by this function.
        top: Number of PMIDs with the most annotations to look up; ``None``
            looks up all of them.

    Returns:
        Dict mapping PMID to ``[number of annotations, title, year,
        journal]``.  ``"?"`` stands in for fields a record does not provide.
    """
    papers_annots = sorted((len(papers[p]), p) for p in papers)
    negTop = 0 if top is None else -top
    top_annots = papers_annots[negTop:]
    idlist = [p[1] for p in top_annots]
    if not idlist:
        # Nothing to look up; skip the request instead of sending an empty
        # id string.
        return {}

    Entrez.email = MY_EMAIL
    h = Entrez.efetch(db="pubmed", id=",".join(idlist),
                      rettype="medline", retmode="text")
    try:
        medrecs = list(Medline.parse(h))
    finally:
        h.close()

    # Records are paired with the requested ids by position, which assumes
    # efetch returns exactly one record per id in request order.
    papers_annots2_dict = {}
    for p, medrec in zip(top_annots, medrecs):
        year_parts = medrec.get("DP", "?").split()
        # papers_annots2_dict[PMID] = [# of total annotations, Title, Year, Journal]
        papers_annots2_dict[p[1]] = [
            len(papers[p[1]]),
            medrec.get("TI", "?"),
            year_parts[0] if year_parts else "?",  # empty DP no longer raises
            medrec.get("JT", "?"),
        ]
    return papers_annots2_dict