This article collects typical usage examples of the wikipedia.page function in Python. If you have been wondering what page is for, how to call it, or what it looks like in real code, the curated examples below should help.
Fifteen code examples of the page function are shown, sorted by popularity by default.
Example 1: findRelevantArticles

import codecs
import itertools
import os
import time

import wikipedia

def findRelevantArticles(term, data_path='.'):
    """Save the content of articles about `term` whose categories overlap
    the module-level set `relevant_categories` (defined elsewhere)."""
    articleList = []
    articles = wikipedia.search(term)  # suggestion=False (the default); no clear use for it now
    for article in articles:
        try:
            article = wikipedia.page(article)
            category_keywords = set(itertools.chain.from_iterable(
                category.lower().split() for category in article.categories))
            if category_keywords & relevant_categories:
                articlefilename = "content_" + article.title.lower() + ".txt"
                if os.path.isfile(articlefilename):
                    # Avoid overwriting: append the term and a timestamp
                    articlefilename = "content_" + article.title.lower() + \
                        '%s.txt' % (term + time.strftime("%Y%m%d-%H%M%S"))
                with codecs.open(os.path.join(data_path, articlefilename), 'w', 'utf-8') as outfile:
                    print(article.content, file=outfile)  # already a WikipediaPage; no second fetch needed
                articleList.append(article.title)
        except wikipedia.exceptions.PageError:
            pass
        except wikipedia.exceptions.DisambiguationError as e:
            # On a disambiguation page, try each listed option instead
            for option in e.options:
                try:
                    option_page = wikipedia.page(option)
                    category_keywords = set(itertools.chain.from_iterable(
                        category.lower().split() for category in option_page.categories))
                    if category_keywords & relevant_categories:
                        articlefilename = "content_" + option_page.title.lower() + ".txt"
                        if os.path.isfile(articlefilename):
                            articlefilename = "content_" + option_page.title.lower() + \
                                '%s.txt' % (term + time.strftime("%Y%m%d-%H%M%S"))
                        with codecs.open(os.path.join(data_path, articlefilename), 'w', 'utf-8') as outfile:
                            print(option_page.content, file=outfile)
                        articleList.append(option_page.title)
                except wikipedia.exceptions.DisambiguationError:
                    pass
    return articleList
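A possible invocation, assuming the module-level relevant_categories set the snippet relies on has been defined first (the term and categories below are placeholders):

relevant_categories = {'physics', 'physicists'}
saved = findRelevantArticles('quantum mechanics', data_path='/tmp')
print(saved)  # titles of the articles written to disk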
Example 2: searchWiki

import wikipedia

def searchWiki(page):
    wikipedia.set_lang("fr")
    link = ''
    try:
        # p = wikipedia.page(page)
        # link = p.url
        propos = wikipedia.search(page, results=5, suggestion=False)
        for choice in propos:
            if choice == page:
                p = wikipedia.page(page)
                link = p.url
                break
            elif page in choice:
                # TODO: handle propositions that merely contain the keyword
                print('There is a proposition containing the keyword')
                print(choice)
            else:
                try:
                    wikipedia.page(page, redirect=False, auto_suggest=False)
                except wikipedia.exceptions.RedirectError:
                    # The exact title is a redirect: follow it
                    p = wikipedia.page(page)
                    link = p.url
                    break
                except Exception:
                    link = ''
    except Exception:
        link = ""
    return link  # .encode('utf-8')
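A quick call sketch; the title is an arbitrary placeholder and the printed URL is only what one would expect when an exact French-language match exists:

url = searchWiki('Paris')
print(url)  # e.g. 'https://fr.wikipedia.org/wiki/Paris', or '' on failure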
Example 3: info

import wikipedia

def info(topic):
    response = {}
    response["type"] = "wiki"
    try:
        page = wikipedia.page(topic)
        response['title'] = page.title
        response['url'] = page.url
        response['content'] = wikipedia.summary(page.title, sentences=5)
        if len(response['content']) < 200:
            response['content'] = wikipedia.summary(page.title, sentences=10)
    except Exception as error:
        ename = type(error).__name__
        if ename == 'DisambiguationError':
            # Fall back to the first option on the disambiguation page
            page = wikipedia.page(error.options[0])
            response['title'] = page.title
            response['url'] = page.url
            response['content'] = wikipedia.summary(page.title, sentences=2)
            if len(response['content']) < 200:
                response['content'] = wikipedia.summary(page.title, sentences=10)
        elif ename == 'HTTPTimeoutError':
            response['type'] = "error"
            response['error'] = "I couldn't reach wikipedia"
        elif ename == 'PageError':
            response['type'] = "error"
            response['error'] = "I couldn't find anything on wikipedia"
        else:
            response['type'] = "error"
            response['error'] = "Unknown error occurred while reaching wikipedia"
    return response
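A sketch of how the returned dict might be consumed (the topic is arbitrary):

resp = info('Alan Turing')
if resp['type'] == 'wiki':
    print(resp['title'], resp['url'])
else:
    print(resp['error'])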
Example 4: wikify

import wikipedia

def wikify():
    """Returns the sentences with wikipedia links"""
    tag_dict = look_entity()  # project-local helper
    link_dict = {}
    combined = combine()      # project-local helper
    for item in tag_dict:
        if item in combined:
            try:
                page = wikipedia.page(combined[item])  # fetch once instead of twice
                if combined[item] in page.content:
                    link_dict[item] = page.url
            except wikipedia.exceptions.DisambiguationError as disamb:
                try:
                    link_dict[item] = wikipedia.page(disamb.options[0]).url
                except Exception:
                    pass
            except wikipedia.exceptions.PageError:
                pass
        else:
            try:
                link_dict[item] = wikipedia.page(item).url
            except wikipedia.exceptions.DisambiguationError as disamb:
                try:
                    link_dict[item] = wikipedia.page(disamb.options[0]).url
                except Exception:
                    pass
            except wikipedia.exceptions.PageError:
                pass
    return link_dict
Example 5: article

def article(self, pageid=None, title=None):
    """
    Returns a specific article from Wikipedia,
    given its pageid or its title.
    Downloads it if necessary.
    """
    if pageid is None and title is None:
        raise Exception("Pageid and title can't be None at the same time")
    if pageid is None:
        d = self.db.articles.find_one({'title': title})
        if d is not None:
            return d  # found it
    else:
        d = self.db.articles.find_one({'_id': pageid})
        if d is not None:
            return d  # found it
    try:
        if pageid is not None:
            page = wikipedia.page(pageid=pageid)
        else:
            page = wikipedia.page(title=title)
    except (
        wikipedia.exceptions.DisambiguationError,
        wikipedia.exceptions.PageError,
        wikipedia.exceptions.WikipediaException,
        requests.exceptions.RequestException,
        ValueError  # error decoding JSON response
    ):
        return
    time.sleep(0.5)  # pause briefly between API requests
    # Even if we didn't find pageid or title, it still could be in the DB
    # since the title could have changed
    try:
        d = {
            '_id': int(page.pageid),
            'title': page.title,
            'content': page.content
        }
    except KeyboardInterrupt:  # let KeyboardInterrupt propagate
        raise
    except Exception:
        return  # can't add this entry
    self.db.articles.update_one(
        {'_id': d['_id']},
        {'$set': d},
        upsert=True
    )
    return d
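A hedged usage sketch: the snippet is a method, so it assumes a surrounding class whose self.db is a MongoDB handle with an articles collection. ArticleStore below is a hypothetical name for that class:

store = ArticleStore()  # hypothetical wrapper that sets up self.db
doc = store.article(title='Python (programming language)')
if doc is not None:
    print(doc['_id'], doc['title'])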
Example 6: wiki

from random import choice

from wikipedia import DisambiguationError, page, search

# functionHelp and RESULT_RPL come from the surrounding bot framework

def wiki(event, bot):
    """ wiki \x02searchterm\x02. Will search Wikipedia for \x02searchterm\x02. """
    if not event.argument: return bot.say(functionHelp(wiki))
    result = search(event.argument, results=1, suggestion=True)
    if not result[0]:
        if result[1]: return bot.say("No results found. Did you mean \x02%s\x02?" % result[1])
        else: return bot.say("No results found.")
    errors = []
    attempt = 0
    p = None
    try:
        p = page(result[0])  # use preload=True when it's fixed: https://github.com/goldsmith/Wikipedia/issues/78
    except DisambiguationError as e:
        errors.append("Random disambig page: ")
        while attempt < 3:
            try:
                p = page(choice(e.options))
                break  # stop once a concrete page is found
            except DisambiguationError: pass
            attempt += 1
        if not p: return bot.say("Gave up looking for a concrete entry from the disambiguation page.")
    if result[1]:
        errors.append("(SP: %s?) " % result[1])
    content = p.content[:800].replace("\n", " ").replace("====", "").replace("===", "").replace("==", "")
    bot.say(RESULT_RPL % ("".join(errors), p.url), strins=[p.title, content], fcfs=True)
Example 7: getContentFromLink

import wikipedia as wk

def getContentFromLink(link):
    try:
        linkText = wk.page(link, auto_suggest=False).content.lower()
    except wk.exceptions.DisambiguationError as e:
        # Take the first option that is not itself a disambiguation page
        options = [x for x in e.options if "(disambiguation)" not in x]
        linkText = wk.page(options[0], auto_suggest=False).content.lower()
    return linkText
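Example call (the title is arbitrary); on a disambiguation hit the first non-disambiguation option is fetched instead:

text = getContentFromLink('Mercury (planet)')
print(text[:80])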
Example 8: disambiguationWikipedia

import wikipedia

def disambiguationWikipedia(noun):
    """
    Fetch a page for `noun`, working around disambiguation and page
    errors. Returns the string 'Null' on failure.
    """
    # Try to get wikipedia content
    try:
        wiki = wikipedia.page(noun)
    except wikipedia.exceptions.DisambiguationError as e:
        new = e.options[0]
        try:
            wiki = wikipedia.page(new)
        except Exception:
            return 'Null'
    except wikipedia.exceptions.PageError:
        new = wikipedia.search(noun)
        try:
            wiki = wikipedia.page(new[0])
        except Exception:
            return 'Null'
    except Exception:
        return 'Null'
    return wiki
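Usage sketch; note the function signals failure with the string 'Null' rather than None:

wiki = disambiguationWikipedia('Mercury')
if wiki != 'Null':
    print(wiki.title, wiki.url)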
Example 9: search_wikipedia

import wikipedia

def search_wikipedia(word):
    searchArr = wikipedia.search(word)
    wiki_results = []
    try:
        try:
            for result in searchArr:
                wiki_results.append(wikipedia.page(result, preload=False))
        except wikipedia.DisambiguationError as e:
            # Fall back to the options listed on the disambiguation page,
            # one level deep, then give up on further ambiguity
            try:
                for item in e.options:
                    wiki_results.append(wikipedia.page(item, preload=False))
            except wikipedia.DisambiguationError as i:
                try:
                    for item in i.options:
                        wiki_results.append(wikipedia.page(item, preload=False))
                except wikipedia.DisambiguationError:
                    pass
    except Exception:
        print("Something went wrong getting wikipedia results")
    return wiki_results
Example 10: page

def page(title=None, pageid=None, auto_suggest=True, redirect=True):
    """
    The user's search term may not correspond directly to a Wikipedia
    page: it can be vague, redirected, or ambiguous. Look the page up in
    the local store first and fall back to the live API otherwise.
    :param auto_suggest: let Wikipedia find a valid page title for the query
    :return: a cached WikipediaArticle, or a freshly fetched and saved page
    """
    # WikipediaArticle, WikipediaWrapper and str_util are project-local helpers
    if pageid is not None:
        pageid = int(pageid)
        page = WikipediaArticle.objects(pageid=pageid)
    else:
        page = WikipediaArticle.objects(title=title)
    if not page:
        results, suggestion = WikipediaWrapper.search(
            title,
            results=1,
            suggestion=True)
        suggested_term = suggestion or results[0]
        page = WikipediaArticle.objects(title=suggested_term)
    if page:
        page = page[0]
    else:
        try:
            page = wikipedia.page(title=title,
                                  pageid=pageid,
                                  auto_suggest=auto_suggest,
                                  redirect=redirect)
        except UnicodeDecodeError:
            page = wikipedia.page(title=str_util.normal_str(title),
                                  pageid=pageid,
                                  auto_suggest=auto_suggest,
                                  redirect=redirect)
    if type(page) is wikipedia.WikipediaPage:
        page = WikipediaWrapper.save_page(page)
    return page
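Assuming the surrounding project's models are set up, a call might look like this (the title is a placeholder):

article = page(title='Python (programming language)')
# served from the WikipediaArticle cache when possible,
# otherwise fetched via wikipedia.page() and saved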
Example 11: climb_tree

def climb_tree(self):
    """Climb the advisor tree starting from self.seed."""
    cur_branch = self.seed
    prev_node = None
    while cur_branch is not None:
        self.logger.debug('Current branch is %s' % cur_branch)
        # Get wikipedia page
        try:
            cur_page = wikipedia.page(cur_branch)
        except wikipedia.PageError:
            self.logger.exception('Cannot find page for %s. Ending search.' % cur_branch)
            self.tree.node(cur_branch)
            if prev_node is not None:
                self.tree.edge(cur_branch, prev_node)
            cur_branch = None
            continue
        except wikipedia.DisambiguationError:
            self.logger.exception('Multiple pages found for query %s. Adding "(physicist)" and searching again.' % cur_branch)
            cur_page = wikipedia.page(cur_branch + ' (physicist)')
        # Parse the infobox table
        html_source = BeautifulSoup(cur_page.html(), 'html.parser')
        advisor = self._search_info_table(html_source, ['Doctoral advisor', 'Doctoral advisors', 'Academic advisors', 'Academic advisor'])
        alma_mater = self._search_info_table(html_source, 'Alma mater')
        students = self._search_info_table(html_source, 'Doctoral students')
        # Add to graph
        self.tree.node(cur_branch, cur_branch + '\n' + self._none_filter(alma_mater))
        if prev_node is not None:
            self.tree.edge(cur_branch, prev_node)
        # Update
        prev_node = cur_branch
        cur_branch = self._res_filter(advisor)
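A hedged sketch of driving this method, assuming the surrounding class wires up self.seed, self.tree (e.g. a graphviz.Digraph) and self.logger; AdvisorTree is a hypothetical name:

climber = AdvisorTree(seed='Niels Bohr')  # hypothetical constructor
climber.climb_tree()
climber.tree.render('advisors.gv')        # write the graphviz output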
Example 12: link_checker

import wikipedia

def link_checker(ngram):
    ''' Checks if the word gives a valid wikipedia link '''
    try:
        page = wikipedia.page(ngram)
        return page.url
    except wikipedia.exceptions.DisambiguationError:
        # Point at the disambiguation page itself
        return 'http://en.wikipedia.org/wiki/' + ngram.replace(' ', '_') + '_(disambiguation)'
    except wikipedia.exceptions.PageError:
        # Drop title words such as "prime", "minister", "president" and retry
        wordlist = [word for word in ngram.split() if word.lower() not in ("prime", "minister", "president")]
        ngram = " ".join(wordlist)
        try:
            page = wikipedia.page(ngram)
            return page.url
        except (wikipedia.exceptions.PageError, wikipedia.exceptions.DisambiguationError):
            return -1
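Example calls; the function returns an article URL, a disambiguation URL, or -1 when nothing is found:

print(link_checker('Barack Obama'))            # article or disambiguation URL
print(link_checker('president Barack Obama'))  # retried without the title word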
Example 13: collectFrom

import random

import wikipedia

def collectFrom(lang, start, hangang):
    """Randomly crawl article links from `start` until the corpus
    reaches `hangang` characters, then write it to disk."""
    wikipedia.set_lang(lang)
    start_page = wikipedia.page(start)
    lookpa = start_page.links   # frontier: titles still to visit
    lookna = [start]            # titles already visited
    corpus = start_page.content
    while len(corpus) < hangang:
        random.shuffle(lookpa)
        item = lookpa[0]
        try:
            item_page = wikipedia.page(item)  # fetch once for both content and links
            corpus += item_page.content
            for link in item_page.links:
                if link not in lookpa and link not in lookna:
                    lookpa.append(link)
        except (wikipedia.exceptions.PageError,
                wikipedia.exceptions.DisambiguationError,
                KeyError):
            pass
        lookna.append(item)
        lookpa.remove(item)
        print('Corpus = ' + str(len(corpus)) + ' Searched = ' + str(len(lookna)) + ' Still = ' + str(len(lookpa)))
    with open(lang + 'FromWikiCorp.txt', 'w', encoding='utf-8') as f:
        f.write(corpus)
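A sample run (language code, seed title, and size threshold are arbitrary); the crawl keeps following random links until the corpus reaches hangang characters:

collectFrom('en', 'Language', 100000)  # writes enFromWikiCorp.txt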
Example 14: context_data

def context_data(self):
    """
    Gather data from Wikipedia based on the user-supplied SUBJECT.
    """
    text_list, visited, visitedSeeAlso, queue = [], set(), set(), []
    # First pass: breadth-first over search results, up to self.depth levels
    queue.append((self.subject, self.depth))
    while queue:
        title, depth = queue.pop(0)
        try:
            if title not in visited and depth >= 0:
                visited.add(title)
                results = wikipedia.search(title, self.max_searches, False)
                for pagename in results:
                    queue.append((pagename, depth - 1))
                text_list.extend(wikipedia.page(title).content.split())
        except Exception:
            pass
    # Second pass: follow "See also" references
    queue.append((self.subject, self.depth))
    while queue:
        title, depth = queue.pop(0)
        try:
            if title not in visitedSeeAlso and depth >= 0:
                visitedSeeAlso.add(title)
                page = wikipedia.page(title)
                for reference in page.section("See also").splitlines():
                    queue.append((reference, depth - 1))
                text_list.extend(page.content.split())  # reuse the fetched page
        except Exception:
            pass
    return text_list
Example 15: getWikiPage

import random

import wikipedia

def getWikiPage(title):
    try:
        page = wikipedia.page(title)
    except wikipedia.exceptions.DisambiguationError as e:
        # Ambiguous title: pick one of the listed options at random
        print(e.options)
        title = random.choice(e.options)
        page = wikipedia.page(title)
    return page
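Usage sketch; an ambiguous title such as the one below exercises the random-choice fallback:

page = getWikiPage('Mercury')
print(page.title)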