本文整理汇总了Python中wikipedia.set_lang函数的典型用法代码示例。如果您正苦于以下问题:Python set_lang函数的具体用法?Python set_lang怎么用?Python set_lang使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了set_lang函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: add_wiki_data_to_person
def add_wiki_data_to_person(person):
    """Mine biographical data from German Wikipedia and store it on *person*.

    Searches de.wikipedia for the person's name; for each candidate page,
    compares the "Geboren"/"Gestorben" (born/died) category years against
    the average issue year of the person's regesta.  On a plausible match
    (within 30 years) the page image and summary are saved on the person.
    """
    wikipedia.set_lang("de")
    # Loop-invariant: the average issue date (a unix timestamp) of all
    # regesta issued by this person approximates when the person lived.
    avg_time = int(Regeste.objects.filter(issuer=person).aggregate(Avg("issue_date"))['issue_date__avg'])
    avg_year = datetime.datetime.fromtimestamp(int(avg_time)).year
    for entry in wikipedia.search(person.name, results=4):
        try:
            page_de = wikipedia.page(entry)
            for category in page_de.categories:
                try:
                    regex_result = re.findall(r"Kategorie:Gestorben (\d+)|Kategorie:Geboren (\d+)", category)
                    search_results = list(filter(None, flatten(regex_result)))
                    for search_result in search_results:
                        if abs(int(search_result) - avg_year) <= 30:
                            if len(page_de.images) > 0:
                                # The first image is often an .svg icon; prefer
                                # the next image (a real photo) in that case.
                                if page_de.images[0][-4:] == ".svg":
                                    person.img_url = page_de.images[1]
                                else:
                                    person.img_url = page_de.images[0]
                            person.short_description = page_de.summary
                            person.save()
                except IndexError:
                    # e.g. only a single .svg image exists; skip this category.
                    pass
        # BUG FIX: `except A and B:` evaluates to `except B:` and silently
        # ignores A.  A tuple catches both exception types.
        except (wikipedia.exceptions.DisambiguationError,
                wikipedia.exceptions.PageError):
            pass
示例2: search
def search(self, lang, queries, articles_per_query=10, should_break=None, on_progress=None):
    """Search Wikipedia and download the matching articles into a Corpus.

    Args:
        lang (str): A language code in ISO 639-1 format.
        queries (list of str): A list of queries.
        articles_per_query (int): Maximum number of articles fetched per query.
        should_break (callable): Callback for breaking the computation before
            the end.  If it evaluates to True, downloading is stopped and the
            documents downloaded so far are returned in a Corpus.
        on_progress (callable): Callback for the progress bar; receives a
            fraction of completed work and the number of results so far.

    Returns:
        A Corpus built from the downloaded documents.
    """
    wikipedia.set_lang(lang)
    results = []
    for i, query in enumerate(queries):
        try:
            articles = wikipedia.search(query, results=articles_per_query)
            for j, article in enumerate(articles):
                # Honor the cancel callback between every article download.
                if callable(should_break) and should_break():
                    break
                results.extend(self._get(article, query, should_break))
                if callable(on_progress):
                    # Fraction of (queries x articles) processed so far.
                    on_progress((i*articles_per_query + j+1) / (len(queries) * articles_per_query),
                                len(results))
        except (wikipedia.exceptions.HTTPTimeoutError, IOError) as e:
            # Network failure: report and return whatever was collected.
            self.on_error(str(e))
            break
        if callable(should_break) and should_break():
            break
    return Corpus.from_documents(results, 'Wikipedia', self.attributes,
                                 self.class_vars, self.metas, title_indices=[-1])
示例3: get_info_box_data
def get_info_box_data(word):
    """Scrape the infobox rows of the best Hebrew-Wikipedia match for *word*.

    NOTE(review): this example appears truncated by the source page -- the
    loop accumulates `result` strings but no return statement is visible.
    """
    # "He" resolves because hostnames are case-insensitive, but the
    # conventional ISO 639-1 code is lowercase "he".
    wikipedia.set_lang("He")
    # Positional args: results=10, suggestion=True, so search() returns a
    # (results_list, suggestion) tuple; res[0][0] is the top title.
    res = wikipedia.search(word, 10, True)
    title = res[0][0]
    html_page = wikipedia.page(title).html()
    soup = BeautifulSoup(html_page)
    info_table = soup.find("table", {"class": "infobox"})
    info = []
    current_tuple = tuple()
    rows = info_table.findChildren(["th", "tr", "td"])
    for row in rows:
        result = ""
        row_title = get_title(row)
        values = row.findChildren("a")
        # Rows without links carry no extractable values.
        if len(values) == 0:
            continue
        for value in values:
            # value = cell['content']
            # print "The value in this cell is %s" % value
            for content in value.contents:
                # Skip embedded image tags inside anchor contents.
                if "img" in content:
                    continue
                result += " " + (str)(content)
        # Discard whole rows whose collected text still references images.
        if "img" in result:
            continue
示例4: inf
def inf(d, s):
    """Return a short Russian Wikipedia summary for a notable place near
    the coordinates (*d*, *s*), or None when nothing suitable is found.

    Queries Google Places nearby-search for several place types in order
    and returns the first non-empty Wikipedia summary.
    """
    # Warm-up request; also fails early if the Wikipedia API is unreachable.
    wikipedia.summary("Wikipedia")
    wikipedia.set_lang("ru")
    types = ['museum', 'park', 'church', 'zoo', 'train_station', 'stadium']

    def param(t):
        """Find the nearest place of type *t* and return its summary, if any."""
        content = urlopen(
            'https://maps.googleapis.com/maps/api/place/nearbysearch/json?language=ru&location=' + str(d) + ',' + str(
                s) + '&rankby=distance&types=' + t + '&key=' + apikey).read()
        c = json.loads(content.decode("utf-8"))
        c = c.get('results')
        if len(c) != 0:
            c = c[0].get('name')
            m = wikipedia.search(c, results=5, suggestion=False)
            if len(m) != 0:
                # BUG FIX: summary() expects a single page title, not the
                # whole result list -- use the best (first) match.
                # NOTE(review): chars=1 is ignored when sentences is given,
                # per the wikipedia package's summary() implementation.
                textsong = wikipedia.summary(m[0], sentences=5, chars=1)
                if textsong != '':
                    return textsong

    for place_type in types:
        temp = param(place_type)
        if temp:
            return temp
示例5: wiki_func
def wiki_func(paras, infos):
    """Query Chinese Wikipedia for *paras* and return an answer dict.

    Returns ``{'text': ...}`` holding either a one-sentence summary plus the
    candidate keywords, or 'not found' when nothing usable turns up.
    """
    wikipedia.set_lang("zh")
    candidates = wikipedia.search(paras)
    if not candidates:
        return {
            'text': 'not found',
        }
    summary = None
    for keyword in candidates:
        try:
            summary = wikipedia.summary(keyword, sentences=1)
        except Exception:  # disambiguation pages may raise; see wikipedia docs
            continue
        else:
            break
    if not summary:
        return {
            'text': 'not found',
        }
    answer = decode_to_unicode(summary) + \
        u'\n候选关键词: %s' % u', '.join(candidates)
    return {
        'text': answer,
    }
示例6: get_wiki_info
def get_wiki_info(title):
    """This function retrieves information from the Wikipedia API.

    :param title: A title of a possible wikipedia page
    :type title: str
    :returns: an object with information of the retrieved page, or an HTML
        error message (str) when the lookup fails
    :rtype: wikipedia.wikipedia.WikipediaPage object or str

    :Example:
    >>> result = get_wiki_info('Cervantes')
    >>> print(result.url)
    https://es.wikipedia.org/wiki/Miguel_de_Cervantes
    >>> print(result.title)
    Miguel de Cervantes
    >>> print(type(result))
    <class 'wikipedia.wikipedia.WikipediaPage'>
    >>> result = get_wiki_info('Cervantesssssssssssss')
    >>> print(type(result))
    <class 'str'>
    """
    try:
        # set_lang is inside the try so that ANY failure (including a broken
        # wikipedia module) yields the friendly fallback message instead of
        # an unhandled exception.  lang is hardcoded as the app is es-only.
        wp.set_lang('es')
        return wp.page(title)
    # FIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt;
    # Exception is the broadest class a handler should normally catch.
    except Exception:
        msg = "<H2>¡Lo sentimos!</H2>\n"
        msg += "<p>Hemos fallado miserablemente al ofrecerte este servicio.</p>\n"
        msg += "<p>Vuelve al mapa e inténtalo de nuevo.</p>"
        return msg
示例7: main
def main(path_extracted_wikipedia_text: str,
         wikipedia_article_names: List[Tuple[str, str]]):
    """Download summary and full text for the given Japanese Wikipedia pages
    and dump both collections as JSON to *path_extracted_wikipedia_text*.

    :param path_extracted_wikipedia_text: output JSON file path.
    :param wikipedia_article_names: (page_title, gold_label) pairs.
    """
    wikipedia.set_lang('ja')

    def _collect(fetch_text):
        """Fetch text with fetch_text(title) for every article; keep the
        records whose fetch did not fail (text is False on failure)."""
        records = []
        for article_name in wikipedia_article_names:
            text = fetch_text(article_name[0])
            record = {
                "page_title": article_name[0],
                "text": text,
                "gold_label": article_name[1],
            }
            logger.info(msg='Got wikipedia page={}'.format(article_name))
            if text is not False:
                logger.info(msg='It gets text from page-name={}'.format(article_name))
                records.append(record)
            # Throttle requests to be polite to the API.
            time.sleep(SLEEP_TIME)
        return records

    wikipedia_text_data = {
        'summary': _collect(get_wikipedia_summary),
        'full': _collect(get_wikipedia_page),
    }
    with open(path_extracted_wikipedia_text, 'w') as f:
        f.write(json.dumps(wikipedia_text_data, ensure_ascii=False, indent=4))
示例8: searchWiki
def searchWiki(page):
    """Return the French-Wikipedia URL for *page*, or '' when no exact or
    redirect match is found.

    Walks the top-5 search suggestions: an exact title match wins; a
    suggestion merely containing the keyword is only reported; otherwise a
    redirect probe is used to accept redirected titles.
    """
    wikipedia.set_lang("fr")
    link = ''
    try:
        propos = wikipedia.search(page, results=5, suggestion=False)
        for choice in propos:
            if choice.encode('utf-8') == page.encode('utf-8'):
                p = wikipedia.page(page)
                link = p.url
                break
            elif page in choice:
                # TODO: decide what to do with partial matches.
                # FIX: Python-2 print statements are a syntax error on
                # Python 3; the parenthesized form works on both.
                print('There is a proposition containing the keyWord ')
                print(choice)
            else:
                try:
                    # Probe without auto-redirect: a RedirectError means the
                    # title exists but redirects, which we accept below.
                    wikipedia.page(page, redirect=False, auto_suggest=False)
                except wikipedia.exceptions.RedirectError:
                    p = wikipedia.page(page)
                    link = p.url
                    break
                except Exception:
                    # Page missing/ambiguous: keep looking.
                    link = ''
    except Exception:
        # FIX: bare except narrowed; any search failure yields no link.
        link = ""
    return link
示例9: wikipedia_page
def wikipedia_page(message, option, query):
    """
    Send the first Wikipedia search result for *query* back to the chat.

    :param message: bot message object used for replies
    :param option: optional '-<lang>' suffix selecting the wiki language
    :param query: search terms; the literal 'help' is handled elsewhere
    """
    if query == 'help':
        return

    # Pick the wiki language; default is Japanese.
    lang = 'ja'
    if option:
        _, lang = option.split('-')
    wikipedia.set_lang(lang)

    try:
        # search with query
        results = wikipedia.search(query)
    # FIX: bare `except:` narrowed; an unknown language prefix makes the
    # underlying API request fail.
    except Exception:
        message.send('指定された言語 `{}` は存在しません'.format(lang))
        return

    # get first result
    if results:
        page = wikipedia.page(results[0])
        attachments = [{
            'fallback': 'Wikipedia: {}'.format(page.title),
            'pretext': 'Wikipedia: <{}|{}>'.format(page.url, page.title),
            'text': page.summary,
        }]
        message.send_webapi('', json.dumps(attachments))
    else:
        message.send('`{}` に該当するページはありません'.format(query))
示例10: search_lang
def search_lang(self, c, e, args):
    """IRC command: look up ' '.join(args[1:]) on the wiki whose language
    code is args[0], message the result, then restore the configured
    default language."""
    requested_lang, query = args[0], ' '.join(args[1:])
    wikipedia.set_lang(requested_lang)
    page = wikipedia.page(query)
    if page:
        one_line_summary = smart_truncate(page.summary.replace('\n', ' '))
        c.privmsg(get_target(c, e),
                  '\x02{}\x0f - {} [ {} ]'.format(page.title, one_line_summary, page.url))
    # Restore the bot-wide default so later commands are unaffected.
    wikipedia.set_lang(self.bot.config.get('wikipedia', 'lang'))
示例11: get
def get(self, keyword):
    """Return {'result': <summary>} for *keyword*, switching the wiki
    language to Chinese when the keyword contains CJK characters."""
    if check_contain_chinese(keyword):
        wikipedia.set_lang("zh")
        print("set lang to zh")
    summary = wikipedia.summary(keyword)
    print(summary)
    return {"result": summary}
示例12: _extract_from_wiki
def _extract_from_wiki(self):
    """Download the selected part of a Wikipedia page (content, images,
    summary or link lists) in a background thread and deliver the result
    to the GUI via Qt signals."""
    title = self.title_line_edit.text()
    if title:
        page = self.page_combo_box.currentText()
        wikipedia.set_lang(self.lang_combo_box.currentText())
        # Min == Max == 0 puts the progress bar into indeterminate mode.
        self.load_progressbar.setMinimum(0)
        self.load_progressbar.setMaximum(0)
        class ProgressThread(QThread, QWidget):
            # Signals carrying the downloaded payload back to the GUI thread.
            content_link_arrived = pyqtSignal([list])
            content_text_arrived = pyqtSignal(['QString'])
            content_image_arrived = pyqtSignal([list, 'QString'])
            error_occurred = pyqtSignal()
            valid_images = []
            def run(self):
                # Runs off the GUI thread; closes over `title` and `page`.
                try:
                    wiki = wikipedia.page(title=title)
                    # NOTE(review): handle opened but never used or closed --
                    # confirm whether the template is still needed.
                    f = open('templates/template.html')
                    if page == 'Content':
                        self.content_text_arrived.emit(wiki.content)
                    elif page == 'Images':
                        print(wiki.images)
                        self.des_dir = Preferences.output_path + '/' + title
                        self.valid_images = []
                        if not os.path.exists(self.des_dir):
                            print(self.des_dir)
                            os.mkdir(self.des_dir)
                        for i in wiki.images:
                            # Keep only extensions allowed by preferences.
                            if PurePath(i).suffix in Preferences.valid_image_formats:
                                print(i)
                                print(self.des_dir)
                                wget.download(i, out=self.des_dir)
                                self.valid_images.append(i)
                        self.content_image_arrived.emit(self.valid_images, self.des_dir)
                    elif page == 'Summary':
                        self.content_text_arrived.emit(wiki.summary)
                    elif page == 'Images Links':
                        self.content_link_arrived.emit(wiki.images)
                    elif page == 'References Links':
                        self.content_link_arrived.emit(wiki.references)
                except:
                    # Any failure (network, missing page, disambiguation)
                    # is reported to the GUI as a generic error signal.
                    self.error_occurred.emit()
        self.progress_thread = ProgressThread()
        self.progress_thread.content_link_arrived.connect(self.set_content_link)
        self.progress_thread.content_text_arrived.connect(self.set_content_text)
        self.progress_thread.content_image_arrived.connect(self.set_content_image)
        self.progress_thread.error_occurred.connect(self.handle_error_occurred)
        self.progress_thread.start()
    else:
        # No title entered: clear and disable the output widget.
        self.content_text_browser.clear()
        self.content_text_browser.setEnabled(False)
示例13: find_definition
def find_definition(command):
    """Answer a "what is ..." style question with a Wikipedia summary.

    Strips a recognized question prefix and any trailing '?', optionally
    translates the remainder, and returns the summary wrapped in a code
    block -- or "Need a clear one" when anything goes wrong.
    """
    # Longer prefixes first so e.g. "what's a " wins over "what's".
    prefixes = ("what is a ", "what is ", "who are ", "what are ",
                "what's a ", "who is ", "what's")
    for prefix in prefixes:
        if command.startswith(prefix):
            command = command[len(prefix):]
            break
    # FIX: endswith() is safe on an empty string, whereas indexing
    # command[len(command) - 1] raised IndexError outside the try.
    if command.endswith('?'):
        command = command[:-1]
    try:
        if wiki_lang != "":
            # wiki_lang is stored with a trailing ':' (e.g. "en:").
            wikipedia.set_lang(wiki_lang[:-1])
        if wiki_lang != "zh:":
            lang = wiki_lang
        else:
            # The translator wants the region-qualified Chinese code.
            lang = "zh-cn:"
        command = command.strip()
        command = handle_translation(lang + command).lower()
        command = command.title()
        response = '```' + wikipedia.summary(command, sentences=wiki_line) + '```'
    # FIX: bare `except:` narrowed; anything from an unknown page to a
    # network error asks the user to rephrase.
    except Exception:
        response = "Need a clear one"
    return response
示例14: collectFrom
def collectFrom(lang, start, hangang):
    """Crawl Wikipedia in language *lang* starting at page *start*, following
    random links until the corpus reaches *hangang* characters, then write
    the corpus to '<lang>FromWikiCorp.txt'.

    :param lang: ISO 639-1 language code for wikipedia.set_lang.
    :param start: title of the seed page.
    :param hangang: target corpus size in characters.
    """
    wikipedia.set_lang(lang)
    # Fetch the seed page once (the original fetched it three times).
    start_page = wikipedia.page(start)
    lookpa = start_page.links        # frontier: titles still to visit
    # BUG FIX: the visited list was seeded with a WikipediaPage object but
    # later compared against title strings; seed it with the title instead.
    lookna = [start]                 # visited titles
    corpus = str(start_page.content)
    while len(corpus) < hangang:
        # Pick a random frontier title (equivalent to shuffle + take first).
        item = random.choice(lookpa)
        try:
            # Fetch once and reuse for both content and links (the original
            # downloaded the same page twice per iteration).
            item_page = wikipedia.page(item)
            corpus += str(item_page.content)
            for linked_title in item_page.links:
                if linked_title not in lookpa and linked_title not in lookna:
                    lookpa.append(linked_title)
        except (wikipedia.exceptions.PageError,
                wikipedia.exceptions.DisambiguationError,
                KeyError):
            # Missing/ambiguous pages (or API quirks) are simply skipped.
            pass
        lookna.append(item)
        lookpa.remove(item)
        print('Corpus = ' + str(len(corpus)) + ' Searched = ' + str(len(lookna)) + ' Still = ' + str(len(lookpa)))
    # FIX: use a context manager so the file is closed even on error.
    with open(lang + 'FromWikiCorp.txt', 'w') as f:
        f.write(corpus)
示例15: __init__
def __init__(self, dispatcher):
    """Register the /wiki command and default to the German Wikipedia."""
    wikipedia.set_lang("de")
    # Maps chat_id -> pageid of the article last used or searched by that
    # chat; .get(chat_id, None) returns None for chats never seen before.
    self.last_pageid = {}
    dispatcher.add_handler(CommandHandler('wiki', self.get_article))