

Python wikipedia.page Function Code Examples

This article collects typical usage examples of the wikipedia.page function in Python. If you are struggling with questions like "what exactly does the page function do?", "how do I call it?", or "what does real-world usage look like?", the curated examples below should help.


The following 15 code examples of the page function are shown below, sorted by popularity by default.
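
All of the examples share one basic call pattern: fetch a page with wikipedia.page, read its title, url, and content attributes, and handle the two exceptions that recur throughout this page. Here is a minimal sketch of that pattern; the article title used here is only an illustration, and any search term works the same way.

import wikipedia

try:
    page = wikipedia.page("Python (programming language)")
    print(page.title)          # canonical article title
    print(page.url)            # full article URL
    print(page.content[:200])  # start of the plain-text article body
except wikipedia.exceptions.DisambiguationError as e:
    # The term matched a disambiguation page; e.options lists candidate titles
    print(e.options[:5])
except wikipedia.exceptions.PageError:
    # No article matches the term
    print("No page found")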

Example 1: findRelevantArticles

import itertools
import os
import time

import wikipedia

# Assumed to be defined at module level: a set of lowercase keywords, e.g.
# relevant_categories = {'medicine', 'health', ...}

def findRelevantArticles(term, data_path='.'):
    articleList = []
    articles = wikipedia.search(term)  # suggestion=False (the default); no clear use for it now

    def save_if_relevant(article):
        """Write the article to disk if its categories share a keyword with relevant_categories."""
        category_keywords = set(itertools.chain.from_iterable(
            category.lower().split() for category in article.categories))
        if category_keywords & relevant_categories:
            articlefilename = "content_" + article.title.lower() + ".txt"
            if os.path.isfile(os.path.join(data_path, articlefilename)):
                # Avoid overwriting an existing file: append the term and a timestamp
                articlefilename = "content_" + article.title.lower() + term + time.strftime("%Y%m%d-%H%M%S") + ".txt"
            with open(os.path.join(data_path, articlefilename), 'w', encoding='utf-8') as outfile:
                outfile.write(article.content)
            articleList.append(article.title)

    for title in articles:
        try:
            save_if_relevant(wikipedia.page(title))
        except wikipedia.exceptions.PageError:
            pass
        except wikipedia.exceptions.DisambiguationError as e:
            # Ambiguous title: try each disambiguation option instead
            for option in e.options:
                try:
                    save_if_relevant(wikipedia.page(option))
                except (wikipedia.exceptions.PageError,
                        wikipedia.exceptions.DisambiguationError):
                    pass
    return articleList
Author: mac389, Project: computational-medical-knowledge, Lines: 32, Source: wikipedia_fetch.py

Example 2: searchWiki

import wikipedia

def searchWiki(page):
    """Return the French Wikipedia URL for `page`, or '' if none is found."""
    wikipedia.set_lang("fr")
    link = ''
    try:
        propos = wikipedia.search(page, results=5, suggestion=False)
        for choice in propos:
            if choice == page:
                # Exact match among the propositions: resolve and return its URL
                p = wikipedia.page(page)
                link = p.url
                break
            elif page in choice:
                # TODO: handle propositions that merely contain the keyword
                print('There is a proposition containing the keyword:')
                print(choice)
            else:
                try:
                    wikipedia.page(page, redirect=False, auto_suggest=False)
                except wikipedia.exceptions.RedirectError:
                    # The term redirects to a real page: follow the redirect
                    p = wikipedia.page(page)
                    link = p.url
                    break
                except Exception:
                    link = ''
    except Exception:
        link = ''
    return link
Author: Droxef, Project: DigitalHumanities, Lines: 28, Source: wikipediaSearch.py

Example 3: info

import wikipedia

def info(topic):
	response = {}
	response["type"] = "wiki"
	try:
		page = wikipedia.page(topic)
		response['title'] = page.title
		response['url'] = page.url
		response['content'] = wikipedia.summary(page.title, sentences=5)
		if len(response['content']) < 200:
			response['content'] = wikipedia.summary(page.title, sentences=10)
	except wikipedia.exceptions.DisambiguationError as error:
		# Ambiguous topic: fall back to the first disambiguation option
		page = wikipedia.page(error.options[0])
		response['title'] = page.title
		response['url'] = page.url
		response['content'] = wikipedia.summary(page.title, sentences=2)
		if len(response['content']) < 200:
			response['content'] = wikipedia.summary(page.title, sentences=10)
	except wikipedia.exceptions.HTTPTimeoutError:
		response['type'] = "error"
		response['error'] = "I couldn't reach wikipedia"
	except wikipedia.exceptions.PageError:
		response['type'] = "error"
		response['error'] = "I couldn't find anything on wikipedia"
	except Exception:
		response['type'] = "error"
		response['error'] = "Unknown error occurred while reaching wikipedia"

	return response
Author: diptanshuagrawal, Project: Darsy, Lines: 30, Source: wiki.py

Example 4: wikify

import wikipedia

def wikify():
    """Returns the sentences with wikipedia links"""
    tag_dict = look_entity()  # helper defined elsewhere in this project
    link_dict = {}
    combined = combine()      # helper defined elsewhere in this project
    for item in tag_dict:
        if item in combined:
            try:
                page = wikipedia.page(combined[item])  # fetch once and reuse
                if combined[item] in page.content:
                    link_dict[item] = page.url
            except wikipedia.exceptions.DisambiguationError as disamb:
                try:
                    link_dict[item] = wikipedia.page(disamb.options[0]).url
                except Exception:
                    pass
            except wikipedia.exceptions.PageError:
                pass
        else:
            try:
                link_dict[item] = wikipedia.page(item).url
            except wikipedia.exceptions.DisambiguationError as disamb:
                try:
                    link_dict[item] = wikipedia.page(disamb.options[0]).url
                except Exception:
                    pass
            except wikipedia.exceptions.PageError:
                pass
    return link_dict
Author: willempd, Project: wikify, Lines: 28, Source: wikify.py

Example 5: article

    # Module-level imports assumed: time, wikipedia, requests

    def article(self, pageid=None, title=None):
        """
        Return a specific article from Wikipedia,
        given its pageid or its title.
        Downloads it if necessary.
        """
        if pageid is None and title is None:
            raise Exception("Pageid and title can't be None at the same time")

        if pageid is None:
            d = self.db.articles.find_one({'title': title})
            if d is not None:
                return d  # found it
        else:
            d = self.db.articles.find_one({'_id': pageid})
            if d is not None:
                return d  # found it

        try:
            if pageid is not None:
                page = wikipedia.page(pageid=pageid)
            else:
                page = wikipedia.page(title=title)
        except (
            wikipedia.exceptions.DisambiguationError,
            wikipedia.exceptions.PageError,
            wikipedia.exceptions.WikipediaException,
            requests.exceptions.RequestException,
            ValueError  # error decoding the JSON response
        ):
            return

        time.sleep(0.5)  # throttle requests to the Wikipedia API

        # Even if we didn't find the pageid or title, the article could still
        # be in the DB, since the title could have changed
        try:
            d = {
                '_id': int(page.pageid),
                'title': page.title,
                'content': page.content
            }
        except KeyboardInterrupt:  # let KeyboardInterrupt propagate
            raise
        except Exception:
            return  # can't add this entry

        self.db.articles.update_one(
            {'_id': d['_id']},
            {'$set': d},
            upsert=True
        )

        return d
Author: aparafita, Project: news-similarity, Lines: 60, Source: wikipedia.py

Example 6: wiki

# Module-level imports assumed:
#   from wikipedia import search, page, DisambiguationError
#   from random import choice
# functionHelp and RESULT_RPL are helpers from the bot framework.

def wiki(event, bot):
	""" wiki \x02searchterm\x02. Will search Wikipedia for \x02searchterm\x02. """
	if not event.argument: return bot.say(functionHelp(wiki))
	result = search(event.argument, results=1, suggestion=True)
	if not result[0]:
		if result[1]: return bot.say("No results found. Did you mean \x02%s\x02?" % result[1])
		else: return bot.say("No results found.")

	errors = []
	attempt = 0
	p = None
	try:
		p = page(result[0])  # use preload=True when it's fixed: https://github.com/goldsmith/Wikipedia/issues/78
	except DisambiguationError as e:
		errors.append("Random disambig page: ")
		while attempt < 3:
			try:
				p = page(choice(e.options))
				break  # got a concrete page; stop retrying
			except DisambiguationError:
				pass
			attempt += 1
	if not p: return bot.say("Gave up looking for an unambiguous entry from the disambiguation page.")

	if result[1]:
		errors.append("(SP: %s?) " % result[1])
	content = p.content[:800].replace("\n", " ").replace("====", "").replace("===", "").replace("==", "")

	bot.say(RESULT_RPL % ("".join(errors), p.url), strins=[p.title, content], fcfs=True)
Author: Clam-, Project: pyBurlyBot, Lines: 26, Source: pbm_wikipedia.py

Example 7: getContentFromLink

import wikipedia as wk

def getContentFromLink(link):
	try:
		linkText = wk.page(link, auto_suggest=False).content.lower()
	except wk.exceptions.DisambiguationError as e:
		# filter() returns an iterator in Python 3, so materialize the options first
		options = [x for x in e.options if "(disambiguation)" not in x]
		linkText = wk.page(options[0], auto_suggest=False).content.lower()
	return linkText
Author: melissa0831, Project: OptimizedEntityLinking, Lines: 7, Source: cache.py

Example 8: disambiguationWikipedia

import wikipedia

def disambiguationWikipedia(noun):
    """
    Disambiguation for Wikipedia errors
    """
    # Try to get wikipedia content
    try:
        wiki = wikipedia.page(noun)

    except wikipedia.exceptions.DisambiguationError as e:
        # Ambiguous term: fall back to the first suggested option
        new = e.options[0]
        try:
            wiki = wikipedia.page(new)
        except Exception:
            return 'Null'

    except wikipedia.exceptions.PageError:
        # No direct page: fall back to the first search result
        new = wikipedia.search(noun)
        try:
            wiki = wikipedia.page(new[0])
        except Exception:
            return 'Null'

    except Exception:
        return 'Null'

    return wiki
Author: LaurenceArnold, Project: Vanilla-Android, Lines: 33, Source: eindopdracht.py

Example 9: search_wikipedia

import wikipedia

def search_wikipedia(word):
    searchArr = wikipedia.search(word)

    wiki_results = []
    try:
        try:
            for result in searchArr:
                wiki_results.append(wikipedia.page(result, preload=False))
        except wikipedia.DisambiguationError as e:
            # A search result was ambiguous: try its disambiguation options instead
            try:
                for item in e.options:
                    wiki_results.append(wikipedia.page(item, preload=False))
            except wikipedia.DisambiguationError as i:
                # An option was itself ambiguous: go one level deeper
                try:
                    for item in i.options:
                        wiki_results.append(wikipedia.page(item, preload=False))
                except wikipedia.DisambiguationError:
                    pass
    except Exception:
        print("Something went wrong getting wikipedia results")

    return wiki_results
Author: kottofy, Project: PythonDjango-SearchApp, Lines: 28, Source: Search_Wikipedia.py

Example 10: page

    # Defined inside a WikipediaWrapper class; there is no self parameter,
    # so this is presumably used as a static method.
    def page(title=None, pageid=None, auto_suggest=True, redirect=True):
        """
        The search term from the user may not correspond to a Wikipedia page,
        due to vagueness. There are two alternatives: "redirect" and "disambiguation".
        :param auto_suggest: let Wikipedia find a valid page title for the query
        :return: the cached or freshly downloaded page
        """
        if pageid is not None:
            pageid = int(pageid)
            page = WikipediaArticle.objects(pageid=pageid)
        else:
            page = WikipediaArticle.objects(title=title)
            if not page:
                results, suggestion = WikipediaWrapper.search(
                    title,
                    results=1,
                    suggestion=True)
                suggested_term = suggestion or results[0]
                page = WikipediaArticle.objects(title=suggested_term)
        if page:
            page = page[0]
        else:
            try:
                page = wikipedia.page(title=title,
                                      pageid=pageid,
                                      auto_suggest=auto_suggest,
                                      redirect=redirect)
            except UnicodeDecodeError:
                page = wikipedia.page(title=str_util.normal_str(title),
                                      pageid=pageid,
                                      auto_suggest=auto_suggest,
                                      redirect=redirect)
        if isinstance(page, wikipedia.WikipediaPage):
            page = WikipediaWrapper.save_page(page)
        return page
Author: imoonkey, Project: eduwiki, Lines: 35, Source: wikipedia_util.py

Example 11: climb_tree

    # Module-level imports assumed: wikipedia and bs4.BeautifulSoup

    def climb_tree(self):
        """Climb the advisor tree, starting from self.seed"""
        cur_branch = self.seed
        prev_node = None
        while cur_branch is not None:
            self.logger.debug('Current branch is %s' % cur_branch)
            # Get the wikipedia page
            try:
                cur_page = wikipedia.page(cur_branch)
            except wikipedia.PageError:
                self.logger.exception('Cannot find page for %s. Ending search.' % cur_branch)
                self.tree.node(cur_branch)
                if prev_node is not None:
                    self.tree.edge(cur_branch, prev_node)
                cur_branch = None
                continue
            except wikipedia.DisambiguationError:
                self.logger.exception('Multiple pages found for query %s. Adding "(physicist)" and searching again.' % cur_branch)
                cur_page = wikipedia.page(cur_branch + ' (physicist)')

            # Parse the infobox table
            html_source = BeautifulSoup(cur_page.html(), 'html.parser')
            advisor = self._search_info_table(html_source, ['Doctoral advisor', 'Doctoral advisors', 'Academic advisors', 'Academic advisor'])
            alma_mater = self._search_info_table(html_source, 'Alma mater')
            students = self._search_info_table(html_source, 'Doctoral students')
            # Add to graph
            self.tree.node(cur_branch, cur_branch + '\n' + self._none_filter(alma_mater))
            if prev_node is not None:
                self.tree.edge(cur_branch, prev_node)
            # Update
            prev_node = cur_branch
            cur_branch = self._res_filter(advisor)
Author: wtbarnes, Project: arbordemia, Lines: 33, Source: harvest.py

Example 12: link_checker

import wikipedia

def link_checker(ngram):
	''' Checks whether the ngram gives a valid wikipedia link '''
	try:
		page = wikipedia.page(ngram)
		return page.url
	except wikipedia.exceptions.DisambiguationError:
		# Link straight to the disambiguation page (underscores, as in wiki URLs)
		return 'http://en.wikipedia.org/wiki/' + ngram.replace(' ', '_') + '_(disambiguation)'
	except wikipedia.exceptions.PageError:
		# Drop title words and retry with the remaining ngram
		wordlist = [word for word in ngram.split() if word.lower() not in ("prime", "minister", "president")]
		ngram = " ".join(wordlist)
		try:
			page = wikipedia.page(ngram)
			return page.url
		except (wikipedia.exceptions.PageError,
		        wikipedia.exceptions.DisambiguationError):
			return -1
Author: svansuylekom, Project: Project-Tekstanalyse, Lines: 28, Source: PTA.py

Example 13: collectFrom

import random
import wikipedia

def collectFrom(lang, start, hangang):
    """Crawl Wikipedia in language `lang` from page `start` until the corpus reaches `hangang` characters."""
    wikipedia.set_lang(lang)
    start_page = wikipedia.page(start)  # fetch the seed page once and reuse it
    lookpa = start_page.links           # frontier: titles still to visit
    lookna = [start]                    # titles already visited
    corpus = str(start_page.content)
    while len(corpus) < hangang:
        random.shuffle(lookpa)
        item = lookpa[0]
        try:
            item_page = wikipedia.page(item)
            corpus += str(item_page.content)
            for page in item_page.links:
                if page not in lookpa and page not in lookna:
                    lookpa.append(page)
        except (wikipedia.exceptions.PageError,
                wikipedia.exceptions.DisambiguationError,
                KeyError):
            pass
        lookna.append(item)
        lookpa.remove(item)
        print('Corpus = ' + str(len(corpus)) + '   Searched = ' + str(len(lookna)) + '  Still = ' + str(len(lookpa)))

    with open(lang + 'FromWikiCorp.txt', 'w') as f:
        f.write(corpus)
Author: mcooper, Project: Philippines-Languages, Lines: 34, Source: wikipediasearch.py

Example 14: context_data

    def context_data(self):
        """
        Gather data from Wikipedia based on the user-input SUBJECT.
        """
        text_list, visited, visitedSeeAlso, queue = [], set(), set(), []
        queue.append((self.subject, self.depth))

        # First pass: breadth-first over search results, down to self.depth
        while len(queue) > 0:
            current = queue.pop(0)
            try:
                if current[0] not in visited and current[1] >= 0:
                    visited.add(current[0])
                    results = wikipedia.search(current[0], self.max_searches, False)
                    for pagename in results:
                        queue.append((pagename, current[1] - 1))
                    text_list.extend(wikipedia.page(current[0]).content.split())
            except Exception:
                pass

        # Second pass: breadth-first over "See also" references
        queue.append((self.subject, self.depth))
        while len(queue) > 0:
            current = queue.pop(0)
            try:
                if current[0] not in visitedSeeAlso and current[1] >= 0:
                    visitedSeeAlso.add(current[0])
                    page = wikipedia.page(current[0])
                    for reference in page.section("See also").splitlines():
                        queue.append((reference, current[1] - 1))
                    text_list.extend(page.content.split())
            except Exception:
                pass
        return text_list
Author: simster7, Project: WordBlock, Lines: 34, Source: SubjectContext.py

Example 15: getWikiPage

import random
import wikipedia

def getWikiPage(title):
  try:
    page = wikipedia.page(title)
  except wikipedia.exceptions.DisambiguationError as e:
    # Ambiguous title: pick one of the suggested options at random
    print(e.options)
    title = random.choice(e.options)
    page = wikipedia.page(title)
  return page
Author: sarahgarcin, Project: SpatialLab, Lines: 8, Source: import-wiki-article.py


Note: The wikipedia.page function examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers, and copyright in the source code remains with the original authors. For distribution and use, refer to the corresponding project's License; do not repost without permission.