本文整理汇总了Python中wikipedia.getSite函数的典型用法代码示例。如果您正苦于以下问题:Python getSite函数的具体用法?Python getSite怎么用?Python getSite使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了getSite函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: addCoords
def addCoords(sourceWiki, lang, article, lat, lon, region, type, dim):
    '''
    Insert a {{Coordinate}} template into *article* on <lang>.wikipedia.

    The template is placed directly before the first category link.
    Returns True when an edit was offered to the user, False when the
    page was skipped (missing, redirect, error, already tagged, or
    nothing to change).

    NOTE(review): the parameter *type* shadows the builtin, but renaming
    it would break keyword callers, so it is kept as-is.
    '''
    if not (article and lang and type):
        return False
    coordTemplate = 'Coordinate'
    site = wikipedia.getSite(lang, 'wikipedia')
    page = wikipedia.Page(site, article)
    try:
        text = page.get()
    except wikipedia.NoPage:  # first except, prevent empty pages
        logging.warning('Page empty: %s', article)
        return False
    except wikipedia.IsRedirectPage:  # second except, prevent redirect
        logging.warning('Page is redirect: %s', article)
        wikipedia.output(u'%s is a redirect!' % article)
        return False
    except wikipedia.Error:  # third exception, take the problem and print
        logging.warning('Some error: %s', article)
        wikipedia.output(u"Some error, skipping..")
        return False
    if coordTemplate in page.templates():
        logging.info('Already has Coordinate template: %s', article)
        return False
    if 'Linn' in page.templates():
        # presumably {{Linn}} carries its own coordinates — TODO confirm
        logging.info('Linn template without coords: %s', article)
        return False
    replCount = 1  # only tag the first category link
    coordText = u'{{Coordinate |NS=%s |EW=%s |type=%s |region=%s' % (lat, lon, type, region)
    if dim:
        coordText += u' |dim=%s' % (int(dim),)
    coordText += '}}'
    localCatName = wikipedia.getSite().namespace(WP_CATEGORY_NS)
    catStart = r'\[\[(' + localCatName + '|Category):'
    catStartPlain = u'[[' + localCatName + ':'
    # insert coordinate template before categories
    replacementText = coordText + '\n\n' + catStartPlain
    newtext = re.sub(catStart, replacementText, text, replCount, flags=re.IGNORECASE)
    if text == newtext:
        logging.info('Nothing to change: %s', article)
        return False
    logging.info('Adding coords to: %s', article)
    comment = u'lisan artikli koordinaadid %s.wikist' % (sourceWiki)
    wikipedia.showDiff(text, newtext)
    modPage = wikipedia.input(u'Modify page: %s ([y]/n) ?' % (article))
    # BUG FIX: the original compared the *method* modPage.lower to 'y'
    # (always False), so answering 'y' never saved; call it instead.
    if modPage.lower() == 'y' or modPage == '':
        page.put(newtext, comment)
    return True
示例2: __init__
def __init__(self, pageToUnlink, namespaces, always):
    """Set up the unlink bot: build the page generator and link regex."""
    self.pageToUnlink = pageToUnlink
    self.always = always
    self.done = False
    referring = pagegenerators.ReferringPageGenerator(pageToUnlink)
    if namespaces != []:
        referring = pagegenerators.NamespaceFilterPageGenerator(referring, namespaces)
    self.generator = pagegenerators.PreloadingGenerator(referring)
    # Wikilink pattern with four named groups:
    #   title     - the target page title, everything before | or ].
    #   section   - the page section, including the '#' to make life
    #               easier for us.
    #   label     - the alternative link title, everything between | and ].
    #   linktrail - letters after ]] which are part of the word; note
    #               that the definition of 'letter' varies from language
    #               to language.
    trail = pywikibot.getSite().linktrail()
    self.linkR = re.compile(
        r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)'
        % trail)
    self.comment = i18n.twtranslate(pywikibot.getSite(), 'unlink-unlinking',
                                    self.pageToUnlink.title())
示例3: store_wikipedia
def store_wikipedia(self):
    """Write the bot's bookkeeping lists back to wiki subpages of
    self.prefix (replaced words, per-page exceptions, globally correct
    words, and non-zero replacement counters)."""
    # words that were replaced
    text = ''.join('* %s : %s\n' % (k, self.replace[k])
                   for k in sorted(self.replace.keys()))
    page = pywikibot.Page(pywikibot.getSite(), '%s/replaced' % self.prefix)
    page.put_async(text)
    # per-page corrections
    pieces = []
    for k in sorted(self.correctPerPage.keys()):
        for v in sorted(self.correctPerPage[k]):
            pieces.append('* %s : %s\n' % (k, v))
    page = pywikibot.Page(pywikibot.getSite(), '%s/correctPerPage' % self.prefix)
    page.put_async(''.join(pieces))
    # globally correct words
    text = ''.join('* %s \n' % (k) for k in sorted(self.noall))
    page = pywikibot.Page(pywikibot.getSite(), '%s/correct' % self.prefix)
    page.put_async(text)
    # replacement counters, skipping zero entries
    # (page name 'replacCount' reproduced as in the original)
    text = ''.join('* %s : %s\n' % (k, self.rcount[k])
                   for k in sorted(self.rcount.keys())
                   if self.rcount[k] > 0)
    page = pywikibot.Page(pywikibot.getSite(), '%s/replacCount' % self.prefix)
    page.put_async(text)
示例4: __iter__
def __iter__(self):
    """
    Yield page objects until the entire XML dump has been read.

    Only entries whose wikitext matches one of self.templates are
    yielded.
    """
    import xmlreader
    mysite = pywikibot.getSite()
    # Build one alternation of per-template patterns.  {{vfd}} does the
    # same thing as {{Vfd}}, so both are matched (unless the site is
    # nocapitalize); spaces also match underscores, and the old
    # {{msg:vfd}} syntax is accepted too.
    # TODO: check site.nocapitalize()
    patterns = []
    for template in self.templates:
        pattern = template.title(withNamespace=False)
        if not pywikibot.getSite().nocapitalize:
            pattern = ("[" + pattern[0].upper() + pattern[0].lower() + "]"
                       + pattern[1:])
        patterns.append(re.sub(" ", "[_ ]", pattern))
    templateRegex = re.compile(
        r"\{\{ *([mM][sS][gG]:)?(?:%s) *(?P<parameters>\|[^}]+|) *}}" % "|".join(patterns)
    )
    for entry in xmlreader.XmlDump(self.xmlfilename).parse():
        if templateRegex.search(entry.text):
            yield pywikibot.Page(mysite, entry.title)
示例5: run
def run(self):
    """
    For every generated page, offer to create a capitalized redirect
    to it ([[foo bar]] -> create [[Foo bar]] as #REDIRECT), asking the
    user unless 'all' was chosen earlier.
    """
    for page in self.generator:
        if page.isRedirectPage():
            page = page.getRedirectTarget()
        page_t = page.title()
        # Show the title of the page we're working on.
        # Highlight the title in purple.
        wikipedia.output(u"\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page_t)
        # BUG FIX: page_t is already a plain title string; the original
        # called page_t.title() (str.title!) first, which title-cases
        # every word before .capitalize() undoes it again.
        page_cap = wikipedia.Page(wikipedia.getSite(), page_t.capitalize())
        if not page_cap.exists():
            wikipedia.output(u'%s doesn\'t exist' % page_cap.title())
            if not self.acceptall:
                choice = wikipedia.inputChoice(
                    u'Do you want to create a redirect?',
                    ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                if choice == 'a':
                    self.acceptall = True
            if self.acceptall or choice == 'y':
                try:
                    wikipedia.setAction(
                        wikipedia.translate(wikipedia.getSite(), msg)
                        % page_t)
                    page_cap.put(u"#REDIRECT [[%s]]" % page_t)
                    print
                # BUG FIX: narrowed from a bare except, which also
                # swallowed KeyboardInterrupt / SystemExit.
                except Exception:
                    wikipedia.output(
                        u"An error occurred. Retrying in 15 seconds...")
                    time.sleep(15)
                    continue
        else:
            # BUG FIX: print the actual title, not str.title() of it.
            wikipedia.output(u'%s already exists, skipping...\n'
                             % page_t)
示例6: main
def main():
'''
The main loop
'''
wikipedia.setSite(wikipedia.getSite(u'nl', u'wikipedia'))
conn = None
cursor = None
(conn, cursor) = connectDatabase()
items = getNumberOfItems(cursor)
images = getNumberOfImages(cursor)
addresses = {}
names = {}
pages = list(set(items.keys() + images.keys()))
pages.sort()
for key in pages:
print key
page = wikipedia.Page(wikipedia.getSite(), key)
text = page.get()
addresses[key] = getNumberOfAddresses(text)
names[key] = getNumberOfNames(text)
#print key + u' - ' + str(addresses[key]) + u' - ' + str(names[key])
updateStats(pages, items, addresses, names, images)
示例7: main
def main():
    """Parse the command line and run testSite over the selected
    families and languages (-all, -langs:, -families:, -wikimedia)."""
    test_all = False
    language = None
    fam = None
    wikimedia = False
    for arg in pywikibot.handleArgs():
        if arg == "-all":
            test_all = True
        elif arg.startswith("-langs:"):
            language = arg[7:]
        elif arg.startswith("-families:"):
            fam = arg[10:]
        elif arg.startswith("-wikimedia"):
            wikimedia = True
    mySite = pywikibot.getSite()
    if wikimedia:
        families = [
            "commons", "incubator", "mediawiki", "meta", "species",
            "test", "wikibooks", "wikidata", "wikinews", "wikipedia",
            "wikiquote", "wikisource", "wikiversity", "wikivoyage",
            "wiktionary",
        ]
    elif fam is not None:
        families = fam.split(",")
    else:
        families = [mySite.family.name]
    for family in families:
        try:
            family_obj = pywikibot.Family(family)
        except ValueError:
            pywikibot.output(u"No such family %s" % family)
            continue
        if test_all:
            # test every language of the family
            for lang in family_obj.langs.iterkeys():
                testSite(pywikibot.getSite(lang, family))
        elif language is None:
            # test the current language, falling back to the last one
            # the family knows about
            lang = mySite.lang
            if lang not in family_obj.langs.keys():
                lang = family_obj.langs.keys()[-1]
            testSite(pywikibot.getSite(lang, family))
        else:
            for lang in language.split(","):
                try:
                    testSite(pywikibot.getSite(lang, family))
                except pywikibot.NoSuchSite:
                    pywikibot.output(u"No such language %s in family %s" % (lang, family))
示例8: facatlist
def facatlist(facat):
    """
    Collect the titles of all pages in the given fa.wiki category and
    in every subcategory reachable via categorydown().

    *facat* may be wrapped in [[...]]; the brackets are stripped.
    Returns the list of page titles, or False when nothing was found.
    """
    wikipedia.config.put_throttle = 0
    wikipedia.put_throttle.setDelay()
    count = 0
    listenpageTitle = []
    PageTitle = facat.replace(u'[[', u'').replace(u']]', u'').strip()
    language = 'fa'
    PageTitles = [PageTitle]
    for PageTitle in PageTitles:
        cat = catlib.Category(wikipedia.getSite(language), PageTitle)
        # expand to the whole subcategory tree
        listacategory = categorydown([cat])
        for enpageTitle in listacategory:
            # strip '[[Title|label]]' decoration down to the bare title
            enpageTitle = str(enpageTitle).split(u'|')[0].split(u']]')[0].replace(u'[[', u'').strip()
            cat = catlib.Category(wikipedia.getSite(language), enpageTitle)
            gent = pagegenerators.CategorizedPageGenerator(cat)
            for pagework in gent:
                count += 1
                try:
                    link = str(pagework).split(u'|')[0].split(u']]')[0].replace(u'[[', u'').strip()
                # BUG FIX: narrowed from a bare except (which also caught
                # KeyboardInterrupt); presumably str() fails here on
                # non-ASCII titles under Python 2 — TODO confirm
                except Exception:
                    pagework = unicode(str(pagework), 'UTF-8')
                    link = pagework.split(u'|')[0].split(u']]')[0].replace(u'[[', u'').strip()
                wikipedia.output(link)
                fapagetitle = link
                wikipedia.output(u'adding ' + fapagetitle + u' to fapage lists')
                listenpageTitle.append(fapagetitle)
    if listenpageTitle == []:
        return False
    return listenpageTitle
示例9: main
def main():
    """
    Entry point for the noreferences bot.

    Builds a page generator from the command line (-xml, -namespace:,
    -always, plus the shared GeneratorFactory options or a plain page
    title), falling back to the per-wiki maintenance category, then
    runs NoReferencesBot over it.
    """
    # page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Never ask before changing a page
    always = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpNoReferencesPageGenerator(xmlFilename)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                # non-numeric namespaces are kept as names
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)
    if pageTitle:
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        # fall back to the wiki's maintenance category, if configured
        site = pywikibot.getSite()
        try:
            cat = maintenance_category[site.family.name][site.lang]
        # BUG FIX: narrowed from a bare except; only a missing
        # family/language entry should be silently ignored here.
        except KeyError:
            pass
        else:
            import catlib
            if not namespaces:
                namespaces = [0]
            cat = catlib.Category(site, "%s:%s" % (site.category_namespace(),
                                                   cat))
            gen = pagegenerators.CategorizedPageGenerator(cat)
    if not gen:
        pywikibot.showHelp('noreferences')
    else:
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = NoReferencesBot(preloadingGen, always)
        bot.run()
示例10: main
def main():
    '''
    The main loop: walk "Media needing categories as of <date>"
    category pages on Commons and categorize the images they contain.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    (conn, cursor) = connectDatabase()
    imagerecat.initLists()
    generator = None
    genFactory = pagegenerators.GeneratorFactory()
    mark = True
    for arg in wikipedia.handleArgs():
        if arg.startswith('-dontmark'):
            mark = False
        elif arg.startswith('-page'):
            if len(arg) == 5:
                title = wikipedia.input(u'What page do you want to use?')
            else:
                title = arg[6:]
            generator = [wikipedia.Page(wikipedia.getSite(), title)]
        elif arg.startswith('-yesterday'):
            generator = [wikipedia.Page(wikipedia.getSite(), u'Category:Media_needing_categories_as_of_' + getYesterday())]
        else:
            generator = genFactory.handleArg(arg)
    if not generator:
        return
    for page in generator:
        # only act on the expected maintenance category pages
        if page.namespace() == 14 and page.title().startswith(u'Category:Media needing categories as of'):
            wikipedia.output(u'Working on ' + page.title())
            for (image, gals, cats) in getImagesToCategorize(cursor, page.titleWithoutNamespace()):
                categorizeImage(image, gals, imagerecat.applyAllFilters(cats))
            if mark:
                categoriesChecked(page.title())
示例11: processImage
def processImage(self, fields):
    '''
    Work on a single image: in autonomous mode silently drop name
    clashes; otherwise show the Tk dialog until the user skips or picks
    a free filename.  Accepted field sets go on the upload queue.
    '''
    if self.autonomous:
        # Do nothing if the target name is already taken on Commons.
        commons = pywikibot.getSite('commons', 'commons')
        CommonsPage = pywikibot.Page(commons, u'File:' + fields.get('filename'))
        if CommonsPage.exists():
            return False
    else:
        while True:
            # Tk dialog to accept/reject and possibly change the name
            fields = Tkdialog(fields).getnewmetadata()
            if fields.get('skip'):
                pywikibot.output(u'Skipping %s : User pressed skip.' % fields.get('imagepage').title())
                return False
            CommonsPage = pywikibot.Page(pywikibot.getSite('commons', 'commons'),
                                         u'File:' + fields.get('filename'))
            if not CommonsPage.exists():
                break
            # We never overwrite existing images: ask for another name
            # and go back to the start of the loop.
            pywikibot.output('Image already exists, pick another name or skip this image')
    # hand the accepted fields to the uploader thread
    self.uploadQueue.put(fields)
示例12: main
def main():
    """Parse -page/-bigcat/-target and either split out one big
    category or intersect the categories found via the generator."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    bigcategory = u''
    target = u''
    generator = None
    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            if len(arg) == 5:
                title = wikipedia.input(u'What page do you want to use?')
            else:
                title = arg[6:]
            generator = [wikipedia.Page(wikipedia.getSite(), title)]
        elif arg.startswith('-bigcat'):
            if len(arg) == 7:
                bigcategory = wikipedia.input(u'What category do you want to split out?')
            else:
                bigcategory = arg[8:]
        elif arg.startswith('-target'):
            if len(arg) == 7:
                target = wikipedia.input(u'What category is the target category?')
            else:
                target = arg[8:]
    if bigcategory != u'':
        splitOutCategory(bigcategory, target)
    else:
        if not generator:
            # default: every category transcluding the intersect template
            template = wikipedia.Page(wikipedia.getSite(), u'Template:Intersect categories')
            referring = pagegenerators.ReferringPageGenerator(template, onlyTemplateInclusion=True)
            generator = pagegenerators.NamespaceFilterPageGenerator(referring, [14])
        for cat in generator:
            intersectCategories(cat)
示例13: loadPagesWiki
def loadPagesWiki(wr, correctWords_page, ignorePages_page):
"""
Load list of correct words and ignored pages
"""
# Load correct words
mypage = pywikibot.Page(pywikibot.getSite(), correctWords_page)
text = mypage.get()
lines = text.split('* ')[1:]
correctWords = {}
for l in lines:
spl = l.split(' : ')
tmp = correctWords.get( spl[0], [] )
tmp.append( spl[1].strip() )
correctWords[spl[0]] = tmp
print "loaded %s correct words" % len(correctWords)
# Load ignore pages
mypage = pywikibot.Page(pywikibot.getSite(), ignorePages_page)
text = mypage.get()
lines = text.split('* ')[1:]
ignorePages = []
for l in lines:
ignorePages.append(l.strip())
print "loaded %s ignored pages " % len(ignorePages)
wr.ignorePages = ignorePages
wr.ignorePerPages = correctWords
示例14: __init__
def __init__(self, page, filename, summary, dry, always):
    """Remember the target page and upload parameters; when no summary
    was given, fall back to the translated default message."""
    self.page = pywikibot.Page(pywikibot.getSite(), page)
    self.filename = filename
    self.summary = summary or pywikibot.translate(pywikibot.getSite(), self.msg)
    pywikibot.setAction(self.summary)
示例15: main
def main():
    """Geocode monuments: either for one -countrycode: in the current
    content language, or for every auto-geocodable country config."""
    countrycode = u''
    # Connect database, we need that
    (conn, cursor) = connectDatabase()
    (conn2, cursor2) = connectDatabase2()
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg.startswith('-countrycode:'):
            countrycode = arg[len('-countrycode:'):]
    # remember the content language before switching over to Commons
    lang = wikipedia.getSite().language()
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    if countrycode:
        countryconfig = mconfig.countries.get((countrycode, lang))
        if not countryconfig:
            wikipedia.output(u'I have no config for countrycode "%s" in language "%s"' % (countrycode, lang))
            return False
        wikipedia.output(u'Working on countrycode "%s" in language "%s"' % (countrycode, lang))
        locateCountry(countrycode, lang, countryconfig, conn, cursor, conn2, cursor2)
    else:
        for (countrycode, lang), countryconfig in mconfig.countries.iteritems():
            if not countryconfig.get('autoGeocode'):
                wikipedia.output(u'"%s" in language "%s" is not supported in auto geocode mode (yet).' % (countrycode, lang))
            else:
                wikipedia.output(u'Working on countrycode "%s" in language "%s"' % (countrycode, lang))
                locateCountry(countrycode, lang, countryconfig, conn, cursor, conn2, cursor2)