This article collects typical usage examples of the mechanize.Browser.set_proxies method in Python. If you are wondering what Browser.set_proxies does, how to call it, or what real-world uses of it look like, the curated examples below should help. You can also explore further usage examples of the containing class, mechanize.Browser.
The following presents 15 code examples of the Browser.set_proxies method, sorted by popularity by default.
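Before the examples, here is a minimal sketch of the call pattern itself. set_proxies takes a dict mapping a URL scheme to a proxy given as "host:port"; the proxy address and target URL below are placeholders, not values taken from any example on this page. Passing an empty dict appears to switch proxies off entirely rather than falling back to system defaults, which is what Example 3 below relies on.

from mechanize import Browser

br = Browser()
# Map each URL scheme to its proxy ("host:port"); both values here are placeholders.
br.set_proxies({"http": "127.0.0.1:8080", "https": "127.0.0.1:8080"})
# Passing an empty dict instead (set_proxies({})) can be used to avoid proxies entirely (cf. Example 3).
response = br.open("http://example.com")
print(response.read()[:200])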
Example 1: testPx
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
def testPx(px):
    B=Browser()
    B.addheaders = [('User-agent', userAgents[randint(0,len(userAgents)-1)])]
    B.set_proxies(px)
    try:
        B.open('http://graphicriver.net/',timeout=5)
        pxQ.put(px)
        print(px['http']+" ok")
        B.open('http://graphicriver.net/category/all',timeout=5)
    except:
        print(px['http']+" error")
    page = pageQ.get()
    try:
        # pass
        # finally:
        count=0
        while(count<5):
            O = B.open('http://graphicriver.net/category/all?page='+str(page),timeout=8)
            turls = lxml.html.document_fromstring(O.get_data()).xpath('//div[@class="item-info"]/h3/a/@href')
            for url in turls:
                urlsQ.put(url)
            print(str(page)+" got")
            pageDoneQ.put(page)
            page = pageQ.get()
            count+=1
    except:
        pageQ.put(page)
        print(str(page)+" error")
Example 2: DOMScanner
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
class DOMScanner(threading.Thread):
    def __init__(self, engine, queue):
        threading.Thread.__init__(self)
        self.queue = queue
        self.engine = engine
        self.errors = {}
        self.results = []
        self.javascript = []
        self.whitelist = []

        self.browser = Browser()
        self._setProxies()
        self._setHeaders()

    def _setHeaders(self):
        if self.engine.getOption('ua') is not None:
            if self.engine.getOption('ua') is "RANDOM":
                self.browser.addheaders = [('User-Agent', random.choice(USER_AGENTS))]
            else:
                self.browser.addheaders = [('User-Agent', self.engine.getOption('ua'))]
        if self.engine.getOption("cookie") is not None:
            self.browser.addheaders = [("Cookie", self.engine.getOption("cookie"))]

    def _setProxies(self):
        if self.engine.getOption('http-proxy') is not None:
            self.browser.set_proxies({'http': self.engine.getOption('http-proxy')})

    def _addError(self, key, value):
        if self.errors.has_key(key):
            self.errors[key].append(value)
        else:
            self.errors[key] = [value]

    def _parseJavascript(self, target):
        if self.engine.getOption("ua") is "RANDOM": self._setHeaders()

        url = target.getFullUrl()
        try:
            to = 10 if self.engine.getOption('http-proxy') is None else 20
            response = self.browser.open(url, timeout=to) #urlopen(req, timeout=to)
        except HTTPError, e:
            self._addError(e.code, target.getAbsoluteUrl())
            return
        except URLError, e:
            self._addError(e.reason, target.getAbsoluteUrl())
            return
Example 3: init_browser
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
def init_browser():
    browser = Browser()
    browser.addheaders = (
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        ('User-agent', 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)')  # We're Firefox! :P
    )
    # browser.set_handle_gzip(True)  # Currently experimental in mechanize
    browser.set_handle_redirect(True)
    browser.set_handle_refresh(False)
    browser.set_handle_robots(True)
    browser.set_handled_schemes(['http', 'https'])
    browser.set_proxies({})
    return browser
Example 4: browseUrlList
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
def browseUrlList(self, urlList, proxies = {}):
    browseData = {}
    from mechanize import Browser
    br = Browser()
    # Explicitly configure proxies (Browser will attempt to set good defaults).
    br.set_proxies(proxies)
    curUrlList = []
    for i in urlList:
        print 'connecting:',i
        response = br.open(i)
        browseData[i] = response.read()
        #Check cache first.
        curUrlList.append(i)
        self.cache(curUrlList, browseData[i])
    return browseData
Example 5: getsoup
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
def getsoup(URL, proxy = None):
    br = Browser()
    if proxy is not None:
        br.set_proxies(proxy)
    br.open(URL)
    try:
        title_URL = br.find_link(url_regex = re.compile(r'/title/tt.*'))
    except LinkNotFoundError:
        return None
    try:
        res = br.follow_link(title_URL)
    except URLError:
        return None
    soup = BeautifulSoup(res.read())
    return soup
Example 6: Crawler
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
class Crawler(threading.Thread):
    def __init__(self, engine, queue, crawl_links = False, crawl_forms = False):
        threading.Thread.__init__(self)
        self.engine = engine
        self.queue = queue
        self.results = []
        self.errors = {}
        self.crawl_links = crawl_links
        self.crawl_forms = crawl_forms

        self.browser = Browser()
        self._setProxies()
        self._setHeaders()

    def _setHeaders(self):
        if self.engine.getOption('ua') is not None:
            if self.engine.getOption('ua') is "RANDOM":
                self.browser.addheaders = [('User-Agent', random.choice(USER_AGENTS))]
            else:
                self.browser.addheaders = [('User-Agent', self.engine.getOption('ua'))]
        if self.engine.getOption("cookie") is not None:
            self.browser.addheaders = [("Cookie", self.engine.getOption("cookie"))]

    def _setProxies(self):
        if self.engine.getOption('http-proxy') is not None:
            self.browser.set_proxies({'http': self.engine.getOption('http-proxy')})

    def _addError(self, key, value):
        if self.errors.has_key(key):
            self.errors[key].append(value)
        else:
            self.errors[key] = [value]

    def _crawlLinks(self, target):
        # If UA is RANDOM we need to refresh browser's headers
        if self.engine.getOption("ua") is "RANDOM": self._setHeaders()

        try: self.browser.open(target.getAbsoluteUrl())
        except HTTPError, e:
            self._addError(e.code, target.getAbsoluteUrl())
            return False
        except URLError, e:
            self._addError(e.reason, target.getAbsoluteUrl())
            return False
Example 7: BeautifulSoup
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
import urllib
from bs4 import BeautifulSoup
import urllib2
from mechanize import Browser
url = "http://www.google.com"
#URLLIB PROXY SUPPORT.
#proxies = {'http': 'http://localhost:8008'}
#urlhandle = urllib.urlopen(url, proxies=proxies)
#print urlhandle.read()
#URLLIB2 PROXY SUPPORT AND THEN USING BEAUTIFULSOUP
proxy = urllib2.ProxyHandler( {'http': 'localhost:8008'} )
opener = urllib2.build_opener( proxy )
urllib2.install_opener( opener )
request = urllib2.Request( url )
response = urllib2.urlopen( request )
html = response.read()
soup = BeautifulSoup(html, "lxml")
div = soup.find_all( 'div', id="gs_lc0" )
#print div
#MECHANIZE PROXY SUPPORT.
br = Browser()
br.set_proxies({"http": "localhost:8008"})
response = br.open(url)
print response
Example 8: getSolutions
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
def getSolutions (path_prefix, path_proxy):
    global br, username, password

    # create a browser object
    br = Browser()

    # add proxy support to browser
    if len(path_proxy) != 0:
        protocol,proxy = options.proxy.split("://")
        br.set_proxies({protocol:proxy})

    # let browser fool robots.txt
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; \
        rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    br.set_handle_robots(False)

    print "Enter yout SPOJ username :",
    username = raw_input()
    password = getpass.getpass()

    # authenticate the user
    print "Authenticating " + username
    br.open ("http://spoj.com/login")
    #br.select_form (name="login")
    #the form no longer is named "login" therefore to access it by id:
    formcount=0
    for frm in br.forms():
        if str(frm.attrs["id"])=="login-form":
            break
        formcount=formcount+1
    br.select_form(nr=formcount)

    br["login_user"] = username
    br["password"] = password

    # sign in for a day to avoid timeouts
    #br.find_control(name="autologin").items[0].selected = True
    #this attribute is missing in the new spoj format
    br.form.action = "http://www.spoj.com/login"
    response = br.submit()
    verify = response.read()
    if (verify.find("Authentication failed!") != -1):
        print "Error authenticating - " + username
        exit(0)

    # grab the signed submissions list
    print "Grabbing siglist for " + username
    siglist = br.open("http://www.spoj.pl/status/" + username + "/signedlist")

    # dump first nine useless lines in signed list for formatting
    for i in xrange(9):
        siglist.readline()

    # make a list of all AC's and challenges
    print "Filtering siglist for AC/Challenge solutions..."
    mysublist = list()
    while True:
        temp = siglist.readline()
        if temp=='\------------------------------------------------------------------------------/\n':
            # reached end of siglist
            break
        if not len(temp) :
            print "Reached EOF, siglist format has probably changed," + \
                  " contact author."
            exit(1)
        entry = [x.strip() for x in temp.split('|')]
        if entry[4] == 'AC' or entry[4].isdigit():
            mysublist.append (entry)
    print "Done !!!"
    return mysublist
Example 9: __init__
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
class FlickrBot:
    # use favorite authors as contacts.
    __url = "http://www.flickr.com"
    __favorites_url = "http://www.flickr.com/photos/%s/favorites"
    __favorite_regex = "/photos/[[email protected]]+/[0-9]+/"
    __favorite_prefix = ""
    __favorite_sufix = ""
    __contact_regex = __favorite_regex
    __complete_name_regex = __contact_regex
    __complete_name_prefix = "/photos/"
    __complete_name_sufix = "/"
    __tag_regex = "/photos/tags/[a-zA-Z_0-9\-]+/"
    __tag_prefix = "/photos/tags/"
    __tag_sufix = "/"
    __forb_tags = ["the", "and", "their", "at", "is", "in", "of", "a", "on", "for", "an", "with"]

    def __init__(self, proxies_per_proto={}, debug=False):
        self.__br = Browser()
        self.__br.set_proxies(proxies_per_proto)
        self.__br.set_debug_http(debug)
        # no respect for robots.txt
        self.__br.set_handle_robots(False)
        self.__sleep_secs = 0
        self.__sleep_module = 1
        self.__gets = 0
        # no sign in
        # but i have a dummy user
        # user: [email protected]
        # password: zarasa123
        pass

    def set_sleep_secs(self, secs):
        self.__sleep_secs = secs

    def set_sleep_module(self, iterations):
        self.__sleep_module = iterations

    def __try_sleep(self):
        self.__gets += 1
        if self.__gets % self.__sleep_module == 0:
            print "Sleeping for %f seconds, every %d GETs" % (self.__sleep_secs, self.__sleep_module)
            time.sleep(self.__sleep_secs)

    # most_viewed
    # top_rated
    # recently_featured
    # watch_on_mobile
    def seeds(self):
        self.__try_sleep()
        resp = self.__br.open("http://www.flickr.com/")
        cont = resp.read()
        matches = re.findall(self.__contact_regex, cont)
        users = map(self.__strip_complete_name, matches)
        return users

    def search(self, query):
        br = self.__br
        # check if name exists.
        try:
            url = "http://www.flickr.com/photos/%s/favorites/" % query
            print url
            self.__try_sleep()
            resp = self.__br.open(url)
            cont = resp.read()
            if not "favorites" in cont:
                return []
        except Exception, e:
            if str(e) == "HTTP Error 404: Not Found":
                return []
            else:
                raise e
        return [cont]
Example 10: Runner
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
class Runner(QThread):
    '''
    Authenticates to polimi and downloads cert
    '''
    statusChanged = Signal(int,unicode)
    error = Signal(unicode)

    def __init__(self,nm,user,password,anonuser,certPass,ptext,pbar):
        '''
        Constructor
        '''
        QThread.__init__(self)
        self.triumph = True
        self.user = user
        self.nm_iface = nm.applet
        self.anonuser = anonuser
        self.password = password
        self.passphrase = certPass
        self.certLocation = os.path.expanduser(CERT_LOCATION)
        self.certFolder = os.path.split(self.certLocation)[0]
        self.bro = Browser()
        if PROXY:
            self.bro.set_proxies({"http":PROXY,"https":PROXY})
        self.bro.set_handle_robots(0)

    def run(self):
        self.statusChanged.emit(0,self.tr("Connecting to ASICT..."))
        self.bro.open(START_URL)
        self.bro.follow_link(text='logon')
        self.statusChanged.emit(15,self.tr("Logging in..."))
        self.bro.select_form('')
        self.bro.form['login'] = str(self.user)
        self.bro.form['password'] = str(self.password)
        self.bro.submit()
        self.response = self.bro.open(START_URL)
        # Check whether the login succeeded
        if "logon" in self.response.read().split('<div id="preamble">')[-1].split("</div>")[0]:
            self.error.emit(self.tr("Wrong username or password"))
            return
        self.statusChanged.emit(40,self.tr("Downloading certificate.."))
        self.response = self.bro.follow_link(text='nuovo certificato')
        self.bro.select_form(name='exists')
        self.bro.submit()
        self.bro.select_form(name='passphrase')
        self.bro.form['passphrase'] = self.passphrase
        self.bro.form['passphraseCheck'] = self.passphrase
        tempfile = self.bro.retrieve(self.bro.form.click('_qf_passphrase_next'))
        response = self.bro.open(DOWNLOAD_URL)
        del tempfile
        if not os.path.exists(self.certFolder):
            os.mkdir(self.certFolder, FOLDER_MODE)
        try:
            f = open(self.certLocation, "w")
            f.write(response.read())
            f.close()
        except IOError:
            print "I/O Error during file writing"
            self.error.emit(self.tr("Can not save certificate"))
            return
        self.statusChanged.emit(70,self.tr("Creating CA certificate..."))
        popen_obj=Popen(CMD_OSSL.format(self.certLocation,os.path.join(self.certFolder,"asi.cer")), shell=True, stdin=PIPE, stdout=PIPE)
        popen_obj.communicate(self.passphrase+"\n"+self.passphrase)
        if popen_obj.poll() != 0:
            self.error.emit(self.tr("OpenSSL error: can not generate CA file"))
            return
        self.statusChanged.emit(90,self.tr("Creating NM connection..."))
        c = settings.WiFi(CLOSED_AP)
        c["connection"]["autoconnect"]=False
        c["802-11-wireless"]["security"]="802-11-wireless-security"
        c["802-11-wireless-security"]={}
        c["802-11-wireless-security"]["key-mgmt"]="wpa-eap"
        c["802-11-wireless-security"]["auth-alg"]="open"
        c["802-1x"]={}
        c["802-1x"]["eap"]=['tls']
        c["802-1x"]["client-cert"]=dbus.ByteArray("file://"+self.certLocation+"\0")
        c["802-1x"]["anonymous-identity"]= self.anonuser
        c["802-1x"]["ca-cert"]=dbus.ByteArray("file://"+os.path.join(self.certFolder,"asi.cer")+"\0")
        c["802-1x"]["private-key"]=dbus.ByteArray("file://"+self.certLocation+"\0")
        c["802-1x"]["private-key-password"]=self.passphrase
        c["802-1x"]["phase2-auth"]="mschapv2"
        try:
            self.nm_iface.AddConnection(c.conmap)
        except:
            self.error.emit(self.tr("Can not create network profile"))
            return
        self.statusChanged.emit(100,self.tr("Done"))
        Popen("gksu python2 /usr/share/poliwireless/quirks.py",shell=True).communicate("")
Example 11: __init__
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
class YouTubeBot:
    #__name_regex = 'href="http://profile.myspace.com/index.cfm?fuseaction=user.viewprofile&friendID=[0-9]+" linkindex='
    # use favorite authors as contacts.
    __contact_regex = 'http://www.youtube.com/profile\?user=[a-zA-Z_0-9]+'
    __complete_name_regex = 'http://www.youtube.com/profile\?user=[a-zA-Z_0-9]+'
    __complete_name_prefix = 'http://www.youtube.com/profile?user='
    __complete_name_sufix = ''
    __favorite_regex = 'http://www.youtube.com/watch\?v=[a-zA-Z_0-9\-]+'
    __favorite_regex = 'http://www.youtube.com/watch\?v=[a-zA-Z_0-9\-]+'
    __favorite_prefix = 'http://www.youtube.com/watch\?v='
    __favorite_sufix = ''
    __tag_regex = ' term=\'[a-zA-Z_0-9\-]+\'/>'
    __tag_regex = ' term=\'[a-zA-Z_0-9\-]+\'/>'
    __tag_prefix = ' term=\''
    __tag_sufix = '\'/>'
    __forb_tags = ['the', 'and', 'their', 'at', 'is', 'in', 'of',
                   'a', 'on', 'for', 'an', 'with', 'to']

    def __init__(self, proxies_per_proto={}, debug=False):
        self.__br = Browser()
        self.__br.set_proxies(proxies_per_proto)
        self.__br.set_debug_http(debug)
        # no respect for robots.txt
        self.__br.set_handle_robots(False)
        self.__sleep_secs = 0.0
        self.__sleep_module = 9999999
        self.__sleep_failure = 120.0
        self.__gets = 0
        # no sign in
        # but i have a dummy user
        # user: [email protected]
        # password: zarasa123
        pass

    def set_sleep_secs(self, secs):
        self.__sleep_secs = float(secs)

    def set_sleep_module(self, iterations):
        self.__sleep_module = iterations

    def set_sleep_failure(self, secs):
        self.__sleep_failure = float(secs)

    def __try_sleep(self):
        self.__gets += 1
        if self.__gets % self.__sleep_module == 0:
            print 'Sleeping for %f seconds, every %d GETs' % (self.__sleep_secs, self.__sleep_module)
            time.sleep(self.__sleep_secs)

    #most_viewed
    #top_rated
    #recently_featured
    #watch_on_mobile
    def seeds(self, type='most_viewed'):
        self.__try_sleep()
        resp = self.__br.open('http://gdata.youtube.com/feeds/standardfeeds/' + type)
        cont = resp.read()
        matches = re.findall(self.__contact_regex, cont)
        featured_users = map(self.__strip_complete_name, matches)
        return featured_users

    def search(self, query):
        br = self.__br
        # check if name exists.
        try:
            print 'http://gdata.youtube.com/feeds/users/%s/favorites' % query
            self.__try_sleep()
            resp = self.__br.open('http://gdata.youtube.com/feeds/users/%s/favorites' % query)
        except Exception, e:
            if str(e) == 'HTTP Error 404: Not Found':
                return []
            else:
                raise e
        return [resp.read()]
Example 12: DOMScanner
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
class DOMScanner(threading.Thread):
    def __init__(self, engine, queue):
        threading.Thread.__init__(self)
        self.queue = queue
        self.engine = engine
        self.errors = {}
        self.results = []
        self.javascript = []
        self.whitelisted_js = []
        self.whitelist = []

        self.browser = Browser()
        self._setProxies()
        self._setHeaders()
        self._getWhitelist()

    def _setHeaders(self):
        if self.engine.getOption('ua') is not None:
            if self.engine.getOption('ua') is "RANDOM":
                self.browser.addheaders = [('User-Agent', random.choice(USER_AGENTS))]
            else:
                self.browser.addheaders = [('User-Agent', self.engine.getOption('ua'))]
        if self.engine.getOption("cookie") is not None:
            self.browser.addheaders = [("Cookie", self.engine.getOption("cookie"))]

    def _setProxies(self):
        if self.engine.getOption('http-proxy') is not None:
            self.browser.set_proxies({'http': self.engine.getOption('http-proxy')})

    def _addError(self, key, value):
        if self.errors.has_key(key):
            self.errors[key].append(value)
        else:
            self.errors[key] = [value]

    def _getWhitelist(self):
        path = os.path.split(os.path.realpath(__file__))[0]
        path = os.path.join(path, "../lib/whitelist.xml")
        f = open(path, "rb")
        xml = f.read()
        root = etree.XML(xml)
        for element in root.iterfind("javascript"):
            el = {
                'hash' : element.find("hash").text,
                'description': element.find("description").text,
                'reference': element.find("reference").text
            }
            self.whitelist.append(el)

    def _parseJavascript(self, target):
        if self.engine.getOption("ua") is "RANDOM": self._setHeaders()

        url = target.getFullUrl()
        try:
            to = 10 if self.engine.getOption('http-proxy') is None else 20
            response = self.browser.open(url, timeout=to) #urlopen(req, timeout=to)
        except HTTPError, e:
            self._addError(e.code, target.getAbsoluteUrl())
            return
        except URLError, e:
            self._addError(e.reason, target.getAbsoluteUrl())
            return
Example 13: CourseraDownloader
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
class CourseraDownloader(object):
    """
    Class to download content (videos, lecture notes, ...) from coursera.org for
    use offline.

    https://github.com/dgorissen/coursera-dl
    """

    BASE_URL = 'http://class.coursera.org/%s'
    HOME_URL = BASE_URL + '/class/index'
    LECTURE_URL = BASE_URL + '/lecture/index'
    LOGIN_URL = BASE_URL + '/auth/auth_redirector?type=login&subtype=normal'
    QUIZ_URL = BASE_URL + '/quiz/index'

    DEFAULT_PARSER = "lxml"

    def __init__(self,username,password,proxy=None,parser=DEFAULT_PARSER):
        """Requires your coursera username and password.
        You can also specify the parser to use (defaults to lxml), see http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser
        """
        self.username = username
        self.password = password
        self.parser = parser

        self.browser = Browser()
        if proxy:
            self.browser.set_proxies({"http":proxy})
        self.browser.set_handle_robots(False)

    def login(self,course_name):
        print "* Authenticating as %s..." % self.username

        # open the course login page
        page = self.browser.open(self.LOGIN_URL % course_name)

        # check if we are already logged in by checking for a password field
        bs = BeautifulSoup(page,self.parser)
        pwdfield = bs.findAll("input",{"id":"password_login"})

        if pwdfield:
            self.browser.form = self.browser.forms().next()
            self.browser['email'] = self.username
            self.browser['password'] = self.password
            r = self.browser.submit()

            # check that authentication actually succeeded
            bs2 = BeautifulSoup(r.read(),self.parser)
            title = bs2.title.string
            if title.find("Login Failed") > 0:
                raise Exception("Failed to authenticate as %s" % (self.username,))
        else:
            # no login form, already logged in
            print "* Already logged in"

    def course_name_from_url(self,course_url):
        """Given the course URL, return the name, e.g., algo2012-p2"""
        return course_url.split('/')[3]

    def lecture_url_from_name(self,course_name):
        """Given the name of a course, return the video lecture url"""
        return self.LECTURE_URL % course_name

    def get_downloadable_content(self,course_url):
        """Given the video lecture URL of the course, return a list of all
        downloadable resources."""
        cname = self.course_name_from_url(course_url)

        print "* Collecting downloadable content from " + course_url

        # get the course name, and redirect to the course lecture page
        vidpage = self.browser.open(course_url)

        # extract the weekly classes
        soup = BeautifulSoup(vidpage,self.parser)
        headers = soup.findAll("div", { "class" : "course-item-list-header" })

        weeklyTopics = []
        allClasses = {}

        # for each weekly class
        for header in headers:
            h3 = header.findNext('h3')
            sanitisedHeaderName = sanitiseFileName(h3.text)
            weeklyTopics.append(sanitisedHeaderName)
            ul = header.next_sibling
            lis = ul.findAll('li')
            weekClasses = {}

            # for each lecture in a weekly class
            classNames = []
            for li in lis:
                className = sanitiseFileName(li.a.text)
                classNames.append(className)
                classResources = li.find('div', {'class':'course-lecture-item-resource'})
#......... the rest of this example is omitted here .........
Example 14: getSolutions
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
def getSolutions (path_prefix, path_proxy):
    global br, username, password

    # create a browser object
    br = Browser()

    # add proxy support to browser
    if len(path_proxy) != 0:
        protocol,proxy = options.proxy.split("://")
        br.set_proxies({protocol:proxy})

    # let browser fool robots.txt
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; \
        rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    br.set_handle_robots(False)

    print ("Enter yout SPOJ username :"),
    username = raw_input()
    password = getpass.getpass()

    # authenticate the user
    print ("Authenticating " + username)
    br.open ("http://www.spoj.com/login")
    br.select_form (nr = 0)
    br["login_user"] = username
    br["password"] = password
    response = br.submit()
    verify = response.read()
    if (verify.find("Authentication failed!") != -1):
        print ("Error authenticating - " + username)
        exit(0)

    # grab the signed submissions list
    print ("Grabbing siglist for " + username)
    siglist = br.open("http://www.spoj.com/status/" + username + "/signedlist")

    # dump first nine useless lines in signed list for formatting
    for i in xrange(9):
        siglist.readline()

    # make a list of all AC's and challenges
    print ("Filtering siglist for AC/Challenge solutions...")
    mysublist = list()
    while True:
        temp = siglist.readline()
        if temp=='\------------------------------------------------------------------------------/\n':
            # reached end of siglist
            break
        if not len(temp) :
            print ("Reached EOF, siglist format has probably changed," + \
                   " contact author.")
            exit(1)
        entry = [x.strip() for x in temp.split('|')]
        if entry[4] == 'AC' or entry[4].isdigit():
            mysublist.append (entry)
    print ("Done !!!")
    return mysublist
Example 15: __init__
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
class TwitterBot:
    __contact_regex = '<a href="http://twitter.com/[a-zA-Z_]+" rel="contact"><img alt=".*" class='
    __complete_name_regex = 'alt=".*" class='
    __complete_name_prefix = 'alt="'
    __complete_name_sufix = '" class='
    __url_regex = 'href=".*" rel'
    __url_prefix = 'href="'
    __url_sufix = '" rel'

    def __init__(self, user='zarasa123', passw='zarasa123', proxies_per_proto={}, debug=False):
        self.__br = Browser()
        self.__br.set_proxies(proxies_per_proto)
        self.__br.set_debug_http(debug)

        # sign in
        self.__br.open("http://twitter.com/")
        forms = self.__br.forms()
        form = forms.next()
        self.__br.select_form(nr=0)
        self.__br['username_or_email'] = user
        self.__br['password'] = passw
        resp = self.__br.submit()

    def search(self, query):
        br = self.__br
        self.__br.select_form(name='user_search_form')
        self.__br['q'] = query
        resp = self.__br.submit()
        links_urls = []
        for link in br.links(url_regex="twitter.com/[a-zA-Z_]+"):
            if not 'index.php' in link.url and \
               not 'twitter.com/blog' in link.url and \
               not 'twitter.com/home' in link.url:
                links_urls.append(link.url)
        br.back()
        return links_urls

    def __strip_complete_name(self, html_match):
        match = re.search(self.__complete_name_regex, html_match)
        match = match.group()[len(self.__complete_name_prefix):-len(self.__complete_name_sufix)]
        return match

    def __strip_url(self, html_match):
        match = re.search(self.__url_regex, html_match)
        match = match.group()[len(self.__url_prefix):-len(self.__url_sufix)]
        return match

    def contacts(self, name):
        br = self.__br
        # check if name exists.
        results = self.search(name)
        if len(results) == 0:
            raise Exception('name "%s" doesn\'t exist in Twitter' % name)
        # assume the first person that matches
        name_link = results[0]

        # retrieve the first n (20?) contacts as tuples (complete_name, twitter_url).
        resp = self.__br.open(name_link + '/friends')
        cont = resp.read()
        matches = re.findall(self.__contact_regex, cont)
        complete_names = map(self.__strip_complete_name, matches)
        urls = map(self.__strip_url, matches)
        return zip(complete_names, urls)