本文整理汇总了 Python 中 mechanize.Browser.set_handle_gzip 方法的典型用法代码示例。如果您正苦于以下问题:Python Browser.set_handle_gzip 方法的具体用法?Python Browser.set_handle_gzip 怎么用?Python Browser.set_handle_gzip 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 mechanize.Browser 的用法示例。
下文一共展示了 Browser.set_handle_gzip 方法的 12 个代码示例,这些示例默认按受欢迎程度排序。您可以为喜欢或觉得有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: get_browser
# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
def get_browser():
    """Create a mechanize ``Browser`` preconfigured for scraping.

    Turns on http-equiv, gzip, redirect and referer handling, turns off
    robots.txt handling, and replaces the default headers with a single
    desktop Firefox ``User-agent`` header.  No custom cookie jar, refresh
    handler or debug output is configured here.

    Returns:
        The configured ``Browser`` instance.
    """
    firefox_ua = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'
    # (handler name, enabled) pairs, applied in this order.
    handler_flags = (
        ('equiv', True),
        ('gzip', True),
        ('redirect', True),
        ('referer', True),
        ('robots', False),
    )

    agent = Browser()
    for handler, enabled in handler_flags:
        getattr(agent, 'set_handle_' + handler)(enabled)
    # Present the client as a regular desktop browser.
    agent.addheaders = [('User-agent', firefox_ua)]
    return agent
示例2: createbrowser
# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
def createbrowser(self):
    """Return a new mechanize ``Browser`` that sends an iPhone Safari User-agent.

    Gzip and redirect handling are enabled and robots.txt handling is
    disabled.  ``self`` is not used.
    """
    mobile_ua = 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 5_1 like Mac OS X; en-US) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B179 Safari/7534.48.3'

    browser = Browser()
    browser.set_handle_gzip(True)
    browser.set_handle_robots(False)
    browser.set_handle_redirect(True)
    browser.addheaders = [('User-agent', mobile_ua)]
    return browser
示例3: check
# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
# check(acs): for every entry in `acs`, submit the "user:password" pair it
# contains to the m.facebook.com login form and print whether the login
# appeared to succeed. Successful pairs are appended to the file named by
# `newfile` (defined outside this snippet).
# NOTE(review): the indentation of this snippet was lost, so the nesting of
# the try/if/else bodies must be restored before the code can run.
# NOTE(review): this automates login attempts; only run it against accounts
# you own or are explicitly authorized to test.
def check(acs):
for a in acs:
try:
# Keep only the first whitespace-delimited token of the entry.
a = a.rsplit()[0]
except:
pass
try:
if a:
# Split "user:password"; everything after a second ':' is ignored.
a = a.split(':')
user = a[0]
passw = a[1]
br = Browser()
br.set_handle_gzip(True)
br.set_handle_robots(False)
br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
br.open('http://m.facebook.com/login.php')
# The login form is taken to be the first form on the page.
br.select_form(nr=0)
br.form['email'] = user
br.form['pass'] = passw
br.submit()
# Treated as a failure when the browser is still on the login page, was sent
# to a checkpoint URL, or the response contains the account-confirmation text.
if 'm.facebook.com/login.php' in br.geturl() or 'checkpoint' in br.geturl() or 'to confirm your account with Facebook.' in br.response().read():
print "Could not login with " + str(a)
else:
print "Logged in with " + user
# Record the working pair, one "user:password" per line.
opn = open(newfile, 'a')
opn.write(user + ":" + passw + '\n')
opn.close()
except:
# Any error (malformed entry, network or form failure) is reported as a failed login.
print "Could not login with " + str(a)
示例4: lockNloadBrowser
# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
def lockNloadBrowser():
    """Build a mechanize ``Browser`` with an ``LWPCookieJar`` and Firefox headers.

    Enables http-equiv, gzip, redirect and referer handling, disables
    robots.txt handling, and installs a refresh handler with ``max_time=1``.

    Returns:
        The configured ``Browser`` instance.
    """
    firefox_ua = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'

    browser = Browser()
    browser.set_cookiejar(cookielib.LWPCookieJar())

    browser.set_handle_equiv(True)
    browser.set_handle_gzip(True)
    browser.set_handle_redirect(True)
    browser.set_handle_referer(True)
    browser.set_handle_robots(False)

    # Follow immediate refreshes but do not hang on refresh delays > 0.
    refresh_processor = mechanize._http.HTTPRefreshProcessor()
    browser.set_handle_refresh(refresh_processor, max_time=1)

    browser.addheaders = [('User-agent', firefox_ua)]
    return browser
示例5: create_browser
# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
def create_browser(debug=False):
    """Create a mechanize ``Browser`` built on ``mechanize.RobustFactory``.

    Args:
        debug: When true, turn on mechanize's HTTP, response and redirect
            debug output.

    Returns:
        The browser with http-equiv, gzip, redirect and referer handling
        enabled, robots.txt handling disabled, and ``HEADERS`` (defined
        elsewhere in the module) installed as its request headers.
    """
    client = Browser(factory=mechanize.RobustFactory())

    if debug:
        # For even more output, attach a StreamHandler to the "mechanize"
        # logger and set its level to DEBUG.
        client.set_debug_http(True)
        client.set_debug_responses(True)
        client.set_debug_redirects(True)

    for handler, enabled in (
        ('equiv', True),
        ('gzip', True),
        ('redirect', True),
        ('referer', True),
        ('robots', False),
    ):
        getattr(client, 'set_handle_' + handler)(enabled)

    client.addheaders = HEADERS
    return client
示例6: letv
# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
def _extract_info_lines(script_text):
    """Return the source lines of the ``var __INFO__ = ...`` statement.

    Scans *script_text* line by line.  Collection starts with the text that
    follows ``var __INFO__ =`` and stops at the next line beginning with
    ``var``.  Returns an empty list when no such assignment is present.
    """
    collected = []
    started = False
    for line in script_text.split('\n'):
        if not started:
            match = re.match(r'var\s+__INFO__\s?=(.+)', line)
            if match:
                started = True
                collected.append(match.group(1))
            continue
        if line.startswith('var'):
            # The next variable declaration ends the __INFO__ literal.
            break
        scheme_pos = line.find('://')
        comment_pos = line.rfind('//')
        # Drop a trailing "//" comment, unless that "//" is the one that
        # belongs to the first "://" on the line (i.e. part of a URL).
        if comment_pos != -1 and comment_pos != scheme_pos + 1:
            collected.append(line[:comment_pos])
        else:
            collected.append(line)
    return collected


def letv(page_url, target_dir):
    """Fetch *page_url*, read its ``__INFO__`` block and request the play JSON.

    The first ``/html/head/script`` element containing a
    ``var __INFO__ = ...`` assignment is parsed with ``to_dict``; the video
    title is printed and ``get_playjson`` is called with the video id, the
    next video id and *target_dir*.

    Args:
        page_url: URL of the video page to open.
        target_dir: Passed through unchanged to ``get_playjson``.

    Raises:
        ValueError: If no head script contains an ``__INFO__`` assignment.
            (Previously this surfaced as a ``NameError`` or as a failure
            further down when parsing an empty string.)
    """
    browser = Browser()
    browser.set_handle_robots(False)
    browser.set_handle_gzip(True)
    browser.addheaders = [('User-Agent', USER_AGENT)]

    resp_body = browser.open(page_url).read()
    tree = html.fromstring(resp_body)

    info_lines = []
    for script in tree.xpath('/html/head/script'):
        if not script.text:
            continue
        info_lines = _extract_info_lines(script.text)
        if info_lines:
            break
    if not info_lines:
        raise ValueError('no __INFO__ assignment found in %s' % page_url)

    match_info = to_dict('\n'.join(info_lines))
    video = match_info['video']
    vid = video['vid']
    nextvid = video['nextvid']
    print('%s' % video['title'])
    get_playjson(vid, nextvid, target_dir)
示例7: fetch_transactions
# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
def fetch_transactions(startdate=None, enddate=None, visa=False):
    """Log in at ``LOGIN_URL`` and return the raw filtered transaction page.

    The login page is parsed for ``td strong`` labels; the integers found at
    label indexes 2, 3, 5 and 6 are used as 1-based positions into
    ``settings.PASS`` and ``settings.NUM`` (presumably the site asks for
    individual characters of the password and passcode -- confirm).

    Args:
        startdate: Object with ``strftime``; start of the period.  Despite
            the ``None`` default a value is required.
        enddate: Object with ``strftime``; end of the period.  Required.
        visa: When true select the ``visa`` option instead of ``all``.

    Returns:
        The body of the response to the filter form submission.
    """
    session = Browser()
    session.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    session.set_handle_equiv(True)
    session.set_handle_gzip(True)
    session.set_handle_redirect(True)
    session.set_handle_referer(True)
    session.set_handle_robots(False)

    # --- Login -----------------------------------------------------------
    session.open(LOGIN_URL)
    challenge_labels = pq(session.response().read())('td strong')

    def requested_position(label_index):
        # Each label holds the 1-based position of the character to send.
        return int(challenge_labels[label_index].text.strip())

    char1 = requested_position(2)
    char2 = requested_position(3)
    num1 = requested_position(5)
    num2 = requested_position(6)

    session.form = list(session.forms())[0]
    session['globalKeyCode'] = settings.CODE
    session['ctl001password1'] = settings.PASS[char1 - 1:char1]
    session['ctl001password2'] = settings.PASS[char2 - 1:char2]
    session['ctl001passcode1'] = settings.NUM[num1 - 1:num1]
    session['ctl001passcode2'] = settings.NUM[num2 - 1:num2]
    session.submit()

    # --- Filter by date ----------------------------------------------------
    session.open(FILTER)
    session.form = list(session.forms())[0]
    session['periodoption'] = ["byDate"]
    session['startdate'] = startdate.strftime("%d/%m/%Y")
    session['enddate'] = enddate.strftime("%d/%m/%Y")
    if not visa:
        session['all'] = ["True"]
    else:
        session['visa'] = ["True"]
        session['all'] = False
    session.submit()

    return session.response().read()
示例8: LconnectScraper
# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
class LconnectScraper(ClassDataScraper):
    """Class-data scraper for the site at ``LCONNECT_URL``.

    Wraps a mechanize ``Browser`` with its own ``LWPCookieJar``.  Typical
    use: ``connect()``, then ``authenticate(username, password)``, then
    ``getClassData()``, and finally ``disconnect()``.
    """

    LCONNECT_URL = 'http://leopardweb.wit.edu/'
    USERAGENT = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) ' \
        + 'Gecko/20100122 firefox/3.6.1'

    def __init__(self):
        """Create the cookie jar and browser and apply the browser options."""
        # Create a cookie jar and a browser
        self._cookieJar = LWPCookieJar()
        self._browser = Browser()
        self._browser.set_cookiejar(self._cookieJar)

        # Set Browser options
        self._browser.set_handle_equiv(True)
        self._browser.set_handle_gzip(True)
        self._browser.set_handle_redirect(True)
        self._browser.set_handle_referer(True)
        self._browser.set_handle_robots(False)
        self._browser.set_handle_refresh(_http.HTTPRefreshProcessor(),
                                         max_time=1)
        self._browser.addheaders = [('User-agent', LconnectScraper.USERAGENT)]

        # Debugging: HTTP, redirect and response tracing are always on.
        self._browser.set_debug_http(True)
        self._browser.set_debug_redirects(True)
        self._browser.set_debug_responses(True)

    def getName(self):
        """Return the display name of this scraper."""
        return "Lconnect Scraper"

    def connect(self):
        """
        Attempts to connect to the data source.

        Returns True when ``LCONNECT_URL`` could be opened, False when the
        attempt raised ``URLError``.
        """
        try:
            # Try to open a connection. 8 second timeout
            self._browser.open(LconnectScraper.LCONNECT_URL, timeout=8)
        except URLError:
            return False
        return True

    def disconnect(self):
        """
        Disconnects from the data source
        """
        self._browser.close()

    def requiresAuthentication(self):
        """
        Returns whether or not the scraper requires authentication information
        """
        return True

    def authenticate(self, username, password):
        """
        Attempts to authenticate the scraper using username and password.

        Returns True when the browser ends up on the page titled
        'Main Menu' (freshly logged in, or already logged in), else False.
        """
        # If we're on the sign in page, try to sign in
        if self._browser.title() == 'Sign In':
            for form in self._browser.forms():
                if form.name is None:
                    # Use the unnamed form that was actually found (the old
                    # code always picked forms()[0]) and submit exactly once.
                    self._browser.form = form
                    self._browser['username'] = username
                    self._browser['password'] = password
                    self._browser.submit()
                    break

        # If the browser's title is 'Main Menu',
        # we've either successfully logged in, or we were already
        return self._browser.title() == 'Main Menu'

    def getClassData(self):
        """
        Returns a list of ClassData objects (currently always empty).
        """
        return []
示例9: login
# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
def login():
    """Log in to NAB internet banking and return the authenticated browser.

    Credentials come from ``get_credentials()``; the encoded password is
    produced by ``make_password`` from the password plus the ``webKey`` and
    ``webAlpha`` values found on the login form.

    Returns:
        The mechanize ``Browser`` after a successful login, or ``None`` when
        no credentials are available, the ``webKey``/``webAlpha`` controls
        are missing, or ``check_url(b, logged_in_urls)`` is falsy after the
        form has been submitted.
    """
    creds = get_credentials()
    if not creds:
        return None

    session = Browser()
    session.set_handle_robots(False)
    session.addheaders = [
        ('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'),
        ('Connection', 'keep-alive'),
        ('Cache-Control', 'max-age=0'),
        ('Accept-Encoding', 'gzip, deflate, br')
    ]
    session.set_handle_equiv(True)
    session.set_handle_gzip(True)
    session.set_handle_redirect(True)
    session.set_handle_referer(True)
    session.set_handle_robots(False)
    # Follows refresh 0 but does not hang on refresh > 0
    session.set_handle_refresh(_http.HTTPRefreshProcessor(), max_time=1)
    # HTTP-level debugging stays off; redirect and response tracing are on.
    session.set_debug_redirects(True)
    session.set_debug_responses(True)

    # Walk through the same page sequence a real browser would follow.
    landing_steps = (
        ('Opening main page...', 'http://www.nab.com.au'),
        ('Opening login redir page...', 'http://www.nab.com.au/cgi-bin/ib/301_start.pl?browser=correct'),
        ('Opening real login page...', 'https://ib.nab.com.au/nabib/index.jsp'),
    )
    for announcement, address in landing_steps:
        print(announcement)
        session.open(address)
        print('OK')

    session.select_form(nr=0)
    login_form = session.form
    try:
        key_control = login_form.find_control(id='webKey')
        alpha_control = login_form.find_control(id='webAlpha')
    except ControlNotFoundError:
        print('Cannot find necessary login controls, quitting')
        return None

    encoded_password = make_password(creds[1], key_control.value, alpha_control.value)

    user_control = session.form.find_control(name='userid')
    encoded_control = session.form.find_control(name='encoded-password')
    # The encoded-password field is read-only in the page; unlock it first.
    encoded_control.readonly = False
    user_control.value = creds[0]
    encoded_control.value = encoded_password

    # The plain password field is submitted empty.
    session.form.find_control(name='password').value = ''

    fingerprint_control = session.form.find_control(name='browserData')
    fingerprint_control.readonly = False
    fingerprint_control.value = '1496488636702;z=-600*-600;s=1440x900x24;l=en-GB;p=MacIntel;h=1Z3uS;i=33;j=117;k=16;c=d3d3Lm5hYi5jb20uYXUvc3RhdGljL0lCL2xvZ2luQmFubmVyLw;n=bG9naW5Gb3Jt,bG9naW5UaXBz;e=Y3ZpZXcz;b=1JE4yQ,24uNEg,2wDBVE;a=1GeUEa,1TaPsP,1ZO-16,1rEqxh,2.jbKy,21b2P5,2Jrfu6,2LmSef,2TqVCf,2Ubrnm,2dgqqB,3MkcJZ,JIGdn,eqyBa,lTM8m;o=Y29uc29sZQ,Y2hyb21l,YW5ndWxhcg,YXBpTG9nb3V0QXBw,Z2V0QnJvd3Nlcg,alF1ZXJ5MTEwMjA4MzYwNzIxMDQ4NTY0MjY0;t=fo4f0ot8-600.j3h6ekzf.877;d=YWNz,Ym9keWNvbnRhaW5lcg,Ym9keWNvbnRhaW5lcl9pbnNpZGU,YmFubmVy,ZXJyb3JNZXNzYWdl,ZXJyb3JOdW1iZXI,Zm9vdGVyX2xvZ2lu,ZmFuY3ktYmctZQ,ZmFuY3ktYmctbg,ZmFuY3ktYmctbmU,ZmFuY3ktYmctbnc,ZmFuY3ktYmctc2U,ZmFuY3ktYmctc3c,ZmFuY3ktYmctcw,ZmFuY3ktYmctdw,ZmFuY3lib3gtY2xvc2U,ZmFuY3lib3gtaW5uZXI,ZmFuY3lib3gtb3V0ZXI,ZmFuY3lib3gtb3ZlcmxheQ,ZmFuY3lib3gtbG9hZGluZw,ZmFuY3lib3gtbGVmdA,ZmFuY3lib3gtbGVmdC1pY28,ZmFuY3lib3gtcmlnaHQ,ZmFuY3lib3gtcmlnaHQtaWNv,ZmFuY3lib3gtd3JhcA,ZmFuY3lib3gtdG1w,aGVhZGVy,aWItdXNlci10ZXh0,bG9naW5Gb3Jt,bGlua3Mtc29jaWFsLW1lZGlh,bWFpblBhZ2U;u=ZHVtbXk,ZW5jb2RlZC1wYXNzd29yZA,d2ViQWxwaGE,d2ViS2V5;v=bmVlZC1oZWxw;x=1IVClf,1KxWAP,1SURBl,1Wl6jj,1vhE2s,1vstXM,1wlzQT,1yYwT1,2-PmTs,2APt-x,2FOxw2,2Lnxl,2ceYJE,2feZ0x,2g4LgQ,2h079f,2oK-0A,2ueFc7,34liSK,39CTWT,3GxyfT,3T6P3H,3XvqP.,3kcnCG,3ktPLw,3l39dK,660SR,68npD,8Vcav,JOS8B,cTezC,dwOmq,ix9Ek,s-ZAp;q=ZnJhdWQ;w=428866'

    # Add the 'login' field the form is submitted with, then rebuild the form.
    session.form.new_control('text', 'login', {'value': ''})
    session.form.fixup()
    session['login'] = 'Login'

    print('Logging in...')
    session.submit()
    if check_url(session, logged_in_urls):
        print('OK')
        return session
    print('Error logging in.')
    return None
示例10: Browser
# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
#!/usr/bin/env python
from mechanize import Browser
from bs4 import BeautifulSoup as bs
# Request headers: present the client as desktop Firefox.
headers = [('User-Agent', 'Mozilla/5.0 (Windows NT 5.1; rv:14.0) Gecko/20100101 Firefox/14.0.1')]

# Page that hosts the quote lookup form.
url = "http://www.bseindia.com/getquote.htm"

# Configure the browser: handle http-equiv, gzip, redirects and referer,
# and ignore robots.txt.
br = Browser()
for _handler, _enabled in (
    ('equiv', True),
    ('gzip', True),
    ('redirect', True),
    ('referer', True),
    ('robots', False),
):
    getattr(br, 'set_handle_' + _handler)(_enabled)
br.addheaders = headers

# Load the page and keep the response object.
main_page = br.open(url)

# Fill the suggest box of the first form with the symbol and submit it.
br.select_form(nr=0)
br.find_control(id="suggestBoxEQ").value = "CAREERP"
br.submit()
示例11: len
# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
from BeautifulSoup import BeautifulSoup
# Exactly one command-line argument is required: the URL of the page to scan.
if len(sys.argv) != 2:
    sys.exit("Must specify a URL")

url = sys.argv[1]
print("Scanning: %s " % url)
print("")

# Browser with its own cookie jar, Firefox headers and verbose debug output.
mech = Browser()
cj = LWPCookieJar()
mech.set_cookiejar(cj)
mech.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
mech.set_handle_robots(False)
mech.set_handle_equiv(True)
mech.set_handle_gzip(True)
mech.set_handle_referer(True)
mech.set_debug_http(True)
mech.set_debug_redirects(True)
mech.set_debug_responses(True)

page = mech.open(url)
html = page.read()
soup = BeautifulSoup(html)

# Extract all tags whose href contains the literal string ".mp3".  The dot is
# escaped; the unescaped pattern ".mp3" matched any character followed by
# "mp3" (e.g. "/xmp3/").
anchors = soup.findAll(attrs={'href': re.compile(r"\.mp3")})
for a in anchors:
    # Get the value of the href, not the whole tag/container!
    mp3link = a['href']
    # To get an output filename, split the URL on slashes; the last item is
    # the file name.
    urlfrags = mp3link.split('/')
示例12: get_pages
# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
return option_list
def get_pages():
    """Report whether the current results page is paginated.

    Parses the response currently held by the module-level ``browser``,
    finds the table with class ``gridViewJudgementsResults`` and looks at
    its first ``tr``.  Prints ``nopaginations`` when that row has a
    ``class`` attribute, otherwise prints the row itself.

    Returns:
        None.
    """
    page = BeautifulSoup(browser.response().read())
    results_table = page.find("table", {'class': 'gridViewJudgementsResults'})
    first_row = results_table.find("tr")
    if not first_row.has_key("class"):
        print(first_row)
        return
    print("nopaginations")
# Module-level browser shared with the helper functions of this script.
browser = Browser()
cj = cookielib.LWPCookieJar()
browser.set_cookiejar(cj)

# Handle http-equiv, gzip, redirects and referer; ignore robots.txt.
for _handler, _enabled in (
    ('equiv', True),
    ('gzip', True),
    ('redirect', True),
    ('referer', True),
    ('robots', False),
):
    getattr(browser, 'set_handle_' + _handler)(_enabled)
browser.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

# Open the judgement search page and select its first form.
url = 'http://www.justiceservices.gov.mt/courtservices/Judgements/search.aspx?func=all'
r = browser.open(url)
browser.select_form(nr=0)

court_list = get_court_values()
# Only court '126' is queried for now; looping over court_list is not enabled.
browser.form['ctl00$ContentPlaceHolderMain$search_judgement_panel$dd_court'] = ['126']
# submit() presses the submit button on the selected form.
browser.submit()

pages_list = get_pages()