当前位置: 首页>>代码示例>>Python>>正文


Python Browser.set_handle_gzip方法代码示例

本文整理汇总了Python中mechanize.Browser.set_handle_gzip方法的典型用法代码示例。如果您正苦于以下问题:Python Browser.set_handle_gzip方法的具体用法?Python Browser.set_handle_gzip怎么用?Python Browser.set_handle_gzip使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在mechanize.Browser的用法示例。


在下文中一共展示了Browser.set_handle_gzip方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_browser

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
def get_browser():
    """Create and configure a mechanize Browser for scraping.

    The returned browser handles meta http-equiv, gzip encoding,
    redirects and referer headers, ignores robots.txt, and advertises
    a desktop Firefox user-agent.
    """
    browser = Browser()

    # Standard handler configuration for scraping sessions.
    browser.set_handle_equiv(True)
    browser.set_handle_gzip(True)
    browser.set_handle_redirect(True)
    browser.set_handle_referer(True)
    browser.set_handle_robots(False)

    # Spoof a desktop Firefox UA so the site serves its normal pages.
    browser.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

    return browser
开发者ID:jmnavarro,项目名称:Mapa_es_Scraper,代码行数:30,代码来源:scraper.py

示例2: createbrowser

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
 def createbrowser(self):
     """Build a mechanize Browser that mimics mobile Safari on iPhone.

     gzip and redirect handling are enabled and robots.txt is ignored,
     so the mobile version of target sites is served without blocking.
     """
     browser = Browser()
     browser.set_handle_gzip(True)
     browser.set_handle_robots(False)
     browser.set_handle_redirect(True)
     # Present an iPhone user-agent to get the mobile site.
     browser.addheaders = [('User-agent', 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 5_1 like Mac OS X; en-US) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B179 Safari/7534.48.3')]
     return browser
开发者ID:Rasmus-Riis,项目名称:Huge_py,代码行数:9,代码来源:hotaccount.py

示例3: check

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
def check(acs):
    for a in acs:
        try:
            a = a.rsplit()[0]
        except:
            pass
        try:
            if a:
                a = a.split(':')
                user = a[0]
                passw = a[1]
                br = Browser()
                br.set_handle_gzip(True)
                br.set_handle_robots(False)
                br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
                br.open('http://m.facebook.com/login.php')
                br.select_form(nr=0)
                br.form['email'] = user
                br.form['pass'] = passw
                br.submit()
                if 'm.facebook.com/login.php' in br.geturl() or 'checkpoint' in br.geturl() or 'to confirm your account with Facebook.' in br.response().read():
                            print "Could not login with " + str(a)

                else:
                    print "Logged in with " + user
                    opn = open(newfile, 'a')
                    opn.write(user + ":" + passw + '\n')
                    opn.close()

        except:
            print "Could not login with " + str(a)
开发者ID:Rasmus-Riis,项目名称:Huge_py,代码行数:33,代码来源:facebookconnects.py

示例4: lockNloadBrowser

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
def lockNloadBrowser():
    """Return a fully configured mechanize Browser with its own cookie jar.

    Handles http-equiv, gzip, redirects and referers, skips robots.txt,
    follows zero-delay meta refreshes only, and reports a desktop
    Firefox user-agent.
    """
    browser = Browser()
    browser.set_cookiejar(cookielib.LWPCookieJar())
    browser.set_handle_equiv(True)
    browser.set_handle_gzip(True)
    browser.set_handle_redirect(True)
    browser.set_handle_referer(True)
    browser.set_handle_robots(False)
    # Follow refresh 0 but do not hang waiting on refresh > 0.
    browser.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    browser.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    return browser
开发者ID:Daiver,项目名称:jff,代码行数:16,代码来源:parsing.py

示例5: create_browser

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
def create_browser(debug=False):
    """Construct a mechanize Browser backed by the robust HTML factory.

    debug -- when true, switch on mechanize's HTTP, response and
        redirect debug output.

    The module-level HEADERS list is installed as the default request
    headers.
    """
    br = Browser(factory=mechanize.RobustFactory())
    if debug:
        # Even chattier logger-based output could be wired up here via
        # the "mechanize" logger if ever needed.
        br.set_debug_http(True)
        br.set_debug_responses(True)
        br.set_debug_redirects(True)
    # Enable the usual content handlers; robots.txt is ignored.
    for enable in (br.set_handle_equiv, br.set_handle_gzip,
                   br.set_handle_redirect, br.set_handle_referer):
        enable(True)
    br.set_handle_robots(False)
    br.addheaders = HEADERS
    return br
开发者ID:kevinwu06,项目名称:scraping_stuff,代码行数:19,代码来源:headless_browser.py

示例6: letv

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
def letv(page_url, target_dir):
    """Scrape a LeTV video page and fetch its play JSON.

    Downloads *page_url*, extracts the ``var __INFO__ = {...}``
    JavaScript object embedded in a <head> script, and passes the
    video id (and the id of the following video) to get_playjson().

    NOTE(review): the get_playjson() result is bound but never
    returned -- presumably the download happens as a side effect into
    *target_dir*; confirm against get_playjson's definition.
    """
    browser = Browser()
    browser.set_handle_robots(False)
    browser.set_handle_gzip(True)
    browser.addheaders = [('User-Agent', USER_AGENT)]

    resp = browser.open(page_url)
    resp_body = resp.read()
    tree = html.fromstring(resp_body)
    # Scan every <head> script for the "var __INFO__ = ..." assignment
    # and collect its lines until the next "var" statement begins.
    for script in tree.xpath('/html/head/script'):
        match_info = []
        start = False
        if not script.text:
            continue
        for line in script.text.split('\n'):
            if not start:
                match = re.match('var\s+__INFO__\s?=(.+)', line)
                if match:
                    start = True
                    match_info.append(match.group(1))
            else:
                if line.startswith('var'):
                    # Next statement starts: the __INFO__ literal is over.
                    start = False
                    break
                # Strip a trailing "//" line comment, but leave the "//"
                # of a URL scheme ("://") intact.
                hp = line.find('://')
                p = line.rfind('//')
                if p != -1 and p != hp+1:
                    match_info.append(line[:p])
                else:
                    match_info.append(line)
        if match_info:
            break
    match_info = '\n'.join(match_info)
    # Convert the JavaScript object literal into a Python dict.
    match_info = to_dict(match_info)
    vid = match_info['video']['vid']
    nextvid = match_info['video']['nextvid']
    print '%s' % match_info['video']['title']
    play_json = get_playjson(vid, nextvid, target_dir)
开发者ID:lvqier,项目名称:crawlers,代码行数:40,代码来源:letv.py

示例7: fetch_transactions

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
def fetch_transactions(startdate=None, enddate=None, visa=False):
    """Log in to the bank site and return the transaction listing as raw HTML.

    startdate/enddate -- datetime-like objects bounding the statement
        period.  NOTE(review): both default to None yet .strftime() is
        called on them unconditionally, so callers must always supply
        them -- confirm before relying on the defaults.
    visa -- when true, restrict the listing to Visa transactions only.

    Relies on module globals LOGIN_URL, FILTER and settings
    (CODE, PASS, NUM) defined elsewhere in this file.
    """
    br = Browser()
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    br.set_handle_equiv(True)
    br.set_handle_gzip(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    br.open(LOGIN_URL)

    # The login page announces which (1-based) character positions of
    # the password and passcode it wants; read them from the
    # <td><strong> labels with pyquery.
    d = pq(br.response().read())
    labels = d('td strong')
    char1 = int(labels[2].text.strip())
    char2 = int(labels[3].text.strip())
    num1 = int(labels[5].text.strip())
    num2 = int(labels[6].text.strip())
    br.form = list(br.forms())[0]
    br['globalKeyCode'] = settings.CODE
    # Slice the single requested character/digit out of the stored
    # secrets (positions are 1-based, hence the -1).
    br['ctl001password1'] = settings.PASS[char1-1:char1]
    br['ctl001password2'] = settings.PASS[char2-1:char2]
    br['ctl001passcode1'] = settings.NUM[num1-1:num1]
    br['ctl001passcode2'] = settings.NUM[num2-1:num2]
    br.submit()

    # Now filter the transaction listing by date range (and optionally
    # Visa-only) and fetch the resulting page.
    br.open(FILTER)
    br.form = list(br.forms())[0]
    br['periodoption'] = ["byDate"]
    br['startdate'] = startdate.strftime("%d/%m/%Y")
    br['enddate'] = enddate.strftime("%d/%m/%Y")
    if visa:
        br['visa'] = ["True"]
        br['all'] = False 
    else:
        br['all'] = ["True"]
    br.submit()
    result = br.response().read()
    return result
开发者ID:sebbacon,项目名称:oneaccount2qif,代码行数:39,代码来源:oneaccount.py

示例8: LconnectScraper

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
class LconnectScraper(ClassDataScraper):
    """ClassDataScraper backed by WIT's LeopardWeb (Lconnect) portal."""

    LCONNECT_URL = 'http://leopardweb.wit.edu/'
    USERAGENT = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) ' \
                + 'Gecko/20100122 firefox/3.6.1'

    def __init__(self):
        # A mechanize Browser with its own cookie jar.
        self._cookieJar = LWPCookieJar()
        self._browser = Browser()
        self._browser.set_cookiejar(self._cookieJar)

        # Handler configuration: honour http-equiv, gzip, redirects and
        # referers; ignore robots.txt; follow zero-delay refreshes only.
        browser = self._browser
        browser.set_handle_equiv(True)
        browser.set_handle_gzip(True)
        browser.set_handle_redirect(True)
        browser.set_handle_referer(True)
        browser.set_handle_robots(False)
        browser.set_handle_refresh(_http.HTTPRefreshProcessor(),
                                   max_time=1)
        browser.addheaders = [('User-agent', LconnectScraper.USERAGENT)]

        # Verbose wire-level debugging (deliberately left on).
        browser.set_debug_http(True)
        browser.set_debug_redirects(True)
        browser.set_debug_responses(True)

    def getName(self):
        """Return the human-readable name of this scraper."""
        return "Lconnect Scraper"

    def connect(self):
        """Open the portal start page (8s timeout); True on success."""
        try:
            self._browser.open(LconnectScraper.LCONNECT_URL, timeout=8)
        except URLError:
            return False
        return True

    def disconnect(self):
        """Close the underlying browser session."""
        self._browser.close()

    def requiresAuthentication(self):
        """This scraper always needs a username and password."""
        return True

    def authenticate(self, username, password):
        """Submit the sign-in form; True once the 'Main Menu' page shows.

        Only runs the form submission when the current page is titled
        'Sign In'; otherwise it just checks whether we are already
        logged in.
        """
        if self._browser.title() == 'Sign In':
            # The login form is the nameless one; fill and submit it.
            for form in self._browser.forms():
                if form.name is not None:
                    continue
                self._browser.form = list(self._browser.forms())[0]
                self._browser['username'] = username
                self._browser['password'] = password
                self._browser.submit()

        # Landing on 'Main Menu' means we logged in now or were already.
        return self._browser.title() == 'Main Menu'

    def getClassData(self):
        """Return a list of ClassData objects (not implemented: always [])."""
        return []
开发者ID:mitranog,项目名称:lana,代码行数:84,代码来源:LconnectScraper.py

示例9: login

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
def login():
    """Log in to NAB internet banking and return the mechanize Browser.

    Returns None when no credentials are available or the login is
    rejected.  Relies on helpers defined elsewhere in this file:
    get_credentials(), make_password(), check_url() and the
    logged_in_urls list.
    """

    creds = get_credentials()
    if not creds:
        return None

    b = Browser()
    b.set_handle_robots(False)
    b.addheaders = [
        ('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'),
        ('Connection', 'keep-alive'),
        ('Cache-Control', 'max-age=0'),
        ('Accept-Encoding', 'gzip, deflate, br')
    ]

    b.set_handle_equiv(True)
    b.set_handle_gzip(True)
    b.set_handle_redirect(True)
    b.set_handle_referer(True)
    b.set_handle_robots(False)

    # Follows refresh 0 but not hangs on refresh > 0
    b.set_handle_refresh(_http.HTTPRefreshProcessor(), max_time=1)

    # Want debugging messages?
    # b.set_debug_http(True)
    b.set_debug_redirects(True)
    b.set_debug_responses(True)
    # Walk the redirect chain from the public site to the real login page.
    print 'Opening main page...'
    b.open('http://www.nab.com.au')
    print 'OK'

    print 'Opening login redir page...'
    b.open('http://www.nab.com.au/cgi-bin/ib/301_start.pl?browser=correct')
    print 'OK'

    print 'Opening real login page...'
    b.open('https://ib.nab.com.au/nabib/index.jsp')
    print 'OK'

    # The login form embeds a per-session key and alphabet used to
    # obfuscate the password client-side; fetch both hidden controls.
    b.select_form(nr=0)
    try:
        webKeyCtrl = b.form.find_control(id='webKey')
        webAlphaCtrl = b.form.find_control(id='webAlpha')
    except ControlNotFoundError:
        print 'Cannot find necessary login controls, quitting'
        return

    webKey = webKeyCtrl.value
    webAlpha = webAlphaCtrl.value
    # Encode the real password with the session key/alphabet.
    newPassword = make_password(creds[1], webKey, webAlpha)

    usernameCtrl = b.form.find_control(name='userid')
    passwordCtrl = b.form.find_control(name='encoded-password')
    passwordCtrl.readonly = False
    usernameCtrl.value = creds[0]
    passwordCtrl.value = newPassword

    # Blank the plain-text password field so it is not transmitted.
    rawPassword = b.form.find_control(name='password')
    rawPassword.value = ''

    # Browser-fingerprint blob the site expects, captured from a real
    # session.  NOTE(review): contains a fixed timestamp/fingerprint --
    # may need refreshing if the bank starts validating it server-side.
    b_data = b.form.find_control(name='browserData')
    b_data.readonly = False
    b_data.value = '1496488636702;z=-600*-600;s=1440x900x24;l=en-GB;p=MacIntel;h=1Z3uS;i=33;j=117;k=16;c=d3d3Lm5hYi5jb20uYXUvc3RhdGljL0lCL2xvZ2luQmFubmVyLw;n=bG9naW5Gb3Jt,bG9naW5UaXBz;e=Y3ZpZXcz;b=1JE4yQ,24uNEg,2wDBVE;a=1GeUEa,1TaPsP,1ZO-16,1rEqxh,2.jbKy,21b2P5,2Jrfu6,2LmSef,2TqVCf,2Ubrnm,2dgqqB,3MkcJZ,JIGdn,eqyBa,lTM8m;o=Y29uc29sZQ,Y2hyb21l,YW5ndWxhcg,YXBpTG9nb3V0QXBw,Z2V0QnJvd3Nlcg,alF1ZXJ5MTEwMjA4MzYwNzIxMDQ4NTY0MjY0;t=fo4f0ot8-600.j3h6ekzf.877;d=YWNz,Ym9keWNvbnRhaW5lcg,Ym9keWNvbnRhaW5lcl9pbnNpZGU,YmFubmVy,ZXJyb3JNZXNzYWdl,ZXJyb3JOdW1iZXI,Zm9vdGVyX2xvZ2lu,ZmFuY3ktYmctZQ,ZmFuY3ktYmctbg,ZmFuY3ktYmctbmU,ZmFuY3ktYmctbnc,ZmFuY3ktYmctc2U,ZmFuY3ktYmctc3c,ZmFuY3ktYmctcw,ZmFuY3ktYmctdw,ZmFuY3lib3gtY2xvc2U,ZmFuY3lib3gtaW5uZXI,ZmFuY3lib3gtb3V0ZXI,ZmFuY3lib3gtb3ZlcmxheQ,ZmFuY3lib3gtbG9hZGluZw,ZmFuY3lib3gtbGVmdA,ZmFuY3lib3gtbGVmdC1pY28,ZmFuY3lib3gtcmlnaHQ,ZmFuY3lib3gtcmlnaHQtaWNv,ZmFuY3lib3gtd3JhcA,ZmFuY3lib3gtdG1w,aGVhZGVy,aWItdXNlci10ZXh0,bG9naW5Gb3Jt,bGlua3Mtc29jaWFsLW1lZGlh,bWFpblBhZ2U;u=ZHVtbXk,ZW5jb2RlZC1wYXNzd29yZA,d2ViQWxwaGE,d2ViS2V5;v=bmVlZC1oZWxw;x=1IVClf,1KxWAP,1SURBl,1Wl6jj,1vhE2s,1vstXM,1wlzQT,1yYwT1,2-PmTs,2APt-x,2FOxw2,2Lnxl,2ceYJE,2feZ0x,2g4LgQ,2h079f,2oK-0A,2ueFc7,34liSK,39CTWT,3GxyfT,3T6P3H,3XvqP.,3kcnCG,3ktPLw,3l39dK,660SR,68npD,8Vcav,JOS8B,cTezC,dwOmq,ix9Ek,s-ZAp;q=ZnJhdWQ;w=428866'

    # The submit button is not in the parsed form; add it manually.
    b.form.new_control('text', 'login', {'value': ''})
    b.form.fixup()
    b['login'] = 'Login'

    print('Logging in...')
    b.submit()

    if not check_url(b, logged_in_urls):
        print('Error logging in.')
        return None

    print('OK')
    return b
开发者ID:ArtS,项目名称:nab-export,代码行数:80,代码来源:browser.py

示例10: Browser

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
#!/usr/bin/env python

# Fetch the BSE get-quote page and submit the quote-lookup form for
# the scrip code "CAREERP" through a browser-like mechanize session.

from mechanize import Browser
from bs4 import BeautifulSoup as bs

# Request headers that make the session look like desktop Firefox.
headers = [('User-Agent', 'Mozilla/5.0 (Windows NT 5.1; rv:14.0) Gecko/20100101 Firefox/14.0.1')]

# Page carrying the quote-lookup form.
url = "http://www.bseindia.com/getquote.htm"

session = Browser()

# Behave like a normal browser: http-equiv, gzip, redirects and
# referers on; robots.txt checks off.
session.set_handle_equiv(True)
session.set_handle_gzip(True)
session.set_handle_redirect(True)
session.set_handle_referer(True)
session.set_handle_robots(False)

session.addheaders = headers

# Load the page, fill in the scrip code and submit the lookup form.
main_page = session.open(url)
session.select_form(nr=0)
session.find_control(id="suggestBoxEQ").value = "CAREERP"
session.submit()
开发者ID:martiansideofthemoon,项目名称:stock-marker,代码行数:33,代码来源:get_quotes.py

示例11: len

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
from BeautifulSoup import BeautifulSoup

# Scan a page (given as the only CLI argument) for links to .mp3 files.
# NOTE(review): the final for-loop appears to continue past this chunk
# of the file -- urlfrags is assigned but not yet used here.
if len(sys.argv) != 2: # require a URL to scan
    sys.exit("Must specify a URL")

url = sys.argv[1]
print "Scanning: %s " % url
print

# Cookie-aware mechanize browser with verbose debug output enabled.
mech = Browser()
cj = LWPCookieJar()
mech.set_cookiejar(cj)
mech.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
mech.set_handle_robots(False)
mech.set_handle_equiv(True)
mech.set_handle_gzip(True)
mech.set_handle_referer(True)
mech.set_debug_http(True)
mech.set_debug_redirects(True)
mech.set_debug_responses(True)
page = mech.open(url)
html = page.read()
soup = BeautifulSoup(html)

# Extract all anchors on the page that include the string ".mp3"
anchors = soup.findAll(attrs={'href' : re.compile(".mp3")})
for a in anchors:
    mp3link = a['href'] # Get the value of the href, not the whole tag/container!

    # To get an output filename, split the URL on slashes and grab the last array item
    urlfrags = mp3link.split('/')
开发者ID:atmlvs,项目名称:MyLib,代码行数:33,代码来源:scrapemp3s.py

示例12: get_pages

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_gzip [as 别名]
    return option_list

def get_pages():
    soup2 = BeautifulSoup(browser.response().read())
    table = soup2.find("table",{'class': 'gridViewJudgementsResults'})
    rows = table.find("tr")
    if(rows.has_key("class")):
        print "nopaginations"
    else:
        print rows

# Build a cookie-aware mechanize browser and run a judgements search
# against the Maltese court-services site for court id 126.
browser = Browser()
cj = cookielib.LWPCookieJar()
browser.set_cookiejar(cj)

# Behave like a normal browser: http-equiv, gzip, redirects and
# referers on; robots.txt checks off.
for configure in (browser.set_handle_equiv, browser.set_handle_gzip,
                  browser.set_handle_redirect, browser.set_handle_referer):
    configure(True)
browser.set_handle_robots(False)

browser.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

url = 'http://www.justiceservices.gov.mt/courtservices/Judgements/search.aspx?func=all'
r = browser.open(url)

# Select the search form, pick court 126 and submit the search.
browser.select_form(nr=0)
court_list = get_court_values()
browser.form['ctl00$ContentPlaceHolderMain$search_judgement_panel$dd_court'] = ['126']
browser.submit()
pages_list = get_pages()
开发者ID:cassar1,项目名称:Crawler,代码行数:33,代码来源:crawler6.py


注:本文中的mechanize.Browser.set_handle_gzip方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。