当前位置: 首页>>代码示例>>Python>>正文


Python Browser.set_handle_referer方法代码示例

本文整理汇总了Python中mechanize.Browser.set_handle_referer方法的典型用法代码示例。如果您正苦于以下问题:Python Browser.set_handle_referer方法的具体用法?Python Browser.set_handle_referer怎么用?Python Browser.set_handle_referer使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类mechanize.Browser的用法示例。


在下文中一共展示了Browser.set_handle_referer方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: scrape

# 需要导入模块: from mechanize import Browser [as 别名]
# 注意: set_handle_referer 是 Browser 实例的方法,无需单独导入,通过 Browser().set_handle_referer(...) 调用
    def scrape(self):
        """
        Open ``self.site`` and collect the PDF links found on the page.

        The response body is scanned line by line; every ``href`` value of an
        ``<a`` tag that contains ``.pdf`` is appended to ``self.pdf_urls``.
        """
        browser = Browser()
        browser.set_handle_redirect(True)
        browser.set_handle_referer(True)
        browser.set_handle_robots(False)
        browser.addheaders = [('User-Agent', 'Firefox')]

        html = browser.open(self.site)

        for line in html.read().splitlines():
            # The capture group stops at the first quote or '>', so a captured
            # value can never contain a quote character. Testing for '.pdf"'
            # would therefore never succeed; test for the bare extension.
            for url in re.findall('<a href="?\'?([^"\'>]*)', line):
                if '.pdf' in url:
                    self.pdf_urls.append(url)
开发者ID:manishc1,项目名称:DySeCor,代码行数:31,代码来源:easy_scholar.py

示例2: find_first_article

# 需要导入模块: from mechanize import Browser [as 别名]
# 注意: set_handle_referer 是 Browser 实例的方法,无需单独导入,通过 Browser().set_handle_referer(...) 调用
def find_first_article():
    """
    Fetch the bitcointalk.org board 77 index and record its first article link.

    The ``href`` of the link nested in the first ``<td class="windowbg">``
    cell (``td > span > a``) is stored in the module-level global
    ``startingpost`` and printed.

    :raise AttributeError: if the page has no such cell or nested link
        (``soup.find`` then returns None).
    """
    global startingpost

    mech = Browser()
    mech.set_handle_equiv(True)
    mech.set_handle_redirect(True)
    mech.set_handle_referer(True)
    mech.set_handle_robots(False)
    mech.addheaders = [
        (
            "User-agent",
            "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1",
        )
    ]

    page = mech.open("https://bitcointalk.org/index.php?board=77.0")
    soup = BeautifulSoup(page.read())

    first_article_tag = soup.find("td", class_="windowbg")
    startingpost = first_article_tag.span.a.get("href")
    # Call form prints the same text on Python 2 and also parses on Python 3.
    print(startingpost)
开发者ID:jgomezfr,项目名称:bitcoin-reporters,代码行数:29,代码来源:python-scrape.py

示例3: extract_article_url

# 需要导入模块: from mechanize import Browser [as 别名]
# 注意: set_handle_referer 是 Browser 实例的方法,无需单独导入,通过 Browser().set_handle_referer(...) 调用
def extract_article_url(posturl):
    """
    Return the first link inside the posts of ``posturl`` that matches ``prog``.

    The fetched page is parsed into the module-level global ``soup``. When a
    matching link is found its URL is also stored in the global
    ``articleURL``.

    :param posturl: URL of the page to open.
    :return: the matching ``href`` value, or the string ``"No article url"``
        when no link in any ``<div class="post">`` matches.
    """
    global soup
    global articleURL

    mech = Browser()
    mech.set_handle_equiv(True)
    mech.set_handle_redirect(True)
    mech.set_handle_referer(True)
    mech.set_handle_robots(False)
    mech.addheaders = [
        (
            "User-agent",
            "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1",
        )
    ]

    page = mech.open(posturl)
    soup = BeautifulSoup(page.read())

    for item in soup.find_all("div", class_="post"):
        for link in item.find_all("a"):
            href = link.get("href")
            # Anchors without an href attribute yield None; skip them instead
            # of handing None to prog.match().
            # NOTE(review): prog is presumably a compiled regex defined at
            # module level - confirm.
            if href is None:
                continue
            if prog.match(href):
                articleURL = href
                return href
    return "No article url"
开发者ID:jgomezfr,项目名称:bitcoin-reporters,代码行数:36,代码来源:python-scrape.py

示例4: get_browser

# 需要导入模块: from mechanize import Browser [as 别名]
# 注意: set_handle_referer 是 Browser 实例的方法,无需单独导入,通过 Browser().set_handle_referer(...) 调用
    def get_browser(self):
        """
        Return this object's browser, building it on the first call.

        The instance is cached in ``self._br``. While it is being built, and
        only if both ``self._username`` and ``self._password`` are set, the
        login form at https://www.furaffinity.net/login is filled in and
        submitted.

        :return: mechanize.Browser instance.
        :raise: FAError if opening the login page raises urllib2.HTTPError.
        :raise: FAAuth if the browser ends up on the failed-login URL after
         submitting the form.
        """
        # Fast path: a browser was already built by an earlier call.
        if self._br is not None:
            return self._br

        browser = Browser()
        browser.set_handle_robots(False)
        browser.set_handle_redirect(True)
        browser.set_handle_referer(True)
        browser.set_handle_equiv(True)
        browser.addheaders = [
            ('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'),
        ]

        has_credentials = not (self._username is None or self._password is None)
        if has_credentials:
            login_url = 'https://www.furaffinity.net/login'
            try:
                browser.open(login_url)
            except urllib2.HTTPError:
                raise FAError("FA's down, F5 time.")

            browser.form = browser.global_form()
            browser.form['name'] = self._username
            browser.form['pass'] = self._password
            browser.form.method = 'POST'
            browser.submit()

            # The URL compared here is what the code treats as a rejected login.
            failed_url = login_url + '/?msg=1'
            if browser.geturl() == failed_url:
                raise FAAuth('Username & Password Incorrect')

        self._br = browser
        return self._br
开发者ID:onlyhavecans,项目名称:FAiler,代码行数:34,代码来源:faurl.py

示例5: get_browser

# 需要导入模块: from mechanize import Browser [as 别名]
# 注意: set_handle_referer 是 Browser 实例的方法,无需单独导入,通过 Browser().set_handle_referer(...) 调用
def get_browser():
    """
    Build a mechanize Browser preconfigured for scraping.

    http-equiv, gzip, redirect and referer handling are switched on,
    robots handling is switched off, and a desktop Firefox User-agent
    header is installed.

    :return: the configured Browser instance.
    """
    firefox_agent = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'

    browser = Browser()

    # Handlers that are switched on, in the order they are applied.
    enabled_handlers = (
        browser.set_handle_equiv,
        browser.set_handle_gzip,
        browser.set_handle_redirect,
        browser.set_handle_referer,
    )
    for enable in enabled_handlers:
        enable(True)

    # robots handling is the only handler explicitly switched off.
    browser.set_handle_robots(False)

    # Send a regular desktop browser User-agent instead of the default one.
    browser.addheaders = [('User-agent', firefox_agent)]

    return browser
开发者ID:jmnavarro,项目名称:Mapa_es_Scraper,代码行数:30,代码来源:scraper.py

示例6: browserInit

# 需要导入模块: from mechanize import Browser [as 别名]
# 注意: set_handle_referer 是 Browser 实例的方法,无需单独导入,通过 Browser().set_handle_referer(...) 调用
def browserInit():
    '''
    Create a Browser with the standard initial configuration.

    Turns on http-equiv, redirect and referer handling, turns off robots
    handling, and installs the user-agent and accept request headers.
    '''
    request_headers = [
        ('user-agent', '   Mozilla/5.0 (X11; U; Linux x86_64; en-US) Mechanize/0.2.4 Fedora/16 (Verne) Pytane/0.2'),
        ('accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
    ]

    configured = Browser()
    for setter, flag in (
        (configured.set_handle_equiv, True),
        (configured.set_handle_redirect, True),
        (configured.set_handle_referer, True),
        (configured.set_handle_robots, False),
    ):
        setter(flag)
    configured.addheaders = request_headers
    return configured
开发者ID:fandingo,项目名称:Pytane,代码行数:14,代码来源:Pytane.py

示例7: _startbrowser

# 需要导入模块: from mechanize import Browser [as 别名]
# 注意: set_handle_referer 是 Browser 实例的方法,无需单独导入,通过 Browser().set_handle_referer(...) 调用
def _startbrowser(value_id, value, save_response=None):
    """
    Open ``URL`` with referer handling off and pre-fill the ``Form1`` form.

    :param value_id: value assigned, as a one-item list, to the
        ``Centralcolum3$drpFAQ`` control.
    :param value: value assigned to the ``Centralcolum3$txtown`` control.
    :param save_response: optional file path; when it is a string, the body
        of the response is written to that file. Any other value is ignored.
    :return: the Browser with ``Form1`` selected and both controls set.
    """
    b = Browser()
    b.set_handle_referer(False)
    r = b.open(URL)

    if isinstance(save_response, str):
        # The context manager closes the file even if read() or write() raises.
        with open(save_response, 'w') as out:
            out.write(r.read())

    b.select_form(name='Form1')
    b["Centralcolum3$drpFAQ"] = [value_id]
    b["Centralcolum3$txtown"] = value
    return b
开发者ID:mshron,项目名称:DWB---MIX-Scraping,代码行数:15,代码来源:mech.py

示例8: lockNloadBrowser

# 需要导入模块: from mechanize import Browser [as 别名]
# 注意: set_handle_referer 是 Browser 实例的方法,无需单独导入,通过 Browser().set_handle_referer(...) 调用
def lockNloadBrowser():
    """
    Return a Browser with its own LWP cookie jar and scraping-friendly options.

    http-equiv, gzip, redirect and referer handling are enabled, robots
    handling is disabled, refresh handling is installed with ``max_time=1``
    and a desktop Firefox User-agent header is set.
    """
    agent_header = ('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')

    browser = Browser()
    jar = cookielib.LWPCookieJar()
    browser.set_cookiejar(jar)

    browser.set_handle_equiv(True)
    browser.set_handle_gzip(True)
    browser.set_handle_redirect(True)
    browser.set_handle_referer(True)
    browser.set_handle_robots(False)

    # Follow immediate refreshes, but do not hang on refresh delays > 0.
    refresh_processor = mechanize._http.HTTPRefreshProcessor()
    browser.set_handle_refresh(refresh_processor, max_time=1)

    browser.addheaders = [agent_header]
    return browser
开发者ID:Daiver,项目名称:jff,代码行数:16,代码来源:parsing.py

示例9: __init__

# 需要导入模块: from mechanize import Browser [as 别名]
# 注意: set_handle_referer 是 Browser 实例的方法,无需单独导入,通过 Browser().set_handle_referer(...) 调用
class Api:
    """
    Small VK API client driven by a mechanize Browser.

    Construction runs the OAuth authorize flow in the browser and reads the
    access token from the fragment of the final URL; ``query`` then opens
    API method URLs with that token appended.
    """

    # Application id sent as client_id in the authorize request.
    appid = '5415093'
    # Access token; stays None unless the final URL fragment carries one.
    token = None
    # Template filled with (method name, query string); the token is appended
    # to the instance's copy once it is known.
    query_pattern = 'https://api.vk.com/method/%s?%s&access_token='

    # User-agent header value sent by the browser.
    ua = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'
    # Browser and cookie jar, both created in __init__.
    br = None
    cl = None

    def __init__(self, login, password, scope, testmode=False):
        """
        Log in through the OAuth authorize page and capture the access token.

        :param login: value entered in the ``email`` field of the first form.
        :param password: value entered in the ``pass`` field of the first form.
        :param scope: string placed in the ``scope`` query parameter.
        :param testmode: when true, ``&test_mode=1`` is appended to the query
            pattern after the token.
        """
        browser = Browser()
        self.br = browser
        jar = cookielib.LWPCookieJar()
        self.cl = jar
        browser.set_cookiejar(jar)

        browser.set_handle_equiv(True)
        browser.set_handle_redirect(True)
        browser.set_handle_referer(True)
        browser.set_handle_robots(False)
        browser.set_handle_refresh(_http.HTTPRefreshProcessor(), max_time=1)
        browser.addheaders = [('User-agent', self.ua)]

        authorize_url = ''.join([
            'https://oauth.vk.com/authorize?client_id=',
            self.appid,
            '&scope=',
            scope,
            '&redirect_uri=http://oauth.vk.com/blank.html',
            '&display=mobile&response_type=token',
        ])
        browser.open(authorize_url)

        # Credentials go into the first form on the page.
        browser.select_form(nr=0)
        browser.form['email'] = login
        browser.form['pass'] = password
        browser.submit()

        # If the resulting page still contains a form, submit its first form
        # unchanged.
        if list(browser.forms()):
            browser.select_form(nr=0)
            browser.submit()

        fragment = urlparse.urlparse(browser.geturl()).fragment
        for pair in fragment.split('&'):
            pieces = pair.split('=')
            if pieces[0] != 'access_token':
                continue
            self.token = pieces[1]
            self.query_pattern += self.token
            if testmode:
                self.query_pattern += '&test_mode=1'
            break

    def query(self, func, data):
        """
        Open the API URL for ``func`` with query string ``data``.

        :param func: API method name substituted into the URL path.
        :param data: already-encoded query string substituted after ``?``.
        :return: the body read from the response.
        """
        url = self.query_pattern % (func, data)
        return self.br.open(url).read()
开发者ID:Mu57Di3,项目名称:vkGetNews,代码行数:51,代码来源:vkApi.py

示例10: create_browser

# 需要导入模块: from mechanize import Browser [as 别名]
# 注意: set_handle_referer 是 Browser 实例的方法,无需单独导入,通过 Browser().set_handle_referer(...) 调用
def create_browser(debug=False):
    """
    Create a Browser built on mechanize's RobustFactory.

    :param debug: when true, HTTP, response and redirect debug output is
        switched on before the handlers are configured.
    :return: Browser with http-equiv, gzip, redirect and referer handling
        on, robots handling off, and ``HEADERS`` as its request headers.
    """
    browser = Browser(factory=mechanize.RobustFactory())

    if debug:
        # For even more output, a StreamHandler can additionally be attached
        # to the "mechanize" logger at DEBUG level.
        for switch_on in (
            browser.set_debug_http,
            browser.set_debug_responses,
            browser.set_debug_redirects,
        ):
            switch_on(True)

    handler_settings = (
        (browser.set_handle_equiv, True),
        (browser.set_handle_gzip, True),
        (browser.set_handle_redirect, True),
        (browser.set_handle_referer, True),
        (browser.set_handle_robots, False),
    )
    for apply_setting, flag in handler_settings:
        apply_setting(flag)

    browser.addheaders = HEADERS
    return browser
开发者ID:kevinwu06,项目名称:scraping_stuff,代码行数:19,代码来源:headless_browser.py

示例11: login

# 需要导入模块: from mechanize import Browser [as 别名]
# 注意: set_handle_referer 是 Browser 实例的方法,无需单独导入,通过 Browser().set_handle_referer(...) 调用
 def login(self):
     """
     Open the fatsecret.com sign-in page, submit the login form and return
     the Browser that was used.

     The credentials are the literal placeholders below; replace them with
     a real username and password before use.
     """
     session = Browser()
     jar = cookielib.LWPCookieJar()
     session.set_cookiejar(jar)

     for setter, flag in (
         (session.set_handle_equiv, True),
         (session.set_handle_redirect, True),
         (session.set_handle_referer, True),
         (session.set_handle_robots, False),
     ):
         setter(flag)
     session.set_handle_refresh(_http.HTTPRefreshProcessor(), max_time=2)

     session.open('http://www.fatsecret.com/Auth.aspx?pa=s')
     # The login form is the first form on the page; the keys are the
     # name attributes of its username and password inputs.
     session.select_form(nr=0)
     session['_ctl0:_ctl7:Logincontrol1:Name'] = 'username'
     session['_ctl0:_ctl7:Logincontrol1:Password'] = 'password'
     session.submit()
     return session
开发者ID:exsonic,项目名称:FatSecret_Crawler,代码行数:21,代码来源:DataExtractor.py

示例12: get_br

# 需要导入模块: from mechanize import Browser [as 别名]
# 注意: set_handle_referer 是 Browser 实例的方法,无需单独导入,通过 Browser().set_handle_referer(...) 调用
def get_br():
    """
    Return a Browser built with RobustFactory and NoHistory.

    The browser gets its own LWP cookie jar, has its ``back`` attribute
    replaced by ``back_func``, follows refreshes with ``max_time=10`` and
    sends ``USER_AGENT`` as its User-agent header.
    """
    # TODO (low priority): also send these request headers:
    #   Accept-Encoding: identity
    #   Host: _login.weibo.cn
    #   Referer: http://weibo.cn/
    #   Connection: close
    #   User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)
    browser = Browser(factory=RobustFactory(), history=NoHistory(),)
    jar = cookielib.LWPCookieJar()
    browser.back = back_func
    browser.set_cookiejar(jar)

    # gzip handling is left off: it is not yet an official mechanize feature.
    for setter, flag in (
        (browser.set_handle_equiv, True),
        (browser.set_handle_redirect, True),
        (browser.set_handle_referer, True),
        (browser.set_handle_robots, False),
    ):
        setter(flag)

    browser.set_handle_refresh(HTTPRefreshProcessor(), max_time=10)
    browser.addheaders = [('User-agent', USER_AGENT)]
    return browser
开发者ID:hackrole,项目名称:scrapy-utils,代码行数:22,代码来源:mechanize_br.py

示例13: grablinks

# 需要导入模块: from mechanize import Browser [as 别名]
# 注意: set_handle_referer 是 Browser 实例的方法,无需单独导入,通过 Browser().set_handle_referer(...) 调用
def grablinks(pageurl):
    """
    Collect the ``hqurl`` value from every download page linked on ``pageurl``.

    One browser walks the links of the listing page whose URL matches
    ``/download/``; a second browser opens each of them and the
    ``var hqurl = '...'`` assignment is extracted from the page text.

    :param pageurl: URL of the listing page to open.
    :return: list with one string per download link: the matched assignment
        with ``var hqurl =`` and all single quotes removed (the space that
        followed ``=`` is retained at the start of the string).
    :raise AttributeError: if a download page contains no
        ``var hqurl = '...'`` assignment (``re.search`` returns None).
    """
    agent_header = ('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')

    br = Browser()
    br2 = Browser()
    for browser in (br, br2):
        browser.set_handle_referer(True)
        browser.set_handle_robots(False)
        browser.addheaders = [agent_header]

    # Open the page the caller asked for rather than sys.argv[1], so the
    # function works for any URL and outside a command-line context.
    br.open(pageurl)

    dllinks = []
    for count, link in enumerate(br.links(url_regex='/download/'), 1):
        print("Working...")
        req = br.click_link(url=link.url)
        br2.open(req)
        dlpagetext = br2.response().read()
        assignment = re.search('var hqurl = \'.*\'', dlpagetext).group(0)
        dllinks.append(assignment.replace("var hqurl =", "").replace("'", ""))
        print("Grabbed link " + str(count))
    return dllinks
开发者ID:Fruity-Grebbles,项目名称:song365,代码行数:24,代码来源:song365.py

示例14: fetch_transactions

# 需要导入模块: from mechanize import Browser [as 别名]
# 注意: set_handle_referer 是 Browser 实例的方法,无需单独导入,通过 Browser().set_handle_referer(...) 调用
def fetch_transactions(startdate=None, enddate=None, visa=False):
    """
    Log in at ``LOGIN_URL``, filter transactions by date and return the page.

    The login page's ``td strong`` elements are read as numbers; they select
    which single characters of ``settings.PASS`` and ``settings.NUM`` are
    submitted along with ``settings.CODE``.

    :param startdate: start of the period; ``strftime`` is called on it, so
        despite the ``None`` default a date-like object is required.
    :param enddate: end of the period; same requirement as ``startdate``.
    :param visa: when true, the ``visa`` control is set and ``all`` is
        cleared; otherwise ``all`` is set.
    :return: body of the response to the submitted filter form.
    """
    agent = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'

    br = Browser()
    br.addheaders = [('User-agent', agent)]
    br.set_handle_equiv(True)
    br.set_handle_gzip(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    br.open(LOGIN_URL)

    document = pq(br.response().read())
    labels = document('td strong')

    def label_number(index):
        # Numeric content of the index-th "td strong" element.
        return int(labels[index].text.strip())

    char1 = label_number(2)
    char2 = label_number(3)
    num1 = label_number(5)
    num2 = label_number(6)

    # Login form: each requested position is 1-based, so position p maps to
    # the one-character slice [p-1:p].
    login_form = list(br.forms())[0]
    br.form = login_form
    br['globalKeyCode'] = settings.CODE
    br['ctl001password1'] = settings.PASS[char1-1:char1]
    br['ctl001password2'] = settings.PASS[char2-1:char2]
    br['ctl001passcode1'] = settings.NUM[num1-1:num1]
    br['ctl001passcode2'] = settings.NUM[num2-1:num2]
    br.submit()

    # Filter form: restrict the listing to the requested date range.
    br.open(FILTER)
    filter_form = list(br.forms())[0]
    br.form = filter_form
    br['periodoption'] = ["byDate"]
    br['startdate'] = startdate.strftime("%d/%m/%Y")
    br['enddate'] = enddate.strftime("%d/%m/%Y")
    if not visa:
        br['all'] = ["True"]
    else:
        br['visa'] = ["True"]
        br['all'] = False
    br.submit()

    return br.response().read()
开发者ID:sebbacon,项目名称:oneaccount2qif,代码行数:39,代码来源:oneaccount.py

示例15: __init__

# 需要导入模块: from mechanize import Browser [as 别名]
# 注意: set_handle_referer 是 Browser 实例的方法,无需单独导入,通过 Browser().set_handle_referer(...) 调用
 def __init__(self,browser,cookies):
     """
     Create a mechanize Browser loaded with the given cookies.

     The browser is stored in ``self.browser`` and ``self.browserType`` is
     set to ``'mechanize'``.

     :param browser: not read by this method.
     :param cookies: the string ``"firefox"`` to take the jar from
         ``getFirefoxCookie()``; any other value is passed to
         ``set_cookies_from_text``.
     """
     from mechanize import Browser

     # Cookie jar: either Firefox's cookies or ones parsed from the argument.
     if cookies == "firefox":
         jar = getFirefoxCookie()
     else:
         jar = set_cookies_from_text(cookies)

     mech = Browser()
     mech.set_cookiejar(jar)

     # Browser options (gzip handling is deliberately left untouched).
     mech.set_handle_equiv(True)
     mech.set_handle_redirect(True)
     mech.set_handle_referer(True)
     mech.set_handle_robots(False)

     # Follow immediate refreshes, but do not hang on refresh delays > 0.
     # NOTE(review): the local import above binds only Browser; this line
     # presumably relies on a module-level "import mechanize" - confirm.
     mech.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

     # Request headers imitating desktop Chrome.
     mech.addheaders = [
         ('user-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.45 Safari/537.36'),
         ('accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'),
     ]

     self.browser = mech
     self.browserType = 'mechanize'
开发者ID:sethc23,项目名称:seamless_yelp_scraping,代码行数:26,代码来源:webpage_scrape.py


注:本文中的mechanize.Browser.set_handle_referer方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。