

Python Browser.addheaders Code Examples

This article collects typical usage examples of mechanize.Browser.addheaders in Python. Note that addheaders is an attribute of Browser rather than a method: it is a plain list of (header-name, value) tuples that you assign to, and every header in the list is sent with each request the browser makes. If you are wondering what Browser.addheaders does, how to use it, or what real-world code that sets it looks like, the curated examples below may help. You can also explore other usage examples of the containing class, mechanize.Browser.


The following presents 15 code examples of Browser.addheaders, sorted by popularity by default.
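Before the examples, here is a minimal sketch of the pattern they all share: construct a Browser, usually tell it to ignore robots.txt, and assign a list of (header-name, value) tuples to addheaders so that every request carries a browser-like User-Agent. The URL and header values below are illustrative placeholders, not taken from any of the examples.

from mechanize import Browser

br = Browser()
br.set_handle_robots(False)    # skip robots.txt checks, as most examples below do
# addheaders is a plain list attribute of (header-name, value) tuples
br.addheaders = [
    ('User-agent', 'Mozilla/5.0 (X11; Linux x86_64) Gecko/20100101 Firefox/60.0'),
    ('Accept', '*/*'),
]
response = br.open('http://example.com/')    # placeholder URL
html = response.read()

Each tuple in addheaders is attached to every request the Browser sends; the examples below differ mainly in which User-Agent string they spoof and in what they do with the fetched HTML.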

Example 1: get_data

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import addheaders [as alias]
def get_data():

    html = scraperwiki.scrape(edd_url)
    process_ex_dividend_data(html)

    br = Browser()
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    br.open(edd_url)

    # Collect the pagination links for pages 2-4
    links = {}
    for link in br.links():
        if link.text in ['2', '3', '4']:
            links[link.text] = link.url
    for k, link in links.items():
        m = re.search(edd_pat, link)

        # Re-open the page with a fresh browser and fake the ASP.NET postback
        # that the pagination link would normally trigger via JavaScript
        br = Browser()
        br.set_handle_robots(False)
        br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
        br.open(edd_url)
        br.select_form(nr=0)
        br.set_all_readonly(False)
        br["__EVENTTARGET"] = m.group(1)
        br["__EVENTARGUMENT"] = ''
        # Disable the submit buttons so they are not sent with the postback
        for c in br.controls:
            if c.type == 'submit':
                c.disabled = True
        response = br.submit()
        process_ex_dividend_data(response.read())
Author: flyeven, Project: scraperwiki-scraper-vault, Lines: 32, Source: dividendexdividenddate.py

Example 2: get_google_news_by_url

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import addheaders [as alias]
def get_google_news_by_url(url):

    # Construct browser object
    browser = Browser()
    ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.81 Safari/537.36'
    browser.addheaders = [('User-Agent', ua), ('Accept', '*/*')]

    # Do not observe rules from robots.txt
    browser.set_handle_robots(False)

    # Create HTML document
    html = fromstring(browser.open(url).read())

    # get number of pages
    xpath_pages = '//a[@class="fl"]'
    page_num = len(html.xpath(xpath_pages)) + 1

    # get all pages url
    urls = generate_url_pages(url, page_num)
    print 'On ' + str(len(urls)) + ' pages:'

    df = [None] * page_num

    # iterate through all pages of this url
    for index, url in enumerate(urls):
        page_html = fromstring(browser.open(url).read())
        df[index] = get_google_news_in_page(page_html)

    return pd.concat(df, ignore_index=True)
Author: Ellen625, Project: News_Analysis, Lines: 31, Source: GoogleNews_Crawler.py

Example 3: getRandomXKCDComic

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import addheaders [as alias]
def getRandomXKCDComic(urlBase):
    br = Browser()
    br.addheaders = [('User-agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6; en-us) AppleWebKit/531.9 (KHTML, like Gecko) Version/4.0.3 Safari/531.9')]
    br.set_handle_robots(False) 


    # XKCD comics are numbered sequentially in their URLs: http://www.xkcd.com/1, http://www.xkcd.com/2, ..., http://www.xkcd.com/n
    upperBound = 1
    lowerBound = 1

    #Multiply by two until address no longer exists
    while True:
        link = urlBase + str(upperBound) + "/"
        try:
            response = br.open(link)
        except:
            break

        lowerBound = upperBound
        upperBound = upperBound * 2

    #Binary Search for last Comic
    while True:
        pivot = (upperBound + lowerBound)/2
        link = urlBase + str(pivot) + "/"

        if lowerBound == upperBound or pivot == lowerBound:
            randomComicID = random.randint(1, pivot)
            randPageLink = urlBase + str(randomComicID) + "/"
            return br.open(randPageLink)
        try:
            response = br.open(link)
            lowerBound = pivot
        except:
            upperBound = pivot
Author: djoeman84, Project: XKCD-Fetch-random-image, Lines: 37, Source: xkcdFetch.py

Example 4: scrape

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import addheaders [as alias]
    def scrape(self):
        """
        Opens the html page and parses the pdf links.
        """
        browser = Browser()

        #-----------
        user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
        values1 = {'name' : 'Michael Foord',
                   'location' : 'Northampton',
                   'language' : 'Python' }
        headers = { 'User-Agent' : user_agent }
        browser.set_handle_redirect(True)
        browser.set_handle_referer(True)
        browser.set_handle_robots(False)
        browser.addheaders = [('User-Agent', 'Firefox')]
        #-------------

        browser.set_handle_robots(False)

        html = browser.open(self.site)

        lines = html.read().splitlines()

        for line in lines:
            urls = re.findall('<a href="?\'?([^"\'>]*)', line)
            for url in urls:
                if '.pdf"' in url:
                    self.pdf_urls.append(url)
Author: manishc1, Project: DySeCor, Lines: 31, Source: easy_scholar.py

Example 5: find_first_article

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import addheaders [as alias]
def find_first_article():
    mech = Browser()
    cj = cookielib.LWPCookieJar()

    mech.set_handle_equiv(True)
    # mech.set_handle_gzip(True)
    mech.set_handle_redirect(True)
    mech.set_handle_referer(True)
    mech.set_handle_robots(False)
    # mech.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    mech.addheaders = [
        (
            "User-agent",
            "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1",
        )
    ]

    page = mech.open("https://bitcointalk.org/index.php?board=77.0")
    html = page.read()

    soup = BeautifulSoup(html)

    first_article_tag = soup.find("td", class_="windowbg")

    global startingpost
    startingpost = first_article_tag.span.a.get("href")
    print startingpost
Author: jgomezfr, Project: bitcoin-reporters, Lines: 29, Source: python-scrape.py

Example 6: extract_article_url

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import addheaders [as alias]
def extract_article_url(posturl):
    mech = Browser()
    cj = cookielib.LWPCookieJar()

    mech.set_handle_equiv(True)
    # mech.set_handle_gzip(True)
    mech.set_handle_redirect(True)
    mech.set_handle_referer(True)
    mech.set_handle_robots(False)
    # mech.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    mech.addheaders = [
        (
            "User-agent",
            "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1",
        )
    ]

    page = mech.open(posturl)
    html = page.read()

    global soup
    soup = BeautifulSoup(html)

    global articleURL
    # print soup.prettify()

    for item in soup.find_all("div", class_="post"):
        for link in item.find_all("a"):
            string = link.get("href")
            if prog.match(string):
                # find the link that is to the article (link outside of bitcointalk.org forum)
                articleURL = link.get("href")
                return link.get("href")
    return "No article url"
Author: jgomezfr, Project: bitcoin-reporters, Lines: 36, Source: python-scrape.py

Example 7: login_url

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import addheaders [as alias]
def login_url(
                url,
                login,
                passwd,
                form_nomber,
                login_name,
                paswd_name,
                submit_nomber
            ):
    br = Browser(); showMessage('Creating the browser interface')
    cj = cookielib.LWPCookieJar()
    br.set_cookiejar(cj)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

    br.open(url); showMessage('Loading the site and logging in')
    br.select_form(nr = form_nomber)
    br[login_name] = login
    br[paswd_name] = passwd

    res = br.submit(nr = submit_nomber)
    content = res.read()
    # determine the number of pages
    maxPage = int(max_page(content)); showMessage('Determining the number of pages and jumping to the last one')
    curPage = 84
    while curPage < maxPage:
        res = br.open('http://forum.rsload.net/cat-kryaki-seriyniki-varez/topic-4820-page-%d.html' % (maxPage))
        curPage = maxPage
        maxPage = int(max_page(content))
        content = res.read()
    # parse the keys
    if get_all_keys(content):
        webbrowser.open_new_tab('http://forum.rsload.net/cat-kryaki-seriyniki-varez/topic-4820-page-%d.html' % (maxPage)) # returns True and opens a new tab
Author: wiom, Project: keys_grabber, Lines: 34, Source: test.py

Example 8: parseFeeds

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import addheaders [as alias]
 def parseFeeds(self):
     mech = Browser()
     mech.addheaders = [ ('User-agent', 'Mozilla/5.0 (compatible)') ]
     mech.set_handle_robots(False)
     for url in self.feedUrls:
     #url = "http://feeds.feedburner.com/PurdueEngNews?format=xml"
         page = mech.open(url)
         html = page.read()
         soup = BeautifulStoneSoup(html)
         headlines = []
         descriptions = []
         i=0
         self.newsList = []
         for item in soup.findAll('item'):
             if (i > 20):
                 break
             date = item.find('pubdate')
             title = item.find('title')
             link = item.find('link')
             desc = item.find('description')
             if (len(title.contents) > 0):
                 title2 = title.contents[0]
             else:
                 title2 = 'None'
             self.newsList.append(NewsStory(date.contents[0], title2, link.contents[0], \
                 desc.contents[0]))
             i+=1
         for story in self.newsList:
             headlines.append(story.title)
             descriptions.append(story.link)
             #story.display()
         self.headlineList.append(headlines)
         self.descList.append(descriptions)
     self.populateTopicList()
Author: jevinskie, Project: purdue-kiosk, Lines: 36, Source: newshelper.py

Example 9: scrap_query

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import addheaders [as alias]
def scrap_query(query, bang=None):

    r = ddg_query('imbd ' + query, bang=bang)
    if 'redirect' in dir(r) and 'primary' in dir(r.redirect):
        url = r.redirect.primary
    else:
        logger.info('Could not find imdb searchpage from DuckDuckGo bang')
        return None

    br = Browser()
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.2;\
                        WOW64) AppleWebKit/537.11 (KHTML, like Gecko)\
                        Chrome/23.0.1271.97 Safari/537.11')]

    r = br.open(url)
    soup = BeautifulSoup(r)


    for link in soup.find_all('a'):
        href = link.get('href','')
        match = re.search(r"imdb\.com/.*tt(?P<number>[^/]*)", href)
        if match:
            imdb_id = check_imdb(match.group('number'))
            return imdb_id

    return None
Author: getzze, Project: imdbfetcher, Lines: 29, Source: searchengine_api.py

Example 10: respond

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import addheaders [as alias]
def respond(permalink, text):
    br = Browser()
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1'
    br.addheaders = [('User-agent', user_agent)]

    soup = BeautifulSoup(br.open(permalink).read())

    urlopen = urllib2.urlopen
    Request = urllib2.Request
    encode = urllib.urlencode
    cj = cookielib.LWPCookieJar()

    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    urllib2.install_opener(opener)

    root_comment = soup.find('form', attrs={'class': 'usertext border'})
    thing_id = root_comment.find('input', attrs={'name': 'thing_id'})['value']
    print 'thing_id', thing_id

    # LOG THE FUCK IN
    req = Request('http://www.reddit.com/api/login/username', encode({'user': 'acid-trip-bot', 'passwd': 'hackny', 'api_type': 'json'}), {'User-Agent': user_agent})
    req_open = urlopen(req)
    read = json.loads(req_open.read())

    modhash = read['json']['data']['modhash']

    # POST THE FUCKING COMMENT
    req = Request('http://www.reddit.com/api/comment', encode({'thing_id': thing_id, 'text': text + '\n\n*This is an automated response.*', 'uh': modhash}), {'User-Agent': user_agent})
    req_open = urlopen(req)
    read = json.dumps(req_open.read())
Author: artursapek, Project: reddit-bot-hackNYspring2012, Lines: 32, Source: hackny_bot.py

Example 11: scrape_info

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import addheaders [as alias]
def scrape_info():
	browser = Browser()
	browser.set_handle_robots(False)
	browser.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

	parkIds = []
	for name in config['names']:
		browser.open("https://www.recreation.gov")
		browser.select_form(nr=0)
		browser['locationCriteria'] = name

		response = browser.submit()
		content = response.read()

		soup = BeautifulSoup(content, 'html.parser')
		scripts = soup.select('script')
		for script in scripts:
			if 'SuggestedPlaces' in str(script):
				jsonStr = str(script).strip('<script>var SuggestedPlaces = ').strip(';</script>')
				places = json.loads(jsonStr)
				query = urlparse.parse_qs(places[0]['value'])
				if 'parkId' in query:
					print('FOUND!: ' + unicode(query['parkId'][0]))
					parkIds.append(unicode(query['parkId'][0]))
				else:
					print('No results for ' + name + ': ' + places[0]['value'])

	pprint(parkIds)
Author: ai2160, Project: recreation-gov-scraper, Lines: 30, Source: get_park_ids.py

Example 12: createbrowser

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import addheaders [as alias]
 def createbrowser(self):
     br = Browser()
     br.set_handle_gzip(True)
     br.set_handle_robots(False)
     br.set_handle_redirect(True)
     br.addheaders = [('User-agent', 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 5_1 like Mac OS X; en-US) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B179 Safari/7534.48.3')]
     return br
Author: Rasmus-Riis, Project: Huge_py, Lines: 9, Source: hotaccount.py

Example 13: searchTitle

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import addheaders [as alias]
def searchTitle(rawtitle):
	br = Browser()
	# Ignore robots.txt
	br.set_handle_robots( False )
	# Google demands a user-agent that isn't a robot
	br.addheaders = [('User-agent', 'Firefox')]
	
	br.open( "http://www.google.com " )
	br.select_form( 'f' )
	s='imdb'+' + '+' '.join(re.compile('[\.]').split(rawtitle))
	br.form[ 'q' ] = s
	br.submit()

	resp = None
	for link in br.links():
		siteMatch = re.compile( 'www.imdb.com/title/tt[0-9]*/$' ).search( link.url )
		if siteMatch:
		    resp = br.follow_link( link )
		    print link.url
		    break

	soup = BeautifulSoup(resp.get_data())
	
	title = re.sub(' - IMDb','',soup.find('title').string)
	title = re.sub('\([0-9]*\)','',title)
	
	return title
Author: ragib06, Project: imdbsearch, Lines: 29, Source: imdbsearch.py

Example 14: generateSentence

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import addheaders [as alias]
def generateSentence(var):
    br = Browser()
    br.set_handle_robots(False)
    br.set_handle_equiv(False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    url= 'http://www.oxforddictionaries.com/definition/english/' + str(var)
    # url= 'https://www.google.co.in/search?q=define+utilitarian'
    try:
        br.open(url)
    except:
        print "what word is this, man? " + var
        return
    soup = BeautifulSoup(br.response().read())
    sentence=""
    counter=0
    for i in soup.find_all('ul',{'class':'sentence_dictionary'}):
        if i is not None:
            soup2 = BeautifulSoup(str(i))
            for j in soup2.find_all('li',{'class':'sentence'}):
                if j is not None:
                    sentence = sentence + str(counter+1)+") "+j.string.replace(',',' ').strip()+"\n"
                    counter+=1
                    if counter == 2:
                        return sentence
    return sentence
Author: amukho14, Project: Attempts, Lines: 27, Source: starQLetToDiffProject.py

Example 15: process

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import addheaders [as alias]
def process(time):
    br = Browser()
    # Ignore robots.txt
    br.set_handle_robots( False )
    # Google demands a user-agent that isn't a robot
    br.addheaders = [('User-agent', 'Firefox')]
    br.open("http://heasarc.gsfc.nasa.gov/cgi-bin/Tools/xTime/xTime.pl")

    br.select_form("form")

    br["time_in_i"] = time # Enter your time in here in the format "2015-06-27 04:23:23.68"

    response=br.submit()

    html=response.read()
    soup = BeautifulSoup(html)


    table = soup.find("table", border=5)

    g = table.findAll('tr')
    row= g[7] #Select the correct row

    cols = row.findAll('td')
    value = cols[1].string #This is the MET time

    return value
Author: tomkimpson, Project: Fermi-met-convertor, Lines: 29, Source: met_converter.py


Note: The mechanize.Browser.addheaders examples above were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by various developers; copyright belongs to the original authors, and distribution and use should follow each project's License. Do not reproduce without permission.