

Python Browser.links Method Code Examples

This article collects typical usage examples of the Python mechanize.Browser.links method. If you are wondering how Browser.links works, how to call it, or what real-world usage looks like, the curated code samples below may help. You can also explore further usage examples of the mechanize.Browser class to which the method belongs.


The following presents 15 code examples of the Browser.links method, listed by popularity by default.
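Before the project excerpts, here is a minimal, self-contained sketch of the pattern they all share: open a page with mechanize.Browser and iterate over the Link objects yielded by Browser.links(). This sketch is illustrative only; the URL is a placeholder and is not taken from any of the projects below.

from mechanize import Browser

br = Browser()
br.set_handle_robots(False)       # many of the examples below also skip robots.txt
br.open("http://example.com/")    # placeholder URL
for link in br.links():           # yields mechanize.Link objects
    # each Link exposes .url, .text, .base_url and .absolute_url
    print("%s -> %s" % (link.url, link.text))

links() also accepts the same filtering keyword arguments as find_link()/follow_link(), for example br.links(url_regex=".jpg$") or br.links(text="WMP"), as several of the examples below demonstrate.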

Example 1: regex_find_links

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
    def regex_find_links(self, page, regex):
        br = Browser()
        br.set_handle_robots(False)
        try:
            br.open(page)
        except URLError:
            self.output_q.put('URL Not Found: %s' % page)

        # Number of matching links found
        num_links = 0
        # Max length of printed links
        max_len = 0
        #print page, regex
        for link in br.links():
            if regex.match(link.url):
                num_links += 1
                max_len = max(len(basename(link.url)), max_len)
                # Put the found links on the download queue
                self.download_q.put(link.url)
                # Put the output messages on the respective queue.
                # Implemented this way just in case this method is also threaded/multi-processed.
                # NOTE: Try to keep the number of output messages the same as the links
                self.output_q.put('Found Link: %s' % link.url)

        return num_links, max_len
Author: on2valhalla, Project: NgramViewerScraper, Lines: 27, Source: downloader.py

Example 2: searchTitle

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def searchTitle(rawtitle):
	br = Browser()
	# Ignore robots.txt
	br.set_handle_robots( False )
	# Google demands a user-agent that isn't a robot
	br.addheaders = [('User-agent', 'Firefox')]
	
	br.open( "http://www.google.com " )
	br.select_form( 'f' )
	s='imdb'+' + '+' '.join(re.compile('[\.]').split(rawtitle))
	br.form[ 'q' ] = s
	br.submit()

	resp = None
	for link in br.links():
		siteMatch = re.compile( 'www.imdb.com/title/tt[0-9]*/$' ).search( link.url )
		if siteMatch:
		    resp = br.follow_link( link )
		    print link.url
		    break

	soup = BeautifulSoup(resp.get_data())
	
	title = re.sub(' - IMDb','',soup.find('title').string)
	title = re.sub('\([0-9]*\)','',title)
	
	return title
Author: ragib06, Project: imdbsearch, Lines: 29, Source: imdbsearch.py

Example 3: fetch

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def fetch():
    br = Browser()                # Create a browser
    map = {};
    
    # br.open(login_url)            # Open the login page
    # br.select_form(id="signform")  # Find the login form
    # br['username'] = username     # Set the form values
    # br['password'] = password
    # resp = br.submit()            # Submit the form
    
    br.open('http://www.verycd.com/sto/music/china/')
    nice_links = [l for l in br.links()
                    if 'topics' in l.url]
    if not nice_links:
        return None
    
    for link in nice_links:
        if link.url in map.keys():
            continue
        
        try:
            response = br.follow_link(link)
            map[link.url] = br.title()
        except Exception, e:
            print >> sys.stderr, e
Author: mahone3297, Project: hades, Lines: 27, Source: EgMechanize.py

Example 4: get_articlelist

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def get_articlelist():
    entry_page = "http://www.ausleisure.com.au/default.asp?PageID=2&n=Latest+News"

    br = Browser()
    br.open(entry_page)
    entries = []

    for l in br.links():
        if l.url.startswith("default.asp?PageID=2&ReleaseID=") and not l.text == '[IMG]':
            date = l.text[:10]
            title = l.text[15:]
            url = l.absolute_url
            data = get_article(url)

            entry = {
                'title':    simple_escape(title),
                'link':     simple_escape(url),
                'id':       simple_escape(url),
                'updated':  '%s-%s-%sT00:00:00Z' % (date[6:], date[3:5], date[:2]),
                'content':  simple_escape(data),
            }

            entries.append(entry)

    return entries
Author: pwae, Project: feeds, Lines: 27, Source: ausleisure.py

Example 5: begin_scraper

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def begin_scraper():
  br = Browser()
  br.addheaders = [('User-agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_8; rv:16:0) Gecko/20100101 Firefox/16.0')]
  br.set_handle_robots(False)
  br.open("https://wwws.mint.com/login.event")
  assert br.viewing_html()
  formcount=0
  for f in br.forms():
    if str(f.attrs["id"]) == "form-login":
      break
    formcount = formcount+1
  
  br.select_form(nr=formcount)

  br["username"] = "[email protected]" #Put your username here
  br["password"] = getpass()
  
  
  #import pdb; pdb.set_trace()
  # Submit the user credentials to login to mint 
  response = br.submit()
  response = br.follow_link(text="Transactions")
  links_to_transactions = br.links(text_regex="Export all \d+ transactions")
  link = ""
  for f in links_to_transactions:
    link = f

  response2 = br.follow_link(link)
  text_file = open("transactions.csv", "w")
  text_file.write(response2.read())
  text_file.close()
Author: rhintz42, Project: mint-scraper, Lines: 33, Source: scrape.py

Example 6: get_data

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def get_data ():

    html = scraperwiki.scrape (edd_url)
    process_ex_dividend_data  (html)
    
    br = Browser()
    br.set_handle_robots (False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    br.open (edd_url)    
    
    links = {}
    for link in br.links():
        if link.text in ['2', '3', '4']:
            links [link.text] = link.url
    for k, link in links.items():
        m = re.search (edd_pat, link)

        br = Browser()
        br.set_handle_robots (False)
        br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
        br.open (edd_url)    
        br.select_form(nr=0)
        br.set_all_readonly(False)
        br["__EVENTTARGET"] = m.group(1)
        br["__EVENTARGUMENT"] = ''
        for c in br.controls:
            if c.type == 'submit':
                c.disabled = True
        response = br.submit()
        process_ex_dividend_data (response.read())
Author: flyeven, Project: scraperwiki-scraper-vault, Lines: 32, Source: dividendexdividenddate.py

Example 7: downloadAll

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def downloadAll(url ,pattern = "", saveto = "", overwrite = 2, suffix = "") :
	br = Browser()
	br.open(url)
	for link in br.links(url_regex=pattern) :
		if(link.url.startswith("http://")) :
			download(link.url, "", saveto, overwrite, suffix)
		elif(link.url.startswith("/")) :
			download(link.base_url[:link.base_url.find("/",8)] + link.url, "", saveto , overwrite, suffix)
		else :
			download(link.base_url[:link.base_url.rfind("/")+1] + link.url, "", saveto, overwrite, suffix)
Author: yforster, Project: pythomat, Lines: 12, Source: pythomat.py

Example 8: downloadAll

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def downloadAll(username, courseName):
    br = Browser()
    br.addheaders = [
        (
            "User-agent",
            "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6; en-us) AppleWebKit/531.9 (KHTML, like Gecko) Version/4.0.3 Safari/531.9",
        )
    ]
    br.set_handle_robots(False)
    br.open("https://myvideosu.stanford.edu/oce/currentquarter.aspx")
    assert br.viewing_html()
    br.select_form(name="login")
    br["username"] = username
    br["password"] = getpass()

    # Open the course page for the title you're looking for
    print "Logging in to myvideosu.stanford.edu..."
    response = br.submit()
    print "Logged in, going to course link."
    response = br.follow_link(text=courseName)
    # print response.read()

    # response = br.follow_link(text="HERE")
    # print response.read()
    # Build up a list of lectures

    print "Loading video links."
    links = []
    for link in br.links(text="WMP"):
        links.append(re.search(r"'(.*)'", link.url).group(1))
    link_file = open("links.txt", "w")
    # So we download the oldest ones first.
    links.reverse()

    print "Found %d links, getting video streams." % (len(links))
    videos = []
    for link in links:
        response = br.open(link)
        soup = BeautifulSoup(response.read())
        video = soup.find("object", id="WMPlayer")["data"]
        video = re.sub("http", "mms", video)
        video = video.replace(" ", "%20")  # remove spaces, they break urls
        output_name = re.search(r"[a-z]+[0-9]+[a-z]?/[0-9]+", video).group(0).replace("/", "_")  # + ".wmv"
        output_wmv = output_name + ".wmv"
        link_file.write(video + "\n")
        print video
        output_mp4 = output_name + ".mp4"
        videos.append((video, output_wmv, output_mp4))
    link_file.close()

    print "Downloading %d video streams." % (len(videos))
    for video in videos:
        download(video)

    print "Done!"
Author: EllenSebastian, Project: scpd-scraper, Lines: 57, Source: scrape.py

Example 9: read_all_result_page_links_for

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def read_all_result_page_links_for(mainurl):
    br = Browser()
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
            
    br.open(mainurl)
    nice_links = [l for l in br.links()
            if 'company' in l.url]        
        
    for link in nice_links:
        read_detail_page(link.url)
Author: carriercomm, Project: scraperwiki-scraper-vault, Lines: 13, Source: ddd-organic-bio_1.py

Example 10: upload_gallery

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def upload_gallery(url):
    from mechanize import Browser
    br = Browser()
    br.set_handle_robots(False)
    br.open(url)

    urls = set()

    for link in br.links(url_regex=".jpg$"):
        urls.add(link.url)

    upload_images(urls)
Author: EArmour, Project: pyfibot, Lines: 14, Source: module_imgur.py

Example 11: get_soup

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def get_soup(movie): 
    movie = '+'.join(movie.split())
    url = "http://www.imdb.com/find?ref_=nv_sr_fn&q="+movie+"&s=all"
    br = Browser()
    br.open(url)
    try:
        link = list(br.links(url_regex = re.compile(r"/title/tt*")))[0]
    except:
        return ""
    else:
        res = br.follow_link(link)
        soup = BeautifulSoup(res.read())
        return soup
Author: hamimraavi, Project: imdpapi, Lines: 15, Source: search.py

Example 12: getMapping

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def getMapping():
	# get the mapping of cellphone numbers to IP addresses
	mapping = dict()
	br = Browser()
	br.open("http://mahan.webfactional.com/sms/freedns/sms/cellphonedomains/")

	# words to exclude irrelevant links
	words = ['Name','Last modified','Size','Description','Parent Directory']

	# get all the cellphone numbers with registered IP addresses
	# .links() optionally accepts the keyword args of .follow_/.find_link()
	for link in br.links():
		if link.text not in words:
			print link.text
			# get the corresponding IP address
			ipaddr = urllib2.urlopen('http://mahan.webfactional.com/sms/freedns/sms/cellphonedomains/' + link.text).read()
			mapping[link.text] = ipaddr
	print mapping
	log.msg("mapping: " + str(mapping))
	log.msg("links: " + str(br.links()))
	log.msg("set mapping 1: " + str(p2presolver.mapping))
	p2presolver.mapping = mapping
	log.msg("set mapping 2: " + str(p2presolver.mapping))
Author: jyale, Project: freedns, Lines: 25, Source: secondtestserver.py

Example 13: main

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def main(page, regex, path):
    start_time = time.time()
    br = Browser()
    br.set_handle_robots(False)
    br.open(page)
    #br.open('http://storage.googleapis.com/books/ngrams/books/datasetsv2.html')

    eng_all = re.compile(regex)
    #eng_all = re.compile('.*googlebooks-eng-all.*20120701.*')

    #print page, regex, path
    n = 0
    maxlen = 0
    link_list = []
    for link in br.links():
        if eng_all.match(link.url):
            n += 1
            maxlen = max(len(os.path.basename(link.url)), maxlen)
            link_list.append(link.url)
            sys.stderr.write('Found Link: %s\n' % link.url)

    answer = raw_input("\n\nAre you sure you want to download the above %i file(s)? (Y/N):  " % n)
    if answer == 'N' or answer == 'n':
        sys.exit(0)

    sys.stderr.write('\n\nDownloading files to: %s\n' % path)

    digits = len('%d' % n)
    disp_time = datetime.datetime.now

    for i, link in enumerate(link_list):
        download_start = time.time()
        file_name = os.path.basename(link)
        full_path = os.path.join(path, file_name)
        if os.path.exists(full_path):
            sys.stderr.write('%s exists, not downloading\n' % full_path)
            continue
        try:
            sys.stderr.write('[%s] Downloading(%-*i of %i): %*s' % (str(disp_time().time())[:8], digits, i+1, n,
                                                                   maxlen + 2, file_name))
            br.retrieve(link, filename=full_path)
        except:
            sys.stderr.write('\n\nSomething happened, deleting last file: %s\n' % full_path)
            os.remove(full_path)
            sys.exit(0)
        sys.stderr.write(' of size %s MB in %5.2f min\n' % ("{:7.2f}".format(float(os.stat(full_path).st_size)/1000000),
                                                            (time.time() - download_start)/60))
        br.clear_history()

    sys.stderr.write('\ndownloaded %i files to %s directory in %15f seconds\n' % (n, path, time.time()-start_time))
Author: on2valhalla, Project: NgramViewerScraper, Lines: 52, Source: ngrams_scraper.py

Example 14: test_no_external_login

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
    def test_no_external_login(self):
        """websession - openid, oauth1 or oauth2 external login option in log in page"""
        base_url = CFG_SITE_SECURE_URL + '/youraccount'
        login_url = base_url + '/login'
        browser = Browser()
        response = browser.open(login_url)
        #Check all the links and see if any of them is of class openid (external login button)
        for link in browser.links():
            for value in link.attrs:
                if (value[0] == 'class'):
                    if value[1] == 'openid_url':
                        self.fail("Openid external login in login page: %s" % link.attrs)

        return
Author: DZielke, Project: invenio, Lines: 16, Source: websession_regression_tests.py

Example 15: downloadAll

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def downloadAll(username, courseName):
  br = Browser()
  br.addheaders = [('User-agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6; en-us) AppleWebKit/531.9 (KHTML, like Gecko) Version/4.0.3 Safari/531.9')]
  br.set_handle_robots(False)
  br.open('https://myvideosu.stanford.edu/oce/currentquarter.aspx')
  assert br.viewing_html()
  br.select_form(name='login')
  br['username'] = username
  br['password'] = getpass()

  # Open the course page for the title you're looking for
  print 'Logging in to myvideosu.stanford.edu...'
  response = br.submit()
  print 'Logged in, going to course link.'
  response = br.follow_link(text=courseName)

  # Build up a list of lectures.

  print 'Loading video links.'
  links = []
  for link in br.links(text='WMP'):
    links.append(re.search(r"'(.*)'", link.url).group(1))

  # So we download the oldest ones first.
  links.reverse()

  print 'Found %d links, getting video streams.' % len(links)

  videos = []
  for link in links:
    response = br.open(link)
    soup = BeautifulSoup(response.read())
    video = soup.find('object', id='WMPlayer')['data']
    video = re.sub('http', 'mms', video)
    video = video.replace(' ', '%20') # remove spaces, they break urls

    output_name = re.search(r'[a-z]+[0-9]+[a-z]?/[0-9]+', video).group(0).replace('/', '_')
    output_wmv = output_name + '.wmv'

    print video
    videos.append((video, output_wmv))

  print 'Downloading %d video streams.' % (len(videos))
  for video in videos:
    download(video)

  print 'Done!'
Author: aquach, Project: scpd-scraper, Lines: 49, Source: scrape.py


Note: The mechanize.Browser.links method examples in this article were collected by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their respective developers, and copyright of the source code remains with the original authors. Please consult each project's license before distributing or reusing the code; do not republish without permission.