

Python Browser.click_link Method Code Examples

This article collects typical usage examples of the mechanize.Browser.click_link method in Python. If you are wondering how Browser.click_link is used in practice, how to call it, or what real-world examples look like, the curated code samples here may help. You can also explore further usage examples of the class it belongs to, mechanize.Browser.


Four code examples of the Browser.click_link method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
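Before the examples, a quick orientation: click_link locates a link on the currently loaded page and returns a mechanize.Request for it without fetching anything; passing that request to Browser.open() (or calling follow_link, which combines both steps) actually follows the link. Below is a minimal sketch; the example.com URL and the "Download" link text are placeholders.

import re

from mechanize import Browser

br = Browser()
br.set_handle_robots(False)               # several examples below also disable robots.txt handling
br.open("http://example.com/index.html")  # placeholder URL

# click_link picks a link on the current page and returns a Request for it;
# nothing is fetched until that request is opened.
req = br.click_link(text_regex=re.compile("Download"))   # placeholder link text
response = br.open(req)
print response.read()[:200]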

Example 1: grablinks

# Module to import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import click_link [as alias]
import re

from mechanize import Browser

def grablinks(pageurl):
    """Collect the direct download links behind every '/download/' link on a page."""
    dllinks = []
    # One browser walks the listing page; a second one fetches each download page.
    br = Browser()
    br2 = Browser()
    for browser in (br, br2):
        browser.set_handle_referer(True)
        browser.set_handle_robots(False)
        browser.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

    br.open(pageurl)
    grabbed = 0
    for link in br.links(url_regex='/download/'):
        print "Working..."
        # click_link builds the Request for this link without following it;
        # open it in the second browser so the listing page stays loaded in the first.
        req = br.click_link(url=link.url)
        br2.open(req)
        dlpagetext = br2.response().read()
        # Extract the URL from the "var hqurl = '...'" JavaScript assignment.
        hqurl = re.search("var hqurl = '.*'", dlpagetext).group(0)
        dllinks.append(hqurl.replace("var hqurl =", "").replace("'", ""))
        print "Grabbed link " + str(grabbed + 1)
        grabbed += 1
    return dllinks
Developer: Fruity-Grebbles, Project: song365, Lines of code: 24, Source file: song365.py
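Example 1 selects the link to click by its exact url. click_link accepts the same keyword selectors as find_link (url, url_regex, text, text_regex, nr, ...), and a Link object obtained from Browser.links() can also be passed in directly, as Examples 2 and 4 do. A small sketch with placeholder values:

from mechanize import Browser

br = Browser()
br.set_handle_robots(False)
br.open("http://example.com/")                                # placeholder URL

# Equivalent ways of choosing the link to click (all values are placeholders):
req = br.click_link(url="http://example.com/download/1")      # exact URL
req = br.click_link(url_regex="/download/")                   # URL matching a regex
req = br.click_link(text="Download HQ")                       # exact link text
req = br.click_link(nr=0)                                     # the first matching link

# A Link object from Browser.links() can be passed directly:
for link in br.links(url_regex="/download/"):
    br.open(br.click_link(link))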

Example 2: DoAuth

# Module to import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import click_link [as alias]
import time

from BeautifulSoup import BeautifulSoup  # BeautifulSoup 3; with bs4 this would be `from bs4 import BeautifulSoup`
from mechanize import Browser

def DoAuth(address, password):
    br = Browser()
    br.open(address)
    # Select the first form on the page and fill in the login fields.
    br.select_form(nr=0)
    br["user_name"] = "user"
    br["user_pass"] = password
    br.submit()
    # Walk every link on the page whose URL matches the regexp "side".
    for link in br.links(url_regex="side"):
        page_text = ''
        # click_link returns the Request for this link without following it yet.
        req = br.click_link(link)
        link_url = link.url
        print('........... ............ ............')
        print("IN " + link_url)

        time.sleep(1)
        link_text = link.text
        print("Link text: " + link.text)
        # Follow the request and parse the resulting page.
        soup = BeautifulSoup(br.open(req))
        cols = soup.findAll('iframe')
        if cols:
            # The content sits in an iframe: fetch its body with a separate browser.
            fr = Browser()
            # Strip the trailing "index.php" so the frame's relative src resolves against the site root.
            s = address[:-9]
            soupframe = BeautifulSoup(fr.open(s + cols[0]['src']))
            cols = soupframe.findAll('h3')
            if cols:
                page_text = cols[0].renderContents()
                print('page text: ' + page_text)
        # RecordToFile and the f1 file handle are defined elsewhere in the source file.
        RecordToFile(link_url, link_text, page_text)
    f1.close()
Developer: msakhnik, Project: site-parsing, Lines of code: 40, Source file: auth.py

Example 3: Gmtkn24

# Module to import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import click_link [as alias]
class Gmtkn24(object):
    """
    Interact with the online web pages of GMTKN24.
    """
    BASE_URL = 'http://toc.uni-muenster.de/GMTKN/GMTKN24/'
    def __init__(self):
        # initialization
        self._browser = Browser()
        self._browser.set_handle_robots(False)
        self._subsets = self._list_subsets()

    _subset_link_re = re.compile("The (.+) subset")
    #_subset_link_re = re.compile("here")
    def _list_subsets(self):
        """Return dictionary mapping GMTKN24 subset names to download URLs."""
        html = BeautifulSoup(self._browser.open(Gmtkn24.BASE_URL + 'GMTKN24main.html'))
        links = html.findAll(name="a")
        result = { }
        for a in links:
            if a.string is not None:
                match = Gmtkn24._subset_link_re.match(a.string)
                if match is not None:
                    print a
                    # if a subset has several names, add all of them
                    for name in match.group(1).split(' '):
                        if name == 'and':
                            continue
                        result[name] = Gmtkn24.BASE_URL + a['href']
        print result
        #result = ['google.com', 'cnn.com']
        return result

    def list(self):
        """Return dictionary mapping GMTKN24 subset names to download URLs."""
        return self._subsets

    def get_geometries(self, subset, output_dir='geometries'):
        """
        Download geometry files for the specified GMTKN24 subset,
        and save them into the 'geometries/' subdirectory of the
        current working directory.

        Return list of extracted molecules/filenames.
        """
        subset_url = self._subsets[subset]
        page = self._browser.open(subset_url)
        # must download the zip to a local file -- zipfiles are not stream-friendly ...
        geometries_url = self._browser.click_link(text_regex=re.compile("^Geometries"))
        (filename, headers) = self._browser.retrieve(geometries_url)
        logger.info("%s geometries downloaded into file '%s'", subset, filename)
        geometries_zip = ZipFile(filename, 'r')
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        molecules = self.get_molecule_names(subset)
        extracted = list()
        names = geometries_zip.namelist()
        for name in names:
            basename = os.path.basename(name)
            if basename not in molecules and basename != 'README':
                continue
            # zipfile's `extract` method preserves full pathname, 
            # so let's get the data from the archive and write
            # it in the file WE want...
            content = geometries_zip.read(name)
            output_path = os.path.join(output_dir, basename)
            output = open(output_path, 'w')
            output.write(content)
            output.close()
            if basename != 'README':
                extracted.append(basename)
            logger.info("Extracted '%s' into '%s'", basename, output_path)
        geometries_zip.close()
        return extracted

    def get_reference_data(self, subset):
        """
        Iterate over stoichiometry reference data in a given GMTKN24
        subset.  Each returned value is a pair `(r, d)`, where `r` is
        a dictionary mapping compound names (string) to their
        stoichiometric coefficient (integer), and `d` is a (float)
        number representing the total energy.
        """
        subset_url = self._subsets[subset]
        subset_page = self._browser.open(subset_url)
        if subset in ['BH76', 'BH76RC']:
            # special case
            self._browser.follow_link(text=("Go to the %s subset" % subset))
        refdata_page = self._browser.follow_link(text="Reference data")
        table = HtmlTable(refdata_page.read())
        for row in table.rows_as_dict():
            if subset == 'W4-08woMR':
                # The 16 entries marked with an asterisk (*) are not
                # part of the W4-08woMR subset.
                if row['#'] and row['#'][0].endswith('*'):
                    continue
            reactants = row['Systems']
            if len(reactants) == 0:
                continue # ignore null rows
            qtys = row['Stoichiometry']
            refdata = float(row['Ref.'][0])
#.........(remainder of the code omitted).........
Developer: TissueMAPS, Project: gc3pie, Lines of code: 103, Source file: import-gmtkn24.py

Example 4: Browser

# Module to import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import click_link [as alias]
        # (Tail of a PDF text-extraction helper; the earlier lines of the file are not shown in this excerpt.)
        interpreter.process_page(page)
        outfp.write("END PAGE %d\n" % i)

    device.close()
    fp.close()

    return outfp.getvalue()

# cdb writer for the 'pdfurl' database.
db = cdb.cdbmake('omega/cdb/pdfurl', 'pdfurl.tmp')
b = Browser()
# Walk the submission pages for rounds 1-3 and collect every PDF link.
for round in range(1, 4):
    url = 'http://taxreview.treasury.gov.au/content/submission.aspx?round=' + str(round)
    b.open(url)
    for link in b.links(url_regex='pdf$'):

        # click_link returns a Request; get_full_url() gives the absolute URL of the PDF.
        u = b.click_link(link).get_full_url()
        #print "link: ", u
        try:
            f = urllib2.urlopen(u)
        except:
            continue

        remotefile = re.search('[^/]+$', u).group(0)
        remotetime = time.mktime(f.info().getdate('Last-Modified'))

        #base = re.search('[^\.]+',remotefile).group(0)
        base = re.search('(.+)\.pdf$', remotefile).group(1)
        print base
        localhtml = 'www/html/' + str(round) + '/' + base + '.html'
        localpdf = 'pdf/' + str(round) + '/' + base + '.pdf'
        localtime = 0
Developer: rhaleblian, Project: afts, Lines of code: 33, Source file: convert.py


Note: The mechanize.Browser.click_link method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors. Please refer to the corresponding project's License for distribution and use, and do not reproduce without permission.