This page collects typical usage examples of the Python method mechanize.Browser.click_link. If you are unsure what Browser.click_link does, how to call it, or what it looks like in real code, the curated examples below may help. You can also look further into the containing class, mechanize.Browser.
The following shows 4 code examples of Browser.click_link, ordered by popularity by default.
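Before the examples, a minimal sketch of the basic pattern may help (the URL and regex below are placeholders, not taken from any of the projects): Browser.click_link does not fetch the target page itself; it builds and returns a mechanize Request for the first matching link, which you then pass to Browser.open (or to a second Browser instance), exactly as the examples below do.

from mechanize import Browser

br = Browser()
br.set_handle_robots(False)                 # many sites in the examples disable robots.txt handling
br.open("http://example.com/index.html")    # placeholder URL

# click_link() only builds the Request for the matched link ...
req = br.click_link(url_regex="download")   # placeholder regex
# ... open() performs the actual fetch
response = br.open(req)
print response.geturl()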
Example 1: grablinks
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import click_link [as alias]
import re

from mechanize import Browser

def grablinks(pageurl):
    dllinks = []
    br = Browser()
    br2 = Browser()
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    br2.set_handle_referer(True)
    br2.set_handle_robots(False)
    br2.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    br.open(pageurl)    # open the listing page
    grabbed = 0
    for link in br.links(url_regex='/download/'):
        print "Working..."
        # click_link() returns a Request for the matched link; open it in the
        # second browser so the listing page stays loaded in `br`
        req = br.click_link(url=link.url)
        br2.open(req)
        dlpagetext = br2.response().read()
        # extract the high-quality URL from the inline "var hqurl = '...'" script
        dllinks.append(str.replace(str.replace(re.search('var hqurl = \'.*\'', dlpagetext).group(0), "var hqurl =", ""), "'", ""))
        print "Grabbed link " + str(grabbed + 1)
        grabbed = grabbed + 1
    return dllinks
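A hypothetical invocation of the function above (the URL is a placeholder; the target page is expected to contain links matching /download/ whose pages embed a "var hqurl = '...'" snippet):

hq_urls = grablinks("http://example.com/videos?page=1")   # placeholder URL
for u in hq_urls:
    print u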
Example 2: DoAuth
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import click_link [as alias]
import time

from mechanize import Browser
from BeautifulSoup import BeautifulSoup   # BeautifulSoup 3; with bs4 use: from bs4 import BeautifulSoup

def DoAuth(address, password):
    br = Browser()
    br.open(address)
    # select the first form on the page
    br.select_form(nr=0)
    # fill in the login fields
    br["user_name"] = "user"
    br["user_pass"] = password
    br.submit()
    # iterate over all links on the page whose URL matches the regexp
    for link in br.links(url_regex="side"):
        link_url = ''
        link_text = ''
        page_text = ''
        req = br.click_link(link)
        link_url = link.url
        print('........... ............ ............')
        print("IN " + link_url)
        time.sleep(1)
        link_text = link.text
        print("Link text: " + link.text)
        # parse the page behind the clicked link
        soup = BeautifulSoup(br.open(req))
        cols = soup.findAll('iframe')
        if cols:
            # fetch the frame body with a separate browser
            fr = Browser()
            s = address
            # strip the trailing "index.php" to get the base URL
            s = s[:-9]
            soupframe = BeautifulSoup(fr.open(s + cols[0]['src']))
            cols = soupframe.findAll('h3')
            if cols:
                page_text = cols[0].renderContents()
                print 'page text: ' + page_text
        # RecordToFile() and f1 are defined elsewhere in the original module
        RecordToFile(link_url, link_text, page_text)
    f1.close()
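A hypothetical call to DoAuth (the address and password are placeholders; the function also relies on RecordToFile() and the file object f1, which are defined elsewhere in the original module and are not shown here):

DoAuth("http://192.168.0.1/admin/index.php", "secret")   # placeholder address and password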
Example 3: Gmtkn24
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import click_link [as alias]
class Gmtkn24(object):
    """
    Interact with the online web pages of GMTKN24.
    """
    BASE_URL = 'http://toc.uni-muenster.de/GMTKN/GMTKN24/'

    def __init__(self):
        # initialization
        self._browser = Browser()
        self._browser.set_handle_robots(False)
        self._subsets = self._list_subsets()

    _subset_link_re = re.compile("The (.+) subset")
    #_subset_link_re = re.compile("here")

    def _list_subsets(self):
        """Return dictionary mapping GMTKN24 subset names to download URLs."""
        html = BeautifulSoup(self._browser.open(Gmtkn24.BASE_URL + 'GMTKN24main.html'))
        links = html.findAll(name="a")
        result = { }
        for a in links:
            if a.string is not None:
                match = Gmtkn24._subset_link_re.match(a.string)
                if match is not None:
                    print a
                    # if a subset has several names, add all of them
                    for name in match.group(1).split(' '):
                        if name == 'and':
                            continue
                        result[name] = Gmtkn24.BASE_URL + a['href']
        print result
        #result = ['google.com', 'cnn.com']
        return result

    def list(self):
        """Return dictionary mapping GMTKN24 subset names to download URLs."""
        return self._subsets

    def get_geometries(self, subset, output_dir='geometries'):
        """
        Download geometry files for the specified GMTKN24 subset,
        and save them into the 'geometries/' subdirectory of the
        current working directory.
        Return list of extracted molecules/filenames.
        """
        subset_url = self._subsets[subset]
        page = self._browser.open(subset_url)
        # must download the zip to a local file -- zipfiles are not stream-friendly ...
        geometries_url = self._browser.click_link(text_regex=re.compile("^Geometries"))
        (filename, headers) = self._browser.retrieve(geometries_url)
        logger.info("%s geometries downloaded into file '%s'", subset, filename)
        geometries_zip = ZipFile(filename, 'r')
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        molecules = self.get_molecule_names(subset)
        extracted = list()
        names = geometries_zip.namelist()
        for name in names:
            basename = os.path.basename(name)
            if basename not in molecules and basename != 'README':
                continue
            # zipfile's `extract` method preserves full pathname,
            # so let's get the data from the archive and write
            # it in the file WE want...
            content = geometries_zip.read(name)
            output_path = os.path.join(output_dir, basename)
            output = open(output_path, 'w')
            output.write(content)
            output.close()
            if not ('README' == basename):
                extracted.append(basename)
                logger.info("Extracted '%s' into '%s'", basename, output_path)
        geometries_zip.close()
        return extracted

    def get_reference_data(self, subset):
        """
        Iterate over stoichiometry reference data in a given GMTKN24
        subset. Each returned value is a pair `(r, d)`, where `r` is
        a dictionary mapping compound names (string) to their
        stoichiometric coefficient (integer), and `d` is a (float)
        number representing the total energy.
        """
        subset_url = self._subsets[subset]
        subset_page = self._browser.open(subset_url)
        if subset in ['BH76', 'BH76RC']:
            # special case
            self._browser.follow_link(text=("Go to the %s subset" % subset))
        refdata_page = self._browser.follow_link(text="Reference data")
        table = HtmlTable(refdata_page.read())
        for row in table.rows_as_dict():
            if subset == 'W4-08woMR':
                # The 16 entries marked with an asterisk (*) are not
                # part of the W4-08woMR subset.
                if row['#'] and row['#'][0].endswith('*'):
                    continue
            reactants = row['Systems']
            if len(reactants) == 0:
                continue  # ignore null rows
            qtys = row['Stoichiometry']
            refdata = float(row['Ref.'][0])
            #......... (the rest of this code is omitted) .........
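Based only on the methods shown above, the class might be driven like this (the subset name is a placeholder, and get_molecule_names is called by get_geometries but is not part of this excerpt):

gmtkn = Gmtkn24()
for name, url in gmtkn.list().items():
    print name, '->', url
extracted = gmtkn.get_geometries('W4-08')   # placeholder subset name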
Example 4: Browser
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import click_link [as alias]
# (tail of a PDF-to-text helper, apparently pdfminer-based; the start of the
#  function is not shown in this excerpt)
        interpreter.process_page(page)
        outfp.write("END PAGE %d\n" % i)
    device.close()
    fp.close()
    return outfp.getvalue()

db = cdb.cdbmake('omega/cdb/pdfurl', 'pdfurl.tmp')
b = Browser()
for round in range(1, 4):
    url = 'http://taxreview.treasury.gov.au/content/submission.aspx?round=' + str(round)
    b.open(url)
    for link in b.links(url_regex='pdf$'):
        # click_link() returns a Request; get_full_url() yields the absolute PDF URL
        u = b.click_link(link).get_full_url()
        #print "link: ", u
        try:
            f = urllib2.urlopen(u)
        except:
            continue
        remotefile = re.search('[^/]+$', u).group(0)
        remotetime = time.mktime(f.info().getdate('Last-Modified'))
        #base = re.search('[^\.]+', remotefile).group(0)
        base = re.search('(.+)\.pdf$', remotefile).group(1)
        print base
        localhtml = 'www/html/' + str(round) + '/' + base + '.html'
        localpdf = 'pdf/' + str(round) + '/' + base + '.pdf'
        localtime = 0