

Python mechanize.Browser Class Code Examples

This article collects typical usage examples of the mechanize.Browser class in Python. If you are wondering what the Browser class is for, how to use it, or what real-world code using it looks like, the curated examples below may help.


Fifteen code examples of the Browser class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
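Most of the examples share the same basic pattern: create a Browser, optionally disable robots.txt handling, open a page, fill in a form, and submit it. The following is a minimal sketch of that pattern added for orientation; the URL and field names are placeholders for illustration, not taken from any of the examples:

from mechanize import Browser

br = Browser()
br.set_handle_robots(False)   # skip robots.txt checks, as many examples below do

# 'http://example.com/login', 'username' and 'password' are placeholders.
br.open("http://example.com/login")
br.select_form(nr=0)          # pick the first form on the page
br["username"] = "user"
br["password"] = "secret"
response = br.submit()
html = response.read()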

Example 1: lookup_offers_isbn

def lookup_offers_isbn(item_id):
    offers = []
    br = Browser()
    res = br.open("http://books.half.ebay.com/ws/web/HalfISBNSearch?isbn=%s" % item_id)
    soup = BeautifulSoup(res.read())
    ratings = soup.findAll('span', {'class': 'Header'})
    for r in ratings:
        rating = r.text
        prices = r.parent.parent.parent.findNextSibling('table').findAll('tr')[1:]
        linktext = r.parent.parent.parent.findNextSiblings('table')[1].find(text=re.compile('View all.*'))
        if linktext:
            all_link = linktext.parent['href']
            # follow the "View all" link to get the full price list
            res2 = br.open(all_link)
            soup = BeautifulSoup(res2.read())
            rating2 = soup.findAll('span', {'class': 'Header'})
            prices = rating2[0].parent.parent.parent.parent.findAll('table')[3].findAll('tr')[1:]
        for row in prices:
            m = re.search(r"itemid=(\d+)", row.find('a', href=re.compile(r"itemid=\d+"))['href'])
            itemid = m.group(1)
            seller = row.find('a', {'class': 'SellerDisplayLink'}).text
            price = row.find('span', {'class': 'ItemPrice'}).text
            price = price.replace(",", "")
            if price.startswith("$"):
                price = price[1:]
            offers.append({'rating': rating, 'seller': seller, 'listing_id': itemid, 'price': str(price)})
            print rating, seller, itemid, price
    return offers
Developer: clarsen, Project: booksell, Lines of code: 28, Source: halfcom.py
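A call such as the following would exercise the function above (assuming the module's imports of mechanize, BeautifulSoup, and re); the ISBN is an arbitrary example value:

offers = lookup_offers_isbn("0262033844")   # hypothetical ISBN
for offer in offers:
    print offer['rating'], offer['seller'], offer['price']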

Example 2: down_image

    def down_image(self, img):
        print "down image from " + img
        down_br = Browser()
        down_cj = CookieJar()
        down_br.set_cookiejar(down_cj)
        fn = tempfile.mktemp(suffix='.png')  # note: mktemp is insecure; mkstemp is preferred
        return down_br.retrieve(img, filename=fn)[0]
Developer: lite, Project: yebob_utils, Lines of code: 7, Source: Yebob.py

Example 3: on_task_start

    def on_task_start(self, task, config):
        try:
            from mechanize import Browser
        except ImportError:
            raise PluginError('mechanize required (python module), please install it.', log)

        userfield = config.get('userfield', 'username')
        passfield = config.get('passfield', 'password')

        url = config['url']
        username = config['username']
        password = config['password']

        br = Browser()
        br.set_handle_robots(False)
        try:
            br.open(url)
        except Exception as e:
            # TODO: improve error handling
            raise PluginError('Unable to open login page', log)

        #br.set_debug_redirects(True)
        #br.set_debug_responses(True)
        #br.set_debug_http(True)

        for form in br.forms():
            loginform = form

            try:
                loginform[userfield] = username
                loginform[passfield] = password
                break
            except Exception as e:
                pass
        else:
            received = os.path.join(task.manager.config_base, 'received')
            if not os.path.isdir(received):
                os.mkdir(received)
            filename = os.path.join(received, '%s.formlogin.html' % task.name)
            with open(filename, 'w') as f:
                f.write(br.response().get_data())
            log.critical('I have saved the login page content to %s for you to view' % filename)
            raise PluginError('Unable to find login fields', log)

        br.form = loginform

        br.submit()

        cookiejar = br._ua_handlers["_cookies"].cookiejar  # mechanize internal: grab the CookieJar

        # Add cookiejar to our requests session
        task.requests.add_cookiejar(cookiejar)
        # Add handler to urllib2 default opener for backwards compatibility
        handler = urllib2.HTTPCookieProcessor(cookiejar)
        if urllib2._opener:
            log.debug('Adding HTTPCookieProcessor to default opener')
            urllib2._opener.add_handler(handler)
        else:
            log.debug('Creating new opener and installing it')
            urllib2.install_opener(urllib2.build_opener(handler))
Developer: Anaerin, Project: Flexget, Lines of code: 60, Source: plugin_formlogin.py
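The handler above reads its settings from a config mapping with url, username, and password keys, plus optional userfield/passfield overrides. In FlexGet this comes from the task's YAML configuration; as a rough Python sketch with placeholder values:

config = {
    'url': 'http://example.com/login.php',   # page containing the login form
    'username': 'myuser',
    'password': 'mypass',
    'userfield': 'login',                    # optional, defaults to 'username'
    'passfield': 'pass',                     # optional, defaults to 'password'
}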

Example 4: login_to_kaggle

    def login_to_kaggle(self):  
        """ Login to Kaggle website
        Parameters:
        -----------
        None
        
        Returns:
        browser: Browser
            a mechanize Browser object to be used for further access to the site
        """          
        
        if self.verbose:
            print("Logging in to Kaggle..."),

        br = Browser()
        cj = cookielib.LWPCookieJar()
        br.set_cookiejar(cj)
        
        br.open(self.kag_login_url)
        
        br.select_form(nr=0)
        br['UserName'] = self.kag_username
        br['Password'] = self.kag_password
        br.submit(nr=0)
        
        if br.title() == "Login | Kaggle":
            raise KaggleError("Unable to log in to Kaggle with username %s (response title: %s)" % (self.kag_username, br.title()))
        
        if self.verbose:
            print("done!")
        
        return br
Developer: joostgp, Project: ml_toolbox, Lines of code: 32, Source: kaggle.py
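A sketch of how the returned browser might be used; here client stands in for an instance of the surrounding class, constructed with kag_login_url, kag_username, and kag_password set (the URL is a placeholder):

br = client.login_to_kaggle()
response = br.open("https://www.kaggle.com/competitions")
html = response.read()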

Example 5: GetXboxLiveFriends

  def GetXboxLiveFriends(self):
    """Return a list of tuples (gamer_tag, gamer_presence)."""
    br = Browser()
    br.open('http://live.xbox.com/en-US/profile/Friends.aspx')
    br.select_form(name='f1')
    br['login'] = self.login
    br['passwd'] = self.passwd
    br.submit()  # Submit login form.
    br.select_form(name='fmHF')
    response = br.submit()  # Submit redirect form.
    friend_list = response.read()
    response.close()

    soup = BeautifulSoup(friend_list)
    friend_table = soup.find('table', {'class': FRIEND_TABLE_CLASS})
    if friend_table is None:
      raise XboxLiveError('Parsing failure.')

    friends = []
    for row in friend_table.contents[1:]:  # Skip header row.
      gamer_tag = row.find('td', {'class': GAMER_TAG_CLASS})
      gamer_tag = str(gamer_tag.find('a').contents[0])
      gamer_presence = row.find('td', {'class': GAMER_PRESENCE_CLASS})
      gamer_presence = str(gamer_presence.find('h4').contents[0])
      friends.append((gamer_tag, gamer_presence))
    return friends
Developer: damonkohler, Project: pypert, Lines of code: 26, Source: xbox_live.py

Example 6: newBrowser

    def newBrowser(self):
        # Create new browsers all the time because its data structures grow
        # unboundedly (texas#135)
        br = Browser()
        br.add_password(self.hostname, self.username, self.password)
        br.set_handle_robots(None)
        return br
Developer: abubeck, Project: cob_bringup_overlays, Lines of code: 7, Source: ddwrt.py

Example 7: fetch_laws_page_from_year

def fetch_laws_page_from_year(year, temporaryDirectory):
    lawsDirectory = os.path.join(temporaryDirectory, 'all_laws')
    if not os.path.exists(lawsDirectory):
        os.makedirs(lawsDirectory)
        print('The laws directory did not exist so I created it')
        print(lawsDirectory)

    fileToWriteLawsListIn = os.path.join(lawsDirectory, year + '.html')
    print('File to write in is ' + fileToWriteLawsListIn)
    lawWasNotDownloaded = not os.path.isfile(fileToWriteLawsListIn)
    if lawWasNotDownloaded:
        startDownload = int(round(time.time() * 1000))
        
        print('Getting laws from year ' + year)
        url = get_ugly_url_for_laws(year)
        browser = Browser()
        browser.open(url)
        html = browser.response().get_data()

        with open(fileToWriteLawsListIn, 'a') as f:
            f.write(html)

        endDownload = int(round(time.time() * 1000))
        print('Finished downloading laws for year ' + year + '. It took only ' 
              + str(endDownload - startDownload) + ' milliseconds')
    else:
        print('This year was already fetched ' + year 
              + '. Skipping to the next year')
Developer: andreicristianpetcu, Project: pubdocs, Lines of code: 28, Source: 1_clrro_fetch_laws_per_year.py

Example 8: __init__

    def __init__(self):
        Browser.__init__(self)
        self.set_handle_robots(False)
        self.addheaders = [(
            'Accept',
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
        )]
Developer: vinceau, Project: browserplus, Lines of code: 7, Source: browserplus.py
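Because the class subclasses mechanize.Browser directly, instances can be used anywhere a plain Browser would be. A minimal usage sketch, assuming the class is named BrowserPlus as in the source project and the URL is a placeholder:

bp = BrowserPlus()
response = bp.open("http://example.com")   # robots.txt ignored, Accept header sent
html = response.read()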

Example 9: mrsc

def mrsc(gid):
    mech = Browser()
    url = "http://espn.go.com/ncf/playbyplay?gameId=" + gid + "&period=0"
    page = mech.open(url)
    html = page.read()
    print url
    if html.count('Play-by-play not currently available.') == 0:
        soup = BeautifulSoup(html)
        table = soup.findAll("table")[-1]
        rows = table.findAll('tr')[::-1]  # walk the plays from most recent to oldest
        c = 0
        toret = ''
        keepgoing = True
        # Search the reversed HTML for the header's style attribute to recover
        # the last anchor name, used below as the possession indicator.
        cup = html[::-1][:html[::-1].find(' left; font: 700 14px/25px Helvetica,Arial,sans-serif;" colspan="3"><div style="margin-right: 6px;"'[::-1])][::-1]
        cup = cup[cup.find('a name="') + len('a name="'):]
        cup = cup[:cup.find('"')]
        while c < 7 and keepgoing and c < len(rows):
            cols = rows[c].findAll('td')
            if len(cols) > 2:
                toret = str(' '.join(cols[0].findAll(text=True))) + '. ' + str(' '.join(cols[1].findAll(text=True)))
                keepgoing = False
            c = c + 1
        toret = toret.replace('  ', ' ').strip()
        if toret != '':
            toret = toret + ' '
        poss = ''
        if cup != '' and len(cup) < 30:
            poss = cup
    else:
        toret = ''
        poss = ''
    return [toret, poss]
Developer: epmatsw, Project: FootballBot, Lines of code: 33, Source: following.py

Example 10: getLastEntries

    def getLastEntries(self, url, lastDate):
        """ get all entries from an HTML table list if it is newer 
        than prevEntry. Format is from graz FF site """

        mech = Browser()
        mech.set_handle_robots(False)
        try:
            page = mech.open(url)
        except urllib2.HTTPError:
            if url is None:
                url = "(empty url)"
            self.logger.error("Could not read url "+url)
            return []
        html = page.read()
        soup = BeautifulSoup(html)
        link = soup.findAll('a')
        if len(link) == 0:
            self.logger.error('No links in the page: %s', url)
            return []
        returnLinks = []

        for l in link:
            try:
                date = datetime.strptime(l.string, "topo-%Y-%m-%d-%H:%M.tsv.gz")
            except ValueError:
                continue
            if date > lastDate:
                returnLinks.append(url+l.string)
            else:
                break

        return returnLinks
Developer: leonardomaccari, Project: communityNetworkMonitor, Lines of code: 32, Source: FFWien.py
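A sketch of a call site, where monitor stands in for an instance of the surrounding class and the URL is a placeholder; only links whose filenames parse to a date newer than the bound are returned:

from datetime import datetime

new_links = monitor.getLastEntries("http://example.com/topo/", datetime(2014, 1, 1))
for link in new_links:
    print link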

Example 11: main

def main():

    parser = argparse.ArgumentParser()

    parser.add_argument('--total-jobs', metavar='<total-jobs>', help='total number of jobs downloading documents', type=int)
    parser.add_argument('--job', metavar='<job>', help='job number between 1 and <total-jobs>', type=int)

    args = parser.parse_args()
    check_args(parser, args)

    br = Browser()
    br.set_handle_robots(False)
#    br.set_debug_responses(True)

    data = urlencode({'user': USERNAME, 'pass': getpass()})

    document_urls = [LOGIN_PREFIX + url.strip() + '&view=etext' for url in file(DOCUMENT_URLS_FILE)]

    start = args.job - 1
    step = args.total_jobs

    for url in iterview(document_urls[start::step]):
        try:
            get_document_pages(br, url, data)
        except Exception as e:
            print >> sys.stderr, '\n', (url, e)
Developer: hannawallach, Project: declassified-documents, Lines of code: 26, Source: get_document_pages.py

Example 12: gen_path

def gen_path(request):
    x = json.loads(request.POST['data'])    # fetch the coordinates data
    print x
    adj_mat = []    # build an empty adjacency matrix
    num_cities = len(x)
    for i in x:
        tmp_mat = []
        for j in x:
            if i != j:
                API_KEY = "AIzaSyDBOSr6_XxvISPGX54P9bPnooE3RUpRTp0"
                orig_coord = x[i]
                dest_coord = x[j]
                br = Browser()  # create a mechanize instance
                br.set_handle_robots(False)
                # print "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&key={2}".format(orig_coord, dest_coord, API_KEY)
                result = br.open("https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&key={2}".format(orig_coord, dest_coord, API_KEY)).read()    # call the Google Maps Distance Matrix API
                json_result = json.loads(result)
                tmp_mat.append(int(json_result['rows'][0]['elements'][0]['distance']['value']))
            else:
                tmp_mat.append(0)
        adj_mat.append(tmp_mat)

    obj = ArpanDaErCode()
    ans = ArpanDaErCode.solve(obj, adj_mat, num_cities)  # get the visiting sequence from the model
    print ans
    ret = {'data': [str(ii) for ii in ans]}

    return HttpResponse(str(json.dumps(ret)))   # return the sequence as JSON for the JS to handle
Developer: RijuSen1996, Project: bppimt_hackon, Lines of code: 32, Source: views.py

Example 13: scrap_query

def scrap_query(query, bang=None):

    r = ddg_query('imdb ' + query, bang=bang)
    if 'redirect' in dir(r) and 'primary' in dir(r.redirect):
        url = r.redirect.primary
    else:
        logger.info('Could not find imdb searchpage from DuckDuckGo bang')
        return None

    br = Browser()
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.2;\
                        WOW64) AppleWebKit/537.11 (KHTML, like Gecko)\
                        Chrome/23.0.1271.97 Safari/537.11')]

    r = br.open(url)
    soup = BeautifulSoup(r)

    for link in soup.find_all('a'):
        href = link.get('href','')
        match = re.search(r"imdb\.com/.*tt(?P<number>[^/]*)", href)
        if match:
            imdb_id = check_imdb(match.group('number'))
            return imdb_id

    return None
Developer: getzze, Project: imdbfetcher, Lines of code: 27, Source: searchengine_api.py
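scrap_query relies on the module's own ddg_query and check_imdb helpers; given those, a lookup might look like this (the title is an arbitrary example):

imdb_id = scrap_query("The Matrix")
if imdb_id:
    logger.info("Found IMDb id: %s", imdb_id)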

Example 14: __init__

    def __init__(self, options):
        (self.configfile, self.config, self.moduleconfig) = self.initialize_config(options)
        # If we have a particular log level for this module, use that,
        # otherwise use the global log level. If that isn't defined
        # either, use the INFO loglevel.
        if 'log' in self.moduleconfig:
            loglevel = self.moduleconfig['log']
        else:
            loglevel = self.config.get('log', 'INFO')
        self.log = self.setup_logger(self.module_dir, loglevel)

        self.base_dir = self.config['datadir']

        if self.browser_use_robustfactory:
            self.browser = Browser(factory=RobustFactory())
        else:
            self.browser = Browser()
        self.browser.addheaders = [('User-agent', 'lagen.nu-bot ([email protected])')]

        # logger = logging.getLogger("mechanize")
        # logger.addHandler(logging.StreamHandler(sys.stdout))
        # logger.setLevel(logging.DEBUG)
        # self.browser.set_debug_http(True)
        # self.browser.set_debug_responses(True)
        # self.browser.set_debug_redirects(True)


        self.ns = {'rinfo':  Namespace(Util.ns['rinfo']),
                   'rinfoex':Namespace(Util.ns['rinfoex']),
                   'dct':    Namespace(Util.ns['dct'])}
Developer: staffanm, Project: legacy.lagen.nu, Lines of code: 30, Source: DocumentRepository.py

Example 15: num_itens

    def num_itens(self, busca, data_inicial, data_final):
        br = Browser()
        response1 = \
            br.open("http://portal.in.gov.br/in/imprensa1/pesquisa_avancada")
        br.select_form(name="formBusca")
        br["texto_todas"] = busca
        br["dataPublicacaoInicial"] = data_inicial[:5]
        br["dataPublicacaoFinal"] = data_final[:5]
        br["ano"] = [data_final[-4:]]
        br["idJornal"] = ["1", "2", "3", "4"]
#        print(br.form)
        br.form.action = \
            "http://www.in.gov.br/imprensa/pesquisa/pesquisaresultado.jsp"
        res = br.submit()
        texto = res.read()
        x1, x2, x3 = texto.partition("ite")
        x1, x2, x3 = x1.rpartition(">")

        try:
            arq = open(self.retornar_html(), "w")
            arq.write(texto)
            arq.close()
        except Exception:
            print("Error trying to save the search results page!")

        x3 = x3.replace(",", "")
        x3 = x3.strip()
        # Return the number of items found
        if x3 == "Um":
            return 1

        if len(x3) > 0:
            return int(x3)
        else:
            return 0
Developer: andresmrm, Project: trazdia, Lines of code: 35, Source: proc.py
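The slicing data_inicial[:5] and data_final[-4:] implies DD/MM/YYYY date strings: the method submits the day/month part and the four-digit year separately. A hypothetical call, with proc standing in for an instance of the surrounding class and placeholder query and dates:

n = proc.num_itens("saude", "01/01/2011", "31/12/2011")
print(n)   # number of matching items found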


Note: the mechanize.Browser class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by many developers, and copyright of the source code belongs to the original authors. For distribution and use, please refer to the corresponding project's license; do not reproduce without permission.