当前位置: 首页>>代码示例>>Python>>正文


Python Browser.set_handle_robots方法代码示例

本文整理汇总了Python中mechanize.Browser.set_handle_robots方法的典型用法代码示例。如果您正苦于以下问题:Python Browser.set_handle_robots方法的具体用法?Python Browser.set_handle_robots怎么用?Python Browser.set_handle_robots使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在mechanize.Browser的用法示例。


在下文中一共展示了Browser.set_handle_robots方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: find_first_article

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_robots [as 别名]
def find_first_article():
    mech = Browser()
    cj = cookielib.LWPCookieJar()

    mech.set_handle_equiv(True)
    # mech.set_handle_gzip(True)
    mech.set_handle_redirect(True)
    mech.set_handle_referer(True)
    mech.set_handle_robots(False)
    # mech.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    mech.addheaders = [
        (
            "User-agent",
            "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1",
        )
    ]

    page = mech.open("https://bitcointalk.org/index.php?board=77.0")
    html = page.read()

    soup = BeautifulSoup(html)

    first_article_tag = soup.find("td", class_="windowbg")

    global startingpost
    startingpost = first_article_tag.span.a.get("href")
    print startingpost
开发者ID:jgomezfr,项目名称:bitcoin-reporters,代码行数:29,代码来源:python-scrape.py

示例2: get_browser

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_robots [as 别名]
def get_browser():
    """Return a mechanize ``Browser`` pre-configured for scraping.

    The browser honours http-equiv headers, transparently decompresses
    gzip, follows redirects, sends Referer headers, ignores robots.txt
    and identifies itself with a desktop Firefox User-Agent string.
    """
    browser = Browser()

    # Handler configuration.
    browser.set_handle_equiv(True)
    browser.set_handle_gzip(True)
    browser.set_handle_redirect(True)
    browser.set_handle_referer(True)
    # Skip robots.txt checks so no URL is refused.
    browser.set_handle_robots(False)

    # Masquerade as a real browser (this is cheating, ok?).
    browser.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

    return browser
开发者ID:jmnavarro,项目名称:Mapa_es_Scraper,代码行数:30,代码来源:scraper.py

示例3: on_task_start

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_robots [as 别名]
    def on_task_start(self, task, config):
        """Log in to ``config['url']`` by posting credentials into the first
        form that accepts both configured field names, then share the login
        cookies with the task's requests session and with urllib2.

        Raises PluginError when mechanize is missing, the page cannot be
        opened, or no form on the page accepts the credentials.
        """
        # Imported lazily so the plugin module loads even without mechanize.
        try:
            from mechanize import Browser
        except ImportError:
            raise PluginError('mechanize required (python module), please install it.', log)

        # Form field names are configurable; defaults cover common sites.
        userfield = config.get('userfield', 'username')
        passfield = config.get('passfield', 'password')

        url = config['url']
        username = config['username']
        password = config['password']

        br = Browser()
        # robots.txt would otherwise block the automated login request.
        br.set_handle_robots(False)
        try:
            br.open(url)
        except Exception as e:
            # TODO: improve error handling
            raise PluginError('Unable to post login form', log)

        #br.set_debug_redirects(True)
        #br.set_debug_responses(True)
        #br.set_debug_http(True)

        # Probe each form: the first one exposing both fields is assumed
        # to be the login form.
        for form in br.forms():
            loginform = form

            try:
                loginform[userfield] = username
                loginform[passfield] = password
                break
            except Exception as e:
                # This form lacks one of the fields; try the next one.
                pass
        else:
            # No form matched — save the page so the user can inspect it.
            received = os.path.join(task.manager.config_base, 'received')
            if not os.path.isdir(received):
                os.mkdir(received)
            filename = os.path.join(received, '%s.formlogin.html' % task.name)
            with open(filename, 'w') as f:
                f.write(br.response().get_data())
            log.critical('I have saved the login page content to %s for you to view' % filename)
            raise PluginError('Unable to find login fields', log)

        br.form = loginform

        br.submit()

        # Reach into mechanize's private handler map for its cookiejar.
        cookiejar = br._ua_handlers["_cookies"].cookiejar

        # Add cookiejar to our requests session
        task.requests.add_cookiejar(cookiejar)
        # Add handler to urllib2 default opener for backwards compatibility
        handler = urllib2.HTTPCookieProcessor(cookiejar)
        if urllib2._opener:
            log.debug('Adding HTTPCookieProcessor to default opener')
            urllib2._opener.add_handler(handler)
        else:
            log.debug('Creating new opener and installing it')
            urllib2.install_opener(urllib2.build_opener(handler))
开发者ID:Anaerin,项目名称:Flexget,代码行数:62,代码来源:plugin_formlogin.py

示例4: downloadBuild

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_robots [as 别名]
def downloadBuild(build_file, target_directory):
    """Download a build file from the SESI website and place it in the target
    directory.

    """
    print "Attempting to download build: {}".format(build_file)

    user, password = _getSESIAuthInfo()

    browser = Browser()
    browser.set_handle_robots(False)
    browser.open("https://www.sidefx.com/login/?next=/download/daily-builds/")

    browser.select_form(nr=0)
    browser.form['username'] = user
    browser.form['password'] = password
    browser.submit()

    browser.open('http://www.sidefx.com/download/daily-builds/')
    resp = browser.follow_link(text=build_file, nr=0)
    url = resp.geturl()
    url += 'get/'
    resp = browser.open(url)

    target_path = os.path.join(target_directory, build_file)

    print "Downloading to {}".format(target_path)

    with open(target_path, 'wb') as handle:
        handle.write(resp.read())

    print "Download complete"

    return target_path
开发者ID:,项目名称:,代码行数:36,代码来源:

示例5: github_connect

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_robots [as 别名]
def github_connect(path=""):
    """Open https://github.com/<path> and return the mechanize browser."""
    browser = Browser()
    # Skip robots.txt and present a minimal Firefox User-Agent so the
    # page is served to the bot.
    browser.set_handle_robots(False)
    browser.addheaders = [('User-agent', 'Firefox')]
    browser.open('https://github.com/%s' % path)
    return browser
开发者ID:Fandekasp,项目名称:github_bot,代码行数:9,代码来源:github_bot.py

示例6: __init__

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_robots [as 别名]
    def __init__(self, config):
        """Build the OGame login/index URLs and a cookie-aware mechanize
        browser from ``config``, then delegate to the parent provider.

        ``config`` is read for: country, universe, username, password.
        """
        # Country-specific login portal, e.g. http://br.ogame.gameforge.com/
        self.login_url = 'http://%s.ogame.gameforge.com/' % config.country
        # http://s114-br.ogame.gameforge.com/game/index.php?page=overview
        self.index_url = 'http://s%s-%s.ogame.gameforge.com' % (config.universe, config.country) + '/game/index.php'
        # NOTE(review): the line continuation embeds the next line's leading
        # spaces inside the User-Agent string — presumably harmless, but
        # confirm the server accepts it.
        headers = [('User-agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) \
        AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36')]
        # Authentication data
        self.username = config.username
        self.password = config.password
        self.universe = config.universe
        self.country = config.country

        self.logger = logging.getLogger('ogame-bot')
        # Setting up the browser: LWP jar so cookies can be saved to disk.
        self.cj = cookielib.LWPCookieJar()

        br = Browser()
        br.set_cookiejar(self.cj)
        # robots.txt would otherwise block the automated login.
        br.set_handle_robots(False)
        br.addheaders = headers
        # self.path = os.path.dirname(os.path.realpath(__file__))
        # name of the cookies file
        # self.cookies_file_name = os.path.join(self.path, 'cookies.tmp')
        self.cookies_file_name = 'cookies.tmp'
        super(AuthenticationProvider, self).__init__(br, config)
开发者ID:winiciuscota,项目名称:OG-Bot,代码行数:27,代码来源:authentication.py

示例7: respond

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_robots [as 别名]
def respond(bot, event):
    """Answer a channel message.

    First tries the bot's cached command regexes; the earliest match in
    the message wins and its template response is sent.  If nothing
    matches but the message mentions "http", the URL is fetched and its
    page title announced.  Returns True when a response was sent.

    Fix: the bare ``except:`` around the URL fetch also swallowed
    KeyboardInterrupt/SystemExit; narrowed to ``except Exception``.
    """
    matches = []
    for (ident, (regex, template)) in bot.commands_cache.iteritems():
        match = regex.search(event.message)
        if match:
            params = match.groupdict()
            params['nick'] = event.source
            # Heap keyed on match position: matches[0] is the earliest hit.
            heappush(
                matches, (match.start(0), template.safe_substitute(params))
            )

    if not matches:
        if event.message.find("http") != -1:
            br = Browser()
            try:
                br.set_handle_robots(False)
                br.open(event.message)
                bot.send_channel_action(bot.config.messages.urltitle, title = format.bold('\"' + br.title() + '\"'))
            except Exception:
                # Best effort: any fetch/parse failure means no title.
                return False
            return True
        else:
            return False

    bot.send_channel_action(matches[0][1])
    return True
开发者ID:Tmplt,项目名称:Toothless,代码行数:28,代码来源:channel_message.py

示例8: get_machines

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_robots [as 别名]
def get_machines(start,num_pages):
    mech = Browser()
    mech.set_handle_robots(False)
    mech.set_handle_equiv(False) 
    mech.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

    machines = []
    try:
        page_num = 0
        for page_num in range(start,num_pages+1):
            print("page %d" % (page_num))
            url = "http://www.pinpedia.com/machine?page=%d" % page_num
            html_page = mech.open(url)
            html = html_page.read()
            machines += parse_page(html)
            time.sleep(0.1)
    except Exception as e:
        print e
        print("finished at page %s" % page_num)

    print("storing machines to machines.txt")

    with open('machines.txt','w') as fh:
        for machine in machines:
            fh.write(machine + "\n")
开发者ID:mattvenn,项目名称:pinballs,代码行数:27,代码来源:parse.py

示例9: sa_login

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_robots [as 别名]
def sa_login(sa_userName, sa_password):
    '''Login to spolecneaktivity.cz portal as sa_userName user.

    Temporary(?) no check
        - if online,
        - if not logged as other user
        - succesfully logged in
    '''
    url_login = 'http://www.spolecneaktivity.cz'

    br = Browser()
    br.set_handle_robots(False)
    
    ok = False
    try:
        r = br.open(url_login)
        rru = r.read().upper()
        if "LOGIN" in rru and "HESLO" in rru:   # not logged in yet
            br.select_form(nr=0)
            br["userName"] = sa_userName
            br["password"] = sa_password
            r = br.submit()
            ok = True
    except:
        pass
    if not ok:
        print u"sa_parse.sa_login: Selhalo přihlášení do spolecneaktivity.cz"
    return br
开发者ID:zvolsky,项目名称:platby,代码行数:30,代码来源:spolecneaktivity_cz.py

示例10: get_google_news_by_url

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_robots [as 别名]
def get_google_news_by_url(url):

    # Construct browser object
    browser = Browser()
    ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.81 Safari/537.36'
    browser.addheaders = [('User-Agent', ua), ('Accept', '*/*')]

    # Do not observe rules from robots.txt
    browser.set_handle_robots(False)

    # Create HTML document
    html = fromstring(browser.open(url).read())

    # get number of pages
    xpath_pages = '//a[@class="fl"]'
    page_num = len(html.xpath(xpath_pages)) + 1

    # get all pages url
    urls = generate_url_pages(url, page_num)
    print 'On ' + str(len(urls)) + ' pages:'

    df = [None] * page_num

    # iterate through all pages of this url
    for index, url in enumerate(urls):
        page_html = fromstring(browser.open(url).read())
        df[index] = get_google_news_in_page(page_html)

    return pd.concat(df, ignore_index=True)
开发者ID:Ellen625,项目名称:News_Analysis,代码行数:31,代码来源:GoogleNews_Crawler.py

示例11: create

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_robots [as 别名]
def create():
    while 1:
        try:
            br = Browser()
            br.set_handle_robots(False)
            br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
            br.open('https://classic.netaddress.com/tpl/Subscribe/Step1?Locale=en&AdInfo=&Referer=http%3A%2F%2Fwww.netaddress.com%2F&T=1332304112864372')
            br.select_form(name='Step1')
            userid = randomname()
            br.form['usrUserId'] = userid
            pwd = randomname()
            br.form['newPasswd'] = pwd
            br.form['RPasswd'] = pwd
            br.form['usrFirst'] = randomname()
            br.form['usrLast'] = randomname()
            br.form['usrTimeZone'] = ['Africa/Abidjan']
            br.form['usrCn'] = ['AF']
            br.submit()
            print "Created " + userid + " with password " + pwd
            filo = open(filex, 'a')
            filo.write(userid + "@usa.net" + ":" + pwd + "\n")
            filo.close()

        except:
            print "error"
开发者ID:Rasmus-Riis,项目名称:Huge_py,代码行数:27,代码来源:usa.py

示例12: main

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_robots [as 别名]
def main():
    """Split the shared document-URL list across <total-jobs> workers and
    download the pages this job (1-based <job>) is responsible for."""
    parser = argparse.ArgumentParser()

    parser.add_argument('--total-jobs', metavar='<total-jobs>', help='total number of jobs downloading documents', type=int)
    parser.add_argument('--job', metavar='<job>', help='job number between 1 and <total-jobs>', type=int)

    args = parser.parse_args()
    check_args(parser, args)

    # Browser that ignores robots.txt.
    br = Browser()
    br.set_handle_robots(False)
#    br.set_debug_responses(True)

    # Credentials sent with each document request; password prompted.
    data = urlencode({'user': USERNAME, 'pass': getpass()})

    document_urls = [LOGIN_PREFIX + url.strip() + '&view=etext' for url in file(DOCUMENT_URLS_FILE)]

    # Job k of n handles URLs k-1, k-1+n, k-1+2n, ...
    first = args.job - 1
    stride = args.total_jobs

    for url in iterview(document_urls[first::stride]):
        try:
            get_document_pages(br, url, data)
        except Exception as e:
            # Report the failing URL but keep going with the rest.
            print >> sys.stderr, '\n', (url, e)

示例13: getRandomXKCDComic

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_robots [as 别名]
def getRandomXKCDComic(urlBase):
    """Return an open mechanize response for a uniformly random XKCD comic.

    Comics live at <urlBase><n>/ for n = 1..latest.  The latest comic is
    located by exponential doubling followed by binary search: a URL that
    fails to open is treated as "past the newest comic".

    Fixes: bare ``except:`` clauses narrowed to ``except Exception`` (they
    also swallowed KeyboardInterrupt), and ``/`` replaced with ``//`` so
    the midpoint stays an int if ported to Python 3.
    """
    br = Browser()
    br.addheaders = [('User-agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6; en-us) AppleWebKit/531.9 (KHTML, like Gecko) Version/4.0.3 Safari/531.9')]
    br.set_handle_robots(False)

    upperBound = 1
    lowerBound = 1

    # Exponential phase: double the bound until the URL no longer exists.
    while True:
        link = urlBase + str(upperBound) + "/"
        try:
            br.open(link)
        except Exception:
            break

        lowerBound = upperBound
        upperBound = upperBound * 2

    # Binary search for the newest comic between the two bounds.
    while True:
        pivot = (upperBound + lowerBound) // 2
        link = urlBase + str(pivot) + "/"

        if lowerBound == upperBound or pivot == lowerBound:
            # Newest comic found: pick a random one up to it.
            randomComicID = random.randint(1, pivot)
            randPageLink = urlBase + str(randomComicID) + "/"
            return br.open(randPageLink)
        try:
            br.open(link)
            lowerBound = pivot
        except Exception:
            upperBound = pivot
开发者ID:djoeman84,项目名称:XKCD-Fetch-random-image,代码行数:37,代码来源:xkcdFetch.py

示例14: scrape

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_robots [as 别名]
    def scrape(self):
        """
        Opens the html page and parses the pdf links.

        Fetches ``self.site`` with a mechanize browser and appends every
        anchor URL containing ``.pdf"`` to ``self.pdf_urls``.

        Fixes: removed dead locals (``user_agent``, ``values1``,
        ``headers`` were built but never used) and a duplicated
        ``set_handle_robots(False)`` call.
        """
        browser = Browser()

        # Follow redirects/referers, skip robots.txt, claim to be Firefox.
        browser.set_handle_redirect(True)
        browser.set_handle_referer(True)
        browser.set_handle_robots(False)
        browser.addheaders = [('User-Agent', 'Firefox')]

        html = browser.open(self.site)

        lines = html.read().splitlines()

        for line in lines:
            # Capture the href value of every anchor tag on the line.
            urls = re.findall('<a href="?\'?([^"\'>]*)', line)
            for url in urls:
                if '.pdf"' in url:
                    self.pdf_urls.append(url)

示例15: authenticate

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import set_handle_robots [as 别名]
    def authenticate(self):
        """Log in to the camera's web configuration UI.

        Raises AccessDenied if the login form reappears after submitting
        (credentials rejected) and ImproperlyConfigured for any other
        failure.  NOTE(review): the visible code neither assigns
        ``self._client`` nor returns ``client`` on success — confirm the
        full implementation continues beyond this snippet.
        """
        # Reuse an already-authenticated client if one is cached.
        if self._client:
            return self._client

        try:
            client = Browser()
            client.set_handle_redirect(True)
            # robots.txt would otherwise block the automated login.
            client.set_handle_robots(False)
            client.open('http://%s/cgi-bin/videoconfiguration.cgi' % self.camera.host)
            client.select_form('frmLOGON')
            client['LOGIN_ACCOUNT'] = self.camera.username
            client['LOGIN_PASSWORD'] = self.camera.password
            client.submit()

            # If the login form is still present, credentials were rejected.
            try:
                client.select_form('frmLOGON')
            except FormNotFoundError:
                pass
            else:
                raise AccessDenied('Access denied for user `%s`' % self.camera.username)

        except AccessDenied:
            raise

        # Python 2 except syntax; wraps any other failure as a config error.
        except Exception, e:
            raise ImproperlyConfigured(e.message)


注:本文中的mechanize.Browser.set_handle_robots方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。