本文整理汇总了Python中robotparser.RobotFileParser方法的典型用法代码示例。如果您正苦于以下问题:Python robotparser.RobotFileParser方法的具体用法?Python robotparser.RobotFileParser怎么用?Python robotparser.RobotFileParser使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块robotparser的用法示例。
在下文中一共展示了robotparser.RobotFileParser方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testPasswordProtectedSite
# 需要导入模块: import robotparser [as 别名]
# 或者: from robotparser import RobotFileParser [as 别名]
def testPasswordProtectedSite(self):
    """can_fetch() must deny everything when robots.txt itself is 401/403-protected."""
    test_support.requires('network')
    with test_support.transient_internet('mueblesmoraleda.com'):
        base = 'http://mueblesmoraleda.com'
        robots = base + "/robots.txt"
        # The test site is flaky: confirm it still answers 401/403 on
        # robots.txt before asserting anything, otherwise skip.
        try:
            urlopen(robots)
        except HTTPError as err:
            if err.code not in (401, 403):
                self.skipTest(
                    "%r should return a 401 or 403 HTTP error, not %r"
                    % (robots, err.code))
        else:
            self.skipTest(
                "%r should return a 401 or 403 HTTP error, not succeed"
                % (robots))
        checker = robotparser.RobotFileParser()
        checker.set_url(base)
        try:
            checker.read()
        except IOError:
            self.skipTest('%s is unavailable' % base)
        self.assertEqual(checker.can_fetch("*", robots), False)
示例2: setUp
# 需要导入模块: import robotparser [as 别名]
# 或者: from robotparser import RobotFileParser [as 别名]
def setUp(self):
    """Build a RobotFileParser pre-loaded with this test's robots.txt text."""
    rp = robotparser.RobotFileParser()
    rp.parse(StringIO.StringIO(self.robots_txt).readlines())
    self.parser = rp
示例3: testPasswordProtectedSite
# 需要导入模块: import robotparser [as 别名]
# 或者: from robotparser import RobotFileParser [as 别名]
def testPasswordProtectedSite(self):
    """A password-protected local server should make can_fetch() answer False."""
    port = self.server.server_address[1]
    base = 'http://%s:%s' % (support.HOST, port)
    robots = base + "/robots.txt"
    checker = robotparser.RobotFileParser()
    checker.set_url(base)
    checker.read()
    self.assertFalse(checker.can_fetch("*", robots))
示例4: setUpClass
# 需要导入模块: import robotparser [as 别名]
# 或者: from robotparser import RobotFileParser [as 别名]
def setUpClass(cls):
    """Download and parse the class's robots.txt exactly once for all tests."""
    support.requires('network')
    with support.transient_internet(cls.base_url):
        rp = robotparser.RobotFileParser(cls.robots_txt)
        rp.read()
        cls.parser = rp
示例5: test_read_404
# 需要导入模块: import robotparser [as 别名]
# 或者: from robotparser import RobotFileParser [as 别名]
def test_read_404(self):
    """A 404 on robots.txt means allow-all, no disallow, and no recorded mtime."""
    rp = robotparser.RobotFileParser(self.url('i-robot.txt'))
    rp.read()
    # Missing robots.txt is treated as fully permissive.
    self.assertTrue(rp.allow_all)
    self.assertFalse(rp.disallow_all)
    self.assertEqual(rp.mtime(), 0)
示例6: RobotTest
# 需要导入模块: import robotparser [as 别名]
# 或者: from robotparser import RobotFileParser [as 别名]
def RobotTest(index, robots_txt, good_urls, bad_urls,
              agent="test_robotparser"):
    """Queue RobotTestCases: good_urls expected fetchable (1), bad_urls blocked (0)."""
    rp = robotparser.RobotFileParser()
    rp.parse(StringIO.StringIO(robots_txt).readlines())
    for expected, urls in ((1, good_urls), (0, bad_urls)):
        for candidate in urls:
            tests.addTest(RobotTestCase(index, rp, candidate, expected, agent))
# Examples from http://www.robotstxt.org/wc/norobots.html (fetched 2002)
# 1.
示例7: testPythonOrg
# 需要导入模块: import robotparser [as 别名]
# 或者: from robotparser import RobotFileParser [as 别名]
def testPythonOrg(self):
    """www.python.org's robots.txt must permit fetching robots.txt itself."""
    test_support.requires('network')
    robots = "http://www.python.org/robots.txt"
    with test_support.transient_internet('www.python.org'):
        rp = robotparser.RobotFileParser(robots)
        rp.read()
        self.assertTrue(rp.can_fetch("*", robots))
示例8: addrobot
# 需要导入模块: import robotparser [as 别名]
# 或者: from robotparser import RobotFileParser [as 别名]
def addrobot(self, root):
    """Fetch and cache a robots.txt parser for root's site; no-op if cached."""
    root = urlparse.urljoin(root, "/")
    if root in self.robots:
        return
    robots_url = urlparse.urljoin(root, "/robots.txt")
    rp = robotparser.RobotFileParser()
    self.robots[root] = rp
    self.note(2, "Parsing %s", robots_url)
    rp.debug = self.verbose > 3
    rp.set_url(robots_url)
    try:
        rp.read()
    except (OSError, IOError) as msg:
        # Best-effort: log the failure and keep the (empty) parser cached.
        self.note(1, "I/O error parsing %s: %s", robots_url, msg)
示例9: testPythonOrg
# 需要导入模块: import robotparser [as 别名]
# 或者: from robotparser import RobotFileParser [as 别名]
def testPythonOrg(self):
    """www.python.org's robots.txt (over HTTPS) must allow fetching itself."""
    test_support.requires('network')
    robots = "https://www.python.org/robots.txt"
    with test_support.transient_internet('www.python.org'):
        rp = robotparser.RobotFileParser(robots)
        rp.read()
        self.assertTrue(rp.can_fetch("*", robots))
示例10: runTest
# 需要导入模块: import robotparser [as 别名]
# 或者: from robotparser import RobotFileParser [as 别名]
def runTest(self):
    """The fully password-protected site must report robots.txt unfetchable."""
    test_support.requires('network')
    # whole site is password-protected.
    site = 'http://mueblesmoraleda.com'
    rp = robotparser.RobotFileParser()
    rp.set_url(site)
    rp.read()
    self.assertEqual(rp.can_fetch("*", site + "/robots.txt"), False)
示例11: __init__
# 需要导入模块: import robotparser [as 别名]
# 或者: from robotparser import RobotFileParser [as 别名]
def __init__(self, url='', opener=None):
    """RobotFileParser subclass that fetches through a caller-supplied opener.

    opener defaults to None (use the base class's fetching); the socket
    timeout starts at the module-wide default.
    """
    self._opener = opener
    self._timeout = _sockettimeout._GLOBAL_DEFAULT_TIMEOUT
    robotparser.RobotFileParser.__init__(self, url)
示例12: RobotTest
# 需要导入模块: import robotparser [as 别名]
# 或者: from robotparser import RobotFileParser [as 别名]
def RobotTest(index, robots_txt, good_urls, bad_urls,
              agent="test_robotparser"):
    """Queue RobotTestCases: good_urls expected fetchable (1), bad_urls blocked (0)."""
    rp = robotparser.RobotFileParser()
    rp.parse(StringIO(robots_txt).readlines())
    for expected, urls in ((1, good_urls), (0, bad_urls)):
        for candidate in urls:
            tests.addTest(RobotTestCase(index, rp, candidate, expected, agent))
# Examples from http://www.robotstxt.org/wc/norobots.html (fetched 2002)
# 1.
示例13: testPythonOrg
# 需要导入模块: import robotparser [as 别名]
# 或者: from robotparser import RobotFileParser [as 别名]
def testPythonOrg(self):
    """www.python.org's robots.txt must permit fetching robots.txt itself."""
    support.requires('network')
    robots = "http://www.python.org/robots.txt"
    with support.transient_internet('www.python.org'):
        rp = robotparser.RobotFileParser(robots)
        rp.read()
        self.assertTrue(rp.can_fetch("*", robots))