This page collects typical usage examples of the Store.add method from the Python store module. If you have been wondering what Store.add does, how to call it, or what real-world uses of it look like, the curated examples below should help. You can also explore further usage examples of its containing class, store.Store.
Three code examples of the Store.add method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
Example 1: go_store
# Required import: from store import Store [as alias]
# Or: from store.Store import add [as alias]
def go_store():
    store = Store()
    store.remove(None)
    store.add(323, 2, 10, 88, 78, 415, 89, 189)
    store.remove(78)
    store.save()
    store.load()
    print store.grep('\d*[02468]$')
    print store.find(89, -1, 415)
    for item in store.items():
        print item
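The store.Store class used in Example 1 is not shown on this page. Purely as an illustration, a minimal in-memory class compatible with the calls above might look like the sketch below; the method semantics (and the pickle-based save/load) are inferred from the example, not taken from the original repository.

import pickle
import re

class Store(object):
    """ Hypothetical in-memory store matching the calls in Example 1. """
    def __init__(self, path='store.pkl'):
        self.path = path
        self._items = []

    def add(self, *values):
        # Example 1 passes several values in a single call
        self._items.extend(values)

    def remove(self, value):
        # tolerate values that are not present (Example 1 calls remove(None) first)
        if value in self._items:
            self._items.remove(value)

    def save(self):
        with open(self.path, 'wb') as f:
            pickle.dump(self._items, f)

    def load(self):
        with open(self.path, 'rb') as f:
            self._items = pickle.load(f)

    def grep(self, pattern):
        # items whose string form matches the regular expression
        return [v for v in self._items if re.search(pattern, str(v))]

    def find(self, *values):
        # the subset of the given values actually present in the store
        return [v for v in values if v in self._items]

    def items(self):
        return list(self._items)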
Example 2: TestStore
# Required import: from store import Store [as alias]
# Or: from store.Store import add [as alias]
class TestStore(unittest2.TestCase):
    def setUp(self):
        self.store = Store(name="scratch")
        self.ns = Namespace('http://example.com/#')

    def tearDown(self):
        self.store.close()

    def testSize(self):
        """ Tests the size of the repository """
        self.assertEqual(len(self.store), 0)

    def testAdd(self):
        bob = self.ns['bob']
        name = self.ns['name']
        value = Literal('Bob Bilbins')
        self.store.add((bob, name, value))
        self.assertEqual(len(self.store), 1)

    def testRemove(self):
        triple = (self.ns['alice'], self.ns['name'], Literal('Alice'))
        self.store.add(triple)
        self.assertEqual(len(self.store), 1)
        self.store.remove(triple)
        self.assertEqual(len(self.store), 0)

    def testTriples(self):
        """ Tests the search by triple. """
        triple = (self.ns['alice'], self.ns['name'], Literal('Alice'))
        self.store.add(triple)
        for tri in self.store.triples((self.ns['alice'], None, None)):
            for i in range(3):
                self.assertEqual(tri[i], triple[i])

    def testSimpleSparql(self):
        triple = (self.ns['alice'], self.ns['name'], Literal('Alice'))
        self.store.add(triple)
        for tri in self.store.query("SELECT ?s ?p ?o WHERE {?s ?p ?o .}"):
            for i in range(3):
                self.assertEqual(tri[i], triple[i])

    def testNamespacedSparql(self):
        triple = (self.ns['alice'], self.ns['name'], Literal('Alice'))
        self.store.add(triple)
        self.store.add((self.ns['bob'], self.ns['name'], Literal('Bob')))
        for tri in self.store.query("SELECT ?p ?o WHERE { ex:alice ?p ?o .}", initNs={'ex': self.ns}):
            for i in range(1, 3):
                self.assertEqual(tri[i - 1], triple[i])

    def testBindedSparql(self):
        triple = (self.ns['alice'], self.ns['name'], Literal('Alice'))
        self.store.add(triple)
        self.store.add((self.ns['bob'], self.ns['name'], Literal('Bob')))
        for tri in self.store.query("SELECT ?p ?o WHERE { ?s ?p ?o .}", initBindings={'s': self.ns['alice']}):
            for i in range(1, 3):
                self.assertEqual(tri[i - 1], triple[i])

    def testDataTypes(self):
        birth = Literal('2006-01-03', datatype=_XSD_NS.date)
        comp = Literal('2006-01-01', datatype=_XSD_NS.date)
        triple = (self.ns['alice'], self.ns['birthdate'], birth)
        self.store.add(triple)
        for s, p, o in self.store.query("SELECT ?s ?p ?o WHERE {?s ?p ?o .}"):
            self.assertLess(comp, birth)
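Standalone usage of the same pattern, outside a test case, could look like the following sketch. It assumes Namespace and Literal come from rdflib and that this Store wraps an rdflib graph whose query() accepts initBindings, as the tests above suggest; the namespace URI is only a placeholder.

from rdflib import Namespace, Literal
from store import Store

store = Store(name="scratch")
ns = Namespace('http://example.com/#')
store.add((ns['alice'], ns['name'], Literal('Alice')))
# bind ?s to ex:alice and list her properties
for p, o in store.query("SELECT ?p ?o WHERE { ?s ?p ?o .}",
                        initBindings={'s': ns['alice']}):
    print p, o
store.close()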
Example 3: BaseScraper
# Required import: from store import Store [as alias]
# Or: from store.Store import add [as alias]
class BaseScraper(object):
    """ basic scraper framework for grabbing press releases

    Derived scrapers generally need to implement:
      name          - string name of the scraper
      doc_type      - numeric document type for uploaded press releases
      find_latest() - to grab a list of the latest press releases (usually
                      from an rss feed)
      extract()     - parse html data to pull out the various text and
                      metadata of the press release
    """

    def __init__(self):
        # derived classes need to set these
        assert self.name is not None
        assert self.doc_type is not None
        self.parser = OptionParser(usage="%prog: [options]")
        self.parser.add_option('-v', '--verbose', action='store_true')
        self.parser.add_option('-d', '--debug', action='store_true')
        self.parser.add_option('-t', '--test', action='store_true', help="test only - don't send any documents to server")
        self.parser.add_option('-c', '--cache', action='store_true', help="cache all http transfers in .cache dir (for repeated runs during test)")
        self.parser.add_option('-u', '--url', nargs=1, help="process just the given URL")
        self.parser.add_option('-i', '--ini-file', default="churnalism.cfg", nargs=1, help="filename for connection settings [default: %default]")

    def main(self):
        """ set everything up, then invoke go() """
        (options, args) = self.parser.parse_args()

        log_level = logging.ERROR
        if options.debug:
            log_level = logging.DEBUG
        elif options.verbose:
            log_level = logging.INFO
        logging.basicConfig(level=log_level)  #, format='%(message)s')

        if options.test:
            self.store = DummyStore(self.name, self.doc_type)
        else:
            # load in config file for real run
            config = ConfigParser.ConfigParser()
            config.readfp(open(options.ini_file))
            auth_user = config.get("DEFAULT", 'user')
            auth_pass = config.get("DEFAULT", 'pass')
            server = config.get("DEFAULT", 'server')
            self.store = Store(self.name, self.doc_type, auth_user=auth_user, auth_pass=auth_pass, server=server)

        if options.cache:
            logging.info("using .cache")
            opener = urllib2.build_opener(CacheHandler(".cache"))
            urllib2.install_opener(opener)

        self.go(options)

    def go(self, options):
        """ perform the actual scraping

        The default implementation just calls find_latest() and processes the
        discovered press releases, but it's likely derived classes will want
        to handle custom options for fetching historical data - see
        prnewswire for an example.
        """
        if options.url:
            urls = [options.url, ]
        else:
            urls = self.find_latest()
        self.process_batch(urls)

    def process_batch(self, urls):
        """ run through a list of urls, fetching, extracting and storing each in turn """
        # cull out ones we've got
        n_before = len(urls)
        urls = [url for url in urls if not self.store.already_got(url)]
        logging.info("processing %d urls (%d are new)", n_before, len(urls))

        err_cnt = 0
        try:
            for url in urls:
                try:
                    logging.debug("fetch %s", url)
                    response = urllib2.urlopen(url)
                    html = response.read()
                    # TODO: maybe just skip ones which redirect to other domains?
                    if response.geturl() != url:
                        logging.warning("Redirect detected %s => %s", url, response.geturl())
                    press_release = self.extract(html, url)
#......... the rest of this example is omitted .........
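BaseScraper is only the framework; as its docstring notes, a concrete scraper supplies name, doc_type, find_latest() and extract(). The sketch below shows roughly what such a subclass could look like. The feed URL, the doc_type value and the returned field names are invented for illustration and are not taken from the original project.

import re
import urllib2

# BaseScraper as defined in Example 3 (imported from wherever that module lives)

class ExamplePressScraper(BaseScraper):
    name = 'examplecorp'   # hypothetical scraper name
    doc_type = 1           # hypothetical document-type id

    def find_latest(self):
        # return a list of press-release URLs, e.g. pulled from an RSS feed
        rss = urllib2.urlopen('http://press.example.com/feed.rss').read()
        return re.findall(r'<link>(http://press\.example\.com/[^<]+)</link>', rss)

    def extract(self, html, url):
        # pull the text and metadata out of the page; the field names
        # returned here are illustrative only
        title = re.search(r'<title>(.*?)</title>', html, re.S).group(1).strip()
        return {'url': url, 'title': title, 'text': html}

if __name__ == '__main__':
    ExamplePressScraper().main()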