当前位置: 首页>>代码示例>>Python>>正文


Python DumpTruck.create_index方法代码示例

本文整理汇总了Python中dumptruck.DumpTruck.create_index方法的典型用法代码示例。如果您正苦于以下问题:Python DumpTruck.create_index方法的具体用法?Python DumpTruck.create_index怎么用?Python DumpTruck.create_index使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在dumptruck.DumpTruck的用法示例。


在下文中一共展示了DumpTruck.create_index方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_create_if_exists

# 需要导入模块: from dumptruck import DumpTruck [as 别名]
# 或者: from dumptruck.DumpTruck import create_index [as 别名]
  def test_create_if_exists(self):
    # Creating an index that already exists, with if_not_exists = False,
    # is expected to raise sqlite3.OperationalError.
    truck = DumpTruck(dbname = '/tmp/test.db')
    truck.execute('create table pineapple (bar integer, baz integer);')

    # First creation of the index.
    truck.create_index(['bar', 'baz'], 'pineapple')

    # Second creation of the same index, checked with the callable form
    # of assertRaises.
    self.assertRaises(
      sqlite3.OperationalError,
      truck.create_index, ['bar', 'baz'], 'pineapple', if_not_exists = False
    )
开发者ID:sungoak,项目名称:dumptruck,代码行数:9,代码来源:tests.py

示例2: test_non_unique

# 需要导入模块: from dumptruck import DumpTruck [as 别名]
# 或者: from dumptruck.DumpTruck import create_index [as 别名]
  def test_non_unique(self):
    # A create_index call without `unique` should yield a non-unique index
    # over the requested columns; the test looks it up as tomato_bar_baz.
    truck = DumpTruck(dbname = '/tmp/test.db')
    truck.execute('create table tomato (bar integer, baz integer);')
    truck.create_index(['bar', 'baz'], 'tomato')

    # Indexness
    index_columns = truck.execute('PRAGMA index_info(tomato_bar_baz)')
    self.assertIsNotNone(index_columns)

    # Indexed columns
    self.assertListEqual(index_columns, [
      {u'seqno': 0, u'cid': 0, u'name': u'bar'},
      {u'seqno': 1, u'cid': 1, u'name': u'baz'},
    ])

    # Uniqueness: take the first index_list entry carrying our index name,
    # or an empty dict when there is none (the lookup below then fails).
    found = next(
      (entry for entry in truck.execute('PRAGMA index_list(tomato)')
       if entry[u'name'] == u'tomato_bar_baz'),
      {}
    )
    self.assertEqual(found[u'unique'], 0)
开发者ID:sungoak,项目名称:dumptruck,代码行数:27,代码来源:tests.py

示例3: test_create_if_not_exists

# 需要导入模块: from dumptruck import DumpTruck [as 别名]
# 或者: from dumptruck.DumpTruck import create_index [as 别名]
    def test_create_if_not_exists(self):
        # Creating the same index twice must be accepted when the second
        # call passes if_not_exists=True.
        table = "mango"
        truck = DumpTruck(dbname="/tmp/test.db")
        truck.execute("create table mango (bar integer, baz integer);")

        # Initial creation.
        truck.create_index(["bar", "baz"], table)

        # Repeat creation; the test passes as long as nothing is raised.
        truck.create_index(["bar", "baz"], table, if_not_exists=True)
开发者ID:4bic,项目名称:entity_log,代码行数:9,代码来源:tests.py

示例4: test_create_if_not_exists

# 需要导入模块: from dumptruck import DumpTruck [as 别名]
# 或者: from dumptruck.DumpTruck import create_index [as 别名]
  def test_create_if_not_exists(self):
    # Creating the same index twice must be accepted when the second
    # call passes if_not_exists = True.
    table = 'mango'
    truck = DumpTruck(dbname = '/tmp/test.db')
    truck.execute('create table mango (bar integer, baz integer);')

    # Initial creation.
    truck.create_index(['bar', 'baz'], table)

    # Repeat creation; the test passes as long as nothing is raised.
    truck.create_index(['bar', 'baz'], table, if_not_exists = True)
开发者ID:sungoak,项目名称:dumptruck,代码行数:9,代码来源:tests.py

示例5: test_create_if_exists

# 需要导入模块: from dumptruck import DumpTruck [as 别名]
# 或者: from dumptruck.DumpTruck import create_index [as 别名]
    def test_create_if_exists(self):
        # Creating an index that already exists, with if_not_exists=False,
        # is expected to raise sqlite3.OperationalError.
        truck = DumpTruck(dbname="/tmp/test.db")
        truck.execute("create table pineapple (bar integer, baz integer);")

        # First creation of the index.
        truck.create_index(["bar", "baz"], "pineapple")

        # Second creation of the same index, checked with the callable
        # form of assertRaises.
        self.assertRaises(
            sqlite3.OperationalError,
            truck.create_index, ["bar", "baz"], "pineapple", if_not_exists=False
        )
开发者ID:4bic,项目名称:entity_log,代码行数:9,代码来源:tests.py

示例6: apis

# 需要导入模块: from dumptruck import DumpTruck [as 别名]
# 或者: from dumptruck.DumpTruck import create_index [as 别名]
def apis():
    """Store the API count of every Socrata catalog in the ``socrata_apis`` table.

    Takes the pairs produced by ``catalogs()`` whose first item is
    ``'socrata'`` and upserts one row per catalog, keyed by the catalog
    with its URL scheme stripped and carrying the result of
    ``count_apis(catalog)``.
    """
    dt = DumpTruck('/tmp/open-data.sqlite', auto_commit = False)
    dt.create_table({'catalog':'abc.def'}, 'socrata_apis')
    dt.create_index(['catalog'], 'socrata_apis', unique = True, if_not_exists = True)

    socrata_catalogs = [entry for entry in catalogs() if entry[0] == 'socrata']
    for _, catalog in socrata_catalogs:
        dt.upsert({
            'catalog': catalog.split('://')[-1],
            'apis': count_apis(catalog),
        }, 'socrata_apis')

    # The handle was opened with auto_commit = False, so the upserts above
    # are only persisted once they are committed explicitly.
    dt.commit()
开发者ID:angelchiara,项目名称:open-data,代码行数:13,代码来源:run.py

示例7: main

# 需要导入模块: from dumptruck import DumpTruck [as 别名]
# 或者: from dumptruck.DumpTruck import create_index [as 别名]
def main():
    """Copy the deduplicated ``socrata`` rows into ``socrata_deduplicated``.

    Reads every row of the ``socrata`` table, passes the rows together with
    the ``'edges'`` entry of ``build_network()`` through ``dedupe`` and
    upserts each resulting dataset.
    """
    network_edges = build_network()['edges']

    dt = DumpTruck(dbname = '/tmp/open-data.sqlite', adapt_and_convert = True)
    source_rows = dt.execute('SELECT * FROM socrata')

    # Target table with a unique index on its `id` column.
    target = 'socrata_deduplicated'
    dt.create_table({'id': 'blah-blah'}, target)
    dt.create_index(['id'], target, if_not_exists = True, unique = True)

    for deduplicated in dedupe(source_rows, network_edges):
        dt.upsert(deduplicated, target)
开发者ID:angelchiara,项目名称:open-data,代码行数:13,代码来源:socrata-federation-parse.py

示例8: to_sqlite3

# 需要导入模块: from dumptruck import DumpTruck [as 别名]
# 或者: from dumptruck.DumpTruck import create_index [as 别名]
def to_sqlite3():
    """Load the items yielded by ``datasets()`` into ``/tmp/open-data.sqlite``.

    Prepares the ``datasets``, ``ckan``, ``socrata`` and ``socrata_tables``
    tables with their indexes, then stores every dataset that is not
    already present in ``datasets`` and commits after each stored dataset.
    """
    dt = DumpTruck('/tmp/open-data.sqlite', auto_commit = False)

    # Placeholder row used to create the `datasets` table.
    placeholder = dict((column, 'blah') for column in ['software','catalog','identifier'])
    dt.create_table(placeholder, 'datasets', if_not_exists = True)
    dt.create_index(['software','catalog','identifier'], 'datasets', if_not_exists = True, unique = True)

    # One table per software, unique on (catalog, identifier).
    for table in ('ckan', 'socrata'):
        dt.create_table({'catalog':'blah','identifier':'blah'}, table, if_not_exists = True)
        dt.create_index(['catalog','identifier'], table, if_not_exists = True, unique = True)

    dt.create_table({'view_id':'abc','table_id':123}, 'socrata_tables')
    dt.create_index(['view_id'], 'socrata_tables', if_not_exists = True, unique = True)
    dt.create_index(['table_id'], 'socrata_tables', if_not_exists = True)

    lookup = 'SELECT * FROM datasets WHERE software = ? AND catalog = ? AND identifier = ?'
    for dataset in datasets():
        software = dataset['software']
        row = {
            'software': software,
            'catalog': dataset['catalog'],
            'identifier': dataset[SOFTWARE_MAP['identifier'][software]],
        }

        # Skip datasets that already have a row in `datasets`.
        existing = dt.execute(lookup, [row['software'],row['catalog'],row['identifier']])
        if existing != []:
            continue

        dt.upsert(row, 'datasets')
        if software == 'socrata':
            dt.upsert({
                'view_id': row['identifier'],
                'table_id': dataset['tableId'],
            }, 'socrata_tables')
        dt.upsert(dataset, software)
        dt.commit()
开发者ID:angelchiara,项目名称:open-data,代码行数:35,代码来源:run.py

示例9: extract_dataset_table_info

# 需要导入模块: from dumptruck import DumpTruck [as 别名]
# 或者: from dumptruck.DumpTruck import create_index [as 别名]
def extract_dataset_table_info():
    """Store table info for every view under ``data/`` not yet in ``table_info``.

    Walks ``data/<portal>/views/<viewid>``, skips (portal, viewid) pairs that
    already have a row in the ``table_info`` table of ``/tmp/table_info.db``
    and upserts whatever ``_dataset_table_info`` returns for the rest,
    ignoring ``None`` results.
    """
    dt = DumpTruck(dbname = '/tmp/table_info.db')
    dt.create_table({'portal': 'abc', 'id': 'abcd-efgh'}, 'table_info')
    dt.create_index(['portal', 'id'], 'table_info', unique = True)
    dt.create_index(['tableId'], 'table_info', unique = False)

    # Collect the stored (portal, id) *values*. Building the tuples from
    # row.keys() would only yield the column names, so the membership test
    # below could never match and nothing would be skipped.
    done = set(
        (row['portal'], row['id'])
        for row in dt.execute('SELECT portal, id FROM table_info')
    )

    for portal in os.listdir('data'):
        for viewid in os.listdir(os.path.join('data', portal, 'views')):
            if (portal, viewid) in done:
                continue
            d = _dataset_table_info(portal, viewid)
            if d is None:
                continue
            dt.upsert(d, 'table_info')
开发者ID:casunlight,项目名称:socrata-analysis,代码行数:16,代码来源:run.py

示例10: users

# 需要导入模块: from dumptruck import DumpTruck [as 别名]
# 或者: from dumptruck.DumpTruck import create_index [as 别名]
def users():
    """Aggregate per-user statistics from the view files under ``data/``.

    Walks ``data/<portal>/views/<viewid>``, parses each file as JSON and
    collects, for every view owner and table author, the ids of their views
    and tables plus (view id, publication date) pairs. The users are then
    inserted into the ``user`` table and the pairs into ``publications`` of
    ``/tmp/socrata.db``.

    Returns:
        The dict of collected users keyed by user id.
        NOTE(review): when recording a publication date fails for an
        already-known owner, the offending view is returned instead; this
        looks like a debugging aid and should be confirmed.
    """
    dt = DumpTruck(dbname = '/tmp/socrata.db')
    dt.create_table({'id': 'abcd-efgh'}, 'user')
    dt.create_index(['id'], 'user', unique = True)

    _users = {}
    for portal in os.listdir('data'):
        for viewid in os.listdir(os.path.join('data', portal, 'views')):
            # `with` closes the file even when parsing fails; previously the
            # handle leaked on every unparsable file.
            with open(os.path.join('data', portal, 'views', viewid), 'r') as handle:
                try:
                    view = json.load(handle)
                except Exception:
                    # Best effort: files that cannot be parsed are skipped.
                    continue

            owner_id = view['owner']['id']
            if owner_id in _users:
                _users[owner_id]['views'].add(view['id'])
                try:
                    _users[owner_id]['publicationDates'].add((view['id'], view['publicationDate']))
                except Exception:
                    return view
            else:
                # The owner dict from the view itself becomes the user record.
                _users[owner_id] = view['owner']
                _users[owner_id]['views'] = {view['id']}
                _users[owner_id]['tables'] = set()
                _users[owner_id]['publicationDates'] = set()

            author_id = view['tableAuthor']['id']
            if author_id in _users:
                _users[author_id]['tables'].add(view['tableId'])
            else:
                _users[author_id] = view['tableAuthor']
                _users[author_id]['views'] = set()
                _users[author_id]['tables'] = {view['tableId']}
                _users[author_id]['publicationDates'] = set()

    # Replace the collections by their sizes (stored as n_<key>).
    for user in _users.values():
        for key in ['views', 'rights', 'tables']:
            if key in user:
                user['n_' + key] = len(user[key])
                del user[key]

    dt.insert(_users.values(), 'user')
    for user in _users.values():
        for viewid, publicationDate in user['publicationDates']:
            dt.insert({'userid': user['id'], 'viewid': viewid, 'publicationDate': publicationDate}, 'publications', commit = False)
    dt.commit()
    return _users
开发者ID:casunlight,项目名称:socrata-analysis,代码行数:50,代码来源:run.py

示例11: get_links

# 需要导入模块: from dumptruck import DumpTruck [as 别名]
# 或者: from dumptruck.DumpTruck import create_index [as 别名]
def get_links(softwares = ['ckan','socrata']):
    """Upsert the rows produced by ``_check_catalog`` into the ``links`` table.

    Args:
        softwares: names passed one by one to ``read.catalogs``; the
            default list is only iterated, never modified.

    Raises:
        Whatever ``_check_catalog`` or the upsert raises; the path of the
        catalog being processed is printed before re-raising.
    """
    dt = DumpTruck('/tmp/open-data.sqlite')

    # Placeholder row: three text columns, one integer column and two
    # columns holding a dict.
    columns = ['software','catalog','identifier', 'status_code', 'headers', 'error']
    placeholders = ['blah', 'blah', 'blah', 234, {'a':'b'}, {'a':'b'}]
    dummyrow = dict(zip(columns, placeholders))
    dt.create_table(dummyrow, 'links', if_not_exists = True)
    dt.create_index(['software','catalog','identifier'], 'links', if_not_exists = True, unique = True)

    for software in softwares:
        for catalog in read.catalogs(software):
            # Skip only catalogs explicitly mapped to None in SOCRATA_FIX;
            # the string default keeps catalogs without an entry.
            if SOCRATA_FIX.get(catalog, 'this is a string, not None') is None:
                continue
            try:
                for row in _check_catalog(software, catalog):
                    dt.upsert(row, 'links')
            except:
                # Report which catalog failed, then let the error propagate.
                print(os.path.join('downloads',software,catalog))
                raise
开发者ID:angelchiara,项目名称:open-data,代码行数:19,代码来源:run.py

示例12: check_links

# 需要导入模块: from dumptruck import DumpTruck [as 别名]
# 或者: from dumptruck.DumpTruck import create_index [as 别名]
def check_links():
    """Check every unchecked link URL and queue the result for storage.

    Selects the distinct URLs of ``links`` rows whose ``status_code`` is -42
    or NULL, starts one database-writer thread and 100 checker threads, and
    returns without waiting for them. Each checker calls
    ``links.is_alive(url)`` and hands an UPDATE statement to the writer.

    NOTE(review): the writer thread loops forever in the visible code and
    is not a daemon thread, so the process will not exit on its own.
    """
    # Local import: Empty lives in `queue` on Python 3 and `Queue` on Python 2.
    try:
        from queue import Empty
    except ImportError:
        from Queue import Empty

    dt = DumpTruck('/tmp/open-data.sqlite', auto_commit = False)
    dt.create_index(['url'], 'links', if_not_exists = True, unique = False)
    dt.create_index(['status_code'], 'links', if_not_exists = True, unique = False)

    # Source
    urls = Queue()
    # Order by a substring of the URL so that we bounce around catalogs.
    # (substr(30, 100) took a substring of the constant 30, i.e. it sorted
    # by a constant.)
    sql = '''
SELECT DISTINCT url
FROM links
WHERE (status_code = -42 OR status_code IS NULL) AND is_link AND url NOT NULL
ORDER BY status_code, substr(url, 30, 100);
'''

    for row in dt.execute(sql):
        urls.put(row['url'])

    # Sink to the database
    def _db(queue):
        dt = DumpTruck('/tmp/open-data.sqlite')
        while True:
            dt.execute(*queue.get())
    db_updates = Queue()
    db_thread = Thread(None, target = _db, args = (db_updates,))
    db_thread.start()

    # Check links
    def _check_link(url_queue):
        while True:
            # Non-blocking get: testing empty() and then calling a blocking
            # get() lets a worker hang forever when another worker takes
            # the last URL in between.
            try:
                url = url_queue.get_nowait()
            except Empty:
                break
            if url is None:
                raise ValueError('url is None')
            status_code, headers, error = links.is_alive(url)
            sql = 'UPDATE links SET status_code = ?, headers = ?, error = ? WHERE is_link = 1 AND url = ?'
            db_updates.put((sql, (status_code, headers, error, url)))
            print(url)

    # Create all workers first, then start them.
    workers = [Thread(None, target = _check_link, args = (urls,)) for _ in range(100)]
    for worker in workers:
        worker.start()
开发者ID:angelchiara,项目名称:open-data,代码行数:47,代码来源:run.py

示例13: test_non_unique

# 需要导入模块: from dumptruck import DumpTruck [as 别名]
# 或者: from dumptruck.DumpTruck import create_index [as 别名]
    def test_non_unique(self):
        # A create_index call without `unique` should yield a non-unique
        # index over the requested columns; the test looks it up as
        # tomato_bar_baz.
        truck = DumpTruck(dbname="/tmp/test.db")
        truck.execute("create table tomato (bar integer, baz integer);")
        truck.create_index(["bar", "baz"], "tomato")

        # Indexness
        index_columns = truck.execute("PRAGMA index_info(tomato_bar_baz)")
        self.assertIsNotNone(index_columns)

        # Indexed columns
        self.assertListEqual(index_columns, [
            {u"seqno": 0, u"cid": 0, u"name": u"bar"},
            {u"seqno": 1, u"cid": 1, u"name": u"baz"},
        ])

        # Uniqueness: take the first index_list entry carrying our index
        # name, or an empty dict when there is none (the lookup below then
        # fails).
        found = next(
            (entry for entry in truck.execute("PRAGMA index_list(tomato)")
             if entry[u"name"] == u"tomato_bar_baz"),
            {}
        )
        self.assertEqual(found[u"unique"], 0)
开发者ID:4bic,项目名称:entity_log,代码行数:24,代码来源:tests.py

示例14: scrape_main_article

# 需要导入模块: from dumptruck import DumpTruck [as 别名]
# 或者: from dumptruck.DumpTruck import create_index [as 别名]
def scrape_main_article(url):
    """Fetch *url* and return the cleaned-up markup of its article wrapper.

    The first element with class ``wrapper`` is taken, its ``h1`` child and
    every ``p`` child containing a ``strong`` element are removed, and the
    serialized remainder is passed through ``htmlize``.
    """
    response = requests.get(url)
    tree = lxml.html.fromstring(response.text)
    wrapper = tree.xpath("//*[@class='wrapper']")[0]

    # Drop the heading.
    heading = wrapper.find("h1")
    wrapper.remove(heading)

    # Drop the paragraphs that contain a <strong> element.
    unwanted = [p for p in wrapper.findall("p") if p.find("strong") is not None]
    for paragraph in unwanted:
        wrapper.remove(paragraph)

    serialized = etree.tostring(wrapper)
    return htmlize(serialized)

# Script body: scrape every list page in URLS, store the news items in the
# "news" table of scotland.db and export that table to CSV.
dt = DumpTruck(dbname="scotland.db")
# Create the table from a template row with empty string values.
dt.create_table({"Title": "",
                 "Publication date": "",
                 "Old URL": "",
                 "Summary": "",
                 "Body": "",
                 "Associated organisations": ""}, "news")
# Unique index on (Title, Old URL), the columns the upsert below relies on
# -- presumably to replace re-scraped items; verify against DumpTruck.upsert.
dt.create_index(["Title", "Old URL"], "news", unique=True)

for url in URLS:
    for news_item in scrape_list_page(url):
        # "Attachments" holds a JSON string; it is removed from the item and
        # the link of its first entry becomes the item's old URL.
        attachments = json.loads(news_item.pop("Attachments"))
        link = attachments[0]["link"]
        news_item["Old URL"] = link
        # The article body is scraped from that same link.
        news_item["Body"] = scrape_main_article(link)
        dt.upsert(news_item, "news")

dumptruck_to_csv(dt, "news", "/home/http/scotland/news.csv")
开发者ID:scraperdragon,项目名称:gov.uk,代码行数:31,代码来源:news.py

示例15: scrape_now

# 需要导入模块: from dumptruck import DumpTruck [as 别名]
# 或者: from dumptruck.DumpTruck import create_index [as 别名]
from dumptruck import DumpTruck
import random   
import os,sys
import logging
from sqlite3 import OperationalError
import datetime
import copy,types,time

# Log to stdout. basicConfig's keyword for an output stream is `stream`;
# `file` is not a recognised argument (Python 3 raises ValueError for it,
# Python 2 silently ignores it).
logging.basicConfig(level=logging.DEBUG,stream=sys.stdout,format='%(levelname)s %(asctime)-15s %(filename)s %(lineno)s %(message)s')
# The logger is named after this script's file name.
logger=logging.getLogger(os.path.split(__file__)[1])

# Directory containing this script; the database lives below it.
_here=os.path.split(__file__)[0]

dt=DumpTruck(dbname=os.path.join(_here,"data/events.sqlite"))
try :
	dt.create_index(["id"],"events",unique=True)
	dt.create_index(["url"],"events",unique=True)
except OperationalError :
	# Deliberately ignored -- presumably the events table does not exist
	# yet on a first run; TODO confirm.
	pass

def scrape_now() :
	"""Fetch the details of every listed event that is not stored yet.

	For each item of ``listevents()`` the ``events`` table is asked how many
	rows have the item's url. When there are none, or the query fails with
	``OperationalError``, the item is completed with ``eventdata(url)`` and
	reported in the log.
	"""
	# NOTE(review): nn is never read or updated in the visible code.
	nn=0
	for event in listevents() :
		try :
			# Bind the url as a parameter: interpolating it into the SQL
			# text breaks on urls containing a quote and allows injection.
			d=dt.execute("select count(*) as c from events where url=?",[event["url"]])
		except OperationalError :
			# A failing query is treated like "no stored row".
			d=[{ "c" : 0 }]
		if d[0]["c"]==0 :
			event.update(eventdata(event["url"]))
			logger.info("found new %s %s",event.get("id",event.get("threadid","")),event.get("title",""))
开发者ID:mvtango,项目名称:python-scraper-toolkit,代码行数:33,代码来源:scraper.py


注:本文中的dumptruck.DumpTruck.create_index方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。