This article collects typical usage examples of the Python sqlaload.upsert function. If you are wondering how exactly upsert is called, what its arguments look like, or how it is used in real projects, the curated examples below should help.
The following shows 15 code examples of the upsert function, ordered by popularity by default.
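Before the examples, here is a minimal sketch of the basic call pattern, assuming sqlaload accepts a plain SQLAlchemy engine (as the examples below suggest); the 'person' table, its columns, and the SQLite URL are hypothetical:

import sqlaload as sl
from sqlalchemy import create_engine

engine = create_engine('sqlite:///demo.db')  # hypothetical database URL
table = sl.get_table(engine, 'person')       # hypothetical table name

# Insert a new row, or update the existing row whose 'slug' matches.
sl.upsert(engine, table, {'slug': 'jane-doe', 'name': 'Jane Doe'},
          unique=['slug'])

The unique list names the columns that identify an existing row; every example below follows this pattern with different tables and key columns.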
Example 1: load_transcript
def load_transcript(engine, wp, session, incremental=True):
    url = URL % (wp, session)
    Speech = sl.get_table(engine, 'speech')
    if incremental and sl.find_one(engine, Speech,
                                   source_url=url, matched=True):
        return True
    if '404 Seite nicht gefunden' in fetch(url):
        return False
    sio = fetch_stream(url)
    if sio is None:
        return False
    log.info("Loading transcript: %s/%s" % (wp, session))
    seq = 0
    parser = SpeechParser(engine, sio)
    for contrib in parser:
        if not len(contrib['text'].strip()):
            continue
        contrib['sitzung'] = session
        contrib['sequence'] = seq
        contrib['wahlperiode'] = wp
        contrib['source_url'] = url
        contrib['matched'] = True
        sl.upsert(engine, Speech, contrib,
                  unique=['sequence', 'sitzung', 'wahlperiode'])
        seq += 1
    if parser.missing_recon:
        sl.upsert(engine, Speech, {
            'matched': False,
            'sitzung': session,
            'wahlperiode': wp
            }, unique=['sitzung', 'wahlperiode'])
    return True
Example 2: update_network_entities
def update_network_entities(engine, file_name):
    log.info("Updating network entities reference sheet: %s", file_name)
    network_entities = set()
    table = sl.get_table(engine, 'network_entity')
    if os.path.exists(file_name):
        fh = open(file_name, 'rb')
        reader = csv.DictReader(fh)
        for d in reader:
            e = [(k, v.decode('utf-8')) for (k, v) in d.items()]
            e = dict(e)
            network_entities.add((e['representativeEtlId'], e['etlFingerPrint']))
            sl.upsert(engine, table, e, ['representativeEtlId', 'etlFingerPrint'])
        fh.close()
    reps = set([ne[0] for ne in network_entities])
    rep_table = sl.get_table(engine, 'representative')
    for rep in reps:
        sl.update(engine, rep_table, {'etlId': rep}, {'network_extracted': True})
    for row in sl.all(engine, table):
        network_entities.add((row['representativeEtlId'], row['etlFingerPrint']))
    fh = open(file_name, 'wb')
    writer = None
    table = sl.get_table(engine, 'network_entity')
    for ic, fp in network_entities:
        row = {
            'representativeEtlId': ic,
            'etlFingerPrint': fp
        }
        if writer is None:
            writer = csv.DictWriter(fh, row.keys())
            writer.writerow(dict(zip(row.keys(), row.keys())))
        r = [(k, unicode(v).encode('utf-8')) for k, v in row.items()]
        writer.writerow(dict(r))
    fh.close()
Example 3: generate_person_long_names
def generate_person_long_names(engine):
    log.info("Generating person fingerprints and slugs...")
    from offenesparlament.transform.namematch import match_speaker
    nkp = nk_persons()
    Person = sl.get_table(engine, 'person')
    for person in sl.find(engine, Person):
        long_name = make_long_name(person)
        try:
            long_name = match_speaker(long_name)
        except NKNoMatch:
            pass
        log.info(" -> %s" % long_name.strip())
        slug = url_slug(long_name)
        sl.upsert(engine, Person, {
            'fingerprint': long_name,
            'slug': slug,
            'id': person['id']},
            unique=['id'])
        tries = 0
        while True:
            try:
                nkp.ensure_value(long_name, data=person)
            except ValueError, E:
                log.warn('Exception: %s' % str(E))
                tries = tries + 1
                if tries > 5:
                    raise
            else:
                break
Example 4: merge_speeches
def merge_speeches(engine):
    # desired result: (position_id, debatte_id)
    referenzen = referenzen_index(engine)
    items = item_index(engine)
    log.info("Finding best matches.... ")
    matches = {}
    for (ablauf_id, rwp, rsession), rdrs in referenzen.items():
        for (iwp, isession, item_id), idrs in items.items():
            if iwp != rwp or rsession != isession:
                continue
            ints = len(idrs.intersection(rdrs))
            if ints == 0:
                continue
            k = (ablauf_id, rwp, rsession)
            if k in matches and matches[k][1] > ints:
                continue
            matches[k] = (item_id, ints)
    log.info("Saving position associations....")
    pos_tbl = sl.get_table(engine, 'position')
    for (ablauf_id, wp, session), (item_id, n) in matches.items():
        for pos in sl.find(engine, pos_tbl, ablauf_id="%s/%s" % (wp, ablauf_id)):
            if not pos['fundstelle_url']:
                continue
            if 'btp/%s/%s%03d.pdf' % (wp, wp, int(session)) in pos['fundstelle_url']:
                d = {'ablauf_id': pos['ablauf_id'],
                     'hash': pos['hash'],
                     'debatte_wp': wp,
                     'debatte_session': session,
                     'debatte_item_id': item_id}
                sl.upsert(engine, pos_tbl, d, unique=['ablauf_id', 'hash'])
Example 5: extend_position
def extend_position(engine, table, data):
    dt, rest = data['fundstelle'].split("-", 1)
    data['date'] = datetime.strptime(dt.strip(), "%d.%m.%Y").isoformat()
    if ',' in data['urheber']:
        typ, quelle = data['urheber'].split(',', 1)
        data['quelle'] = re.sub("^.*Urheber.*:", "", quelle).strip()
        data['typ'] = typ.strip()
    else:
        data['typ'] = data['urheber']
    br = 'Bundesregierung, '
    if data['urheber'].startswith(br):
        data['urheber'] = data['urheber'][len(br):]
    data['fundstelle_doc'] = None
    if data['fundstelle_url'] and \
            'btp' in data['fundstelle_url']:
        data['fundstelle_doc'] = data['fundstelle_url']\
            .rsplit('#', 1)[0]
    hash = sha1(data['fundstelle'].encode('utf-8') \
                + data['urheber'].encode('utf-8') + \
                data['source_url'].encode('utf-8')).hexdigest()
    data['hash'] = hash[:10]
    sl.upsert(engine, table, data, unique=['id'])
Example 6: extend_positions
def extend_positions(engine):
    log.info("Amending positions ...")
    Position = sl.get_table(engine, 'position')
    for i, data in enumerate(sl.find(engine, Position)):
        if i % 1000 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        dt, rest = data['fundstelle'].split("-", 1)
        data['date'] = datetime.strptime(dt.strip(), "%d.%m.%Y").isoformat()
        if ',' in data['urheber']:
            typ, quelle = data['urheber'].split(',', 1)
            data['quelle'] = re.sub("^.*Urheber.*:", "", quelle).strip()
            data['typ'] = typ.strip()
        else:
            data['typ'] = data['urheber']
        br = 'Bundesregierung, '
        if data['urheber'].startswith(br):
            data['urheber'] = data['urheber'][len(br):]
        data['fundstelle_doc'] = None
        if data['fundstelle_url'] and \
                'btp' in data['fundstelle_url']:
            data['fundstelle_doc'] = data['fundstelle_url']\
                .rsplit('#', 1)[0]
        hash = sha1(data['fundstelle'].encode('utf-8') \
                    + data['urheber'].encode('utf-8') + \
                    data['ablauf_id'].encode('utf-8')).hexdigest()
        data['hash'] = hash[:10]
        sl.upsert(engine, Position, data, unique=UNIQUE)
Example 7: cleanup_resource
def cleanup_resource(engine, source_table, row, force):
    if not row["combine_status"]:
        return
    # Skip over tables we have already cleaned up
    if (
        not force
        and sl.find_one(
            engine, source_table, resource_id=row["resource_id"], cleanup_status=True, cleanup_hash=row["combine_hash"]
        )
        is not None
    ):
        return
    log.info("Cleanup: %s, Resource %s", row["package_name"], row["resource_id"])
    status = True
    for sheet_id in range(0, row["sheets"]):
        sheet_status = cleanup_sheet(engine, row, sheet_id)
        if status and not sheet_status:
            status = False
    sl.upsert(
        engine,
        source_table,
        {"resource_id": row["resource_id"], "cleanup_hash": row["combine_hash"], "cleanup_status": status},
        unique=["resource_id"],
    )
Example 8: map_columns
def map_columns():
    engine, columns_table = connect()
    q = select([columns_table.c.normalised, columns_table.c.count, columns_table.c.valid],
               order_by=[columns_table.c.count.desc().nullslast()])
    for normalised, count, valid in engine.execute(q):
        if valid is not None:
            continue
        try:
            columns = map_column(engine, columns_table, normalised, count)
            if columns is not None:
                sl.upsert(engine, columns_table,
                          {'normalised': normalised,
                           'valid': True,
                           'column_map': json.dumps(columns)},
                          ['normalised'])
            else:
                sl.upsert(engine, columns_table,
                          {'normalised': normalised,
                           'valid': False},
                          ['normalised'])
        except SystemExit:
            raise
        except:
            traceback.print_exc()
Example 9: scrape_transcript
def scrape_transcript(engine, url, force=False):
    wp, session = url_metadata(url)
    table = sl.get_table(engine, 'speech')
    sample = sl.find_one(engine, table, source_url=url, matched=True)
    response, sio = fetch_stream(url)
    sample = check_tags(sample or {}, response, force)
    base_data = {'source_url': url,
                 'sitzung': session,
                 'wahlperiode': wp,
                 'matched': False,
                 'loaded': False,
                 'source_etag': sample['source_etag']}
    log.info("Loading transcript: %s/%s, from %s", wp, session, url)
    seq = 0
    parser = SpeechParser(sio)
    for contrib in parser:
        if not len(contrib['text'].strip()):
            continue
        contrib.update(base_data)
        contrib['sequence'] = seq
        sl.upsert(engine, table, contrib,
                  unique=['source_url', 'sequence'])
        seq += 1
    if not parser.missing_recon:
        sl.upsert(engine, table, {
            'matched': True,
            'source_url': url,
            }, unique=['source_url'])
    else:
        raise InvalidReference()
    return base_data
Example 10: load_budget
def load_budget(base_url, year, engine, table):
    context = {'data_year': year}
    print "\nHaushalt: %s" % year
    i = 0
    for row in load_einzelplaene(base_url % year, context):
        row['titel_id'] = row['id']
        del row['id']
        row['remarks'] = "\n\n".join(row['remarks'])
        commitment_appropriations = row['commitment_appropriations'].copy()
        del row['commitment_appropriations']
        #if len(commitment_appropriations):
        #    #print len(commitment_appropriations)
        row['commitment_year'] = None
        row['source_id'] = str(year) + "." + str(i)
        sl.upsert(engine, table, row, UNIQUE_COLUMNS)
        i += 1
        for year, amount in commitment_appropriations.items():
            ca = row.copy()
            ca['commitment_year'] = context['data_year']
            ca['year'] = year
            ca['amount'] = amount
            ca['financial_type'] = 'VE'
            ca['source_id'] = str(year) + "." + str(i)
            sl.upsert(engine, table, ca, UNIQUE_COLUMNS)
            i += 1
Example 11: make_fingerprint
def make_fingerprint(engine, person):
    try:
        long_name = make_long_name(person)
        try:
            long_name = resolve_person(long_name)
            log.info(" -> %s" % long_name.strip())
        except:
            log.error("Resolve did not work")
            pass
        Person = sl.get_table(engine, 'person')
        sl.upsert(engine, Person, {
            'fingerprint': long_name,
            'slug': url_slug(long_name),
            'mdb_id': person['mdb_id']
            }, unique=['mdb_id'])
        Rolle = sl.get_table(engine, 'rolle')
        sl.upsert(engine, Rolle, {
            'mdb_id': person['mdb_id'],
            'fingerprint': long_name
            }, unique=['mdb_id'])
        person['fingerprint'] = long_name
    except BadReference:
        log.error("Bad Reference %s", person)
        pass
Example 12: process_rows
def process_rows(handlefunc, engine=None):
    if engine is None:
        engine = make_engine()
    table = sl.get_table(engine, 'fts')
    for row in sl.all(engine, table):
        out = handlefunc(row)
        sl.upsert(engine, table, out, ['id'])
    return table
Example 13: condense
def condense(engine, resource_id, table_id, force):
    table_suffix = '%s_table%s' % (resource_id, table_id)
    if not engine.has_table('raw_%s' % table_suffix):
        return
    condensed_table = sl.get_table(engine, 'condensed')
    # Skip over tables we have already extracted
    if not force and sl.find_one(engine, condensed_table, resource_id=resource_id, table_id=table_id) is not None:
        return
    connection = engine.connect()
    trans = connection.begin()
    start = time.time()
    try:
        raw_table = sl.get_table(connection, 'raw_%s' % table_suffix)
        sl.drop_table(connection, 'spending_%s' % table_suffix)
        spending_table = sl.get_table(connection, 'spending_%s' % table_suffix)
        columns_table = sl.get_table(connection, 'column_sets')
        normalise_map = normalised_columns_map(raw_table)
        normalised_headers = ','.join(sorted(normalise_map.values()))
        mapping_row = sl.find_one(connection, columns_table, normalised=normalised_headers)
        if mapping_row is None or not mapping_row.get('valid'):
            # This table is unmapped, cannot be condensed
            return
        column_mapping = json.loads(mapping_row['column_map'])
        # Build the final mapping from input column to output column
        mapping = {}
        for k, n in normalise_map.iteritems():
            if n in column_mapping and column_mapping[n] is not None and len(column_mapping[n]) > 0:
                mapping[k] = column_mapping[n]
        for row in sl.all(connection, raw_table):
            spending_row = {}
            for key, value in row.items():
                if key not in mapping:
                    continue
                if not value or not len(value.strip()):
                    continue
                if mapping[key] in spending_row:
                    continue
                spending_row[mapping[key]] = value.strip()
            #print spending_row
            sl.add_row(connection, spending_table, spending_row)
        sl.upsert(connection, condensed_table, {'resource_id': resource_id,
                                                'table_id': table_id,
                                                'condense_time': time.time() - start,
                                                }, ['resource_id', 'table_id'])
        trans.commit()
    finally:
        connection.close()
Example 14: speechmatcher_alignment_post
def speechmatcher_alignment_post(wp, session):
    engine = etl_engine()
    table = sl.get_table(engine, 'alignments')
    data = dict(request.form.items())
    data['sequence'] = int(data['sequence'])
    data['wp'] = wp
    data['session'] = session
    sl.upsert(engine, table, data, ['wp', 'session', 'sequence'])
    return speechmatcher_alignment_get(wp, session)
Example 15: merge
def merge():
    engine = util.make_engine()
    table = sl.get_table(engine, 'fts')
    for row in sl.distinct(engine, table, 'beneficiary', 'country_code'):
        canonical, uri, score = lookup(row.get('beneficiary'), row.get('country_code'), engine)
        row['beneficiary_canonical'] = canonical
        row['beneficiary_uri'] = uri
        row['beneficiary_score'] = score
        sl.upsert(engine, table, row, ['beneficiary', 'country'])