本文整理汇总了Python中DB.conn方法的典型用法代码示例。如果您正苦于以下问题:Python DB.conn方法的具体用法?Python DB.conn怎么用?Python DB.conn使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类DB
的用法示例。
在下文中一共展示了DB.conn方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: find_or_create
# 需要导入模块: import DB [as 别名]
# 或者: from DB import conn [as 别名]
def find_or_create(journo, hint_art=None, expected_journo=None):
    """Return the id of a matching journo, creating a new record if none is found."""
    journo_id = FindJourno(DB.conn(), journo['name'], hint_art, expected_journo)
    if journo_id is not None:
        return journo_id
    # no existing match - create a fresh journo entry
    new_journo = create(journo['name'])
    ukmedia.DBUG2(" NEW journo: %s\n" % (new_journo['ref'],))
    return new_journo['id']
示例2: update_activation
# 需要导入模块: import DB [as 别名]
# 或者: from DB import conn [as 别名]
def update_activation(journo_id):
    """Activate a journo once they have more than one active article (hidden journos are untouched)."""
    cursor = DB.conn().cursor()
    # count live articles attributed to this journo
    cursor.execute( "SELECT COUNT(*) FROM journo_attr ja INNER JOIN article a ON (a.id=ja.article_id AND a.status='a') WHERE ja.journo_id=%s", (journo_id,))
    row = cursor.fetchone()
    num_active = row[0]
    if num_active > 1:
        # only flip inactive ('i') journos - other statuses are left alone
        cursor.execute( "UPDATE journo SET status='a' WHERE id=%s AND status='i'",(journo_id,))
    cursor.close()
示例3: GetAttrLogStr
# 需要导入模块: import DB [as 别名]
# 或者: from DB import conn [as 别名]
def GetAttrLogStr(article_id ):
    """Return attributed journos as a log string, eg "[j1234 fred blogs], [j4321 bob roberts]"."""
    cursor = DB.conn().cursor()
    query = """
SELECT j.id,j.ref,j.prettyname
FROM ( JOURNO j INNER JOIN journo_attr attr ON attr.journo_id=j.id )
WHERE attr.article_id=%s
"""
    cursor.execute( query, (article_id,) )
    # rows are dict-style (accessed by column name)
    entries = []
    for row in cursor.fetchall():
        entries.append( "[j%d %s]" % (int(row['id']),row['prettyname']) )
    return ", ".join( entries )
示例4: ArticleExists
# 需要导入模块: import DB [as 别名]
# 或者: from DB import conn [as 别名]
def ArticleExists( self, srcid ):
    """Return the article id if an article with this srcid is already in the DB, else None."""
    cursor = DB.conn().cursor()
    cursor.execute( 'SELECT id FROM article WHERE srcid=%s', ( srcid, ) )
    row = cursor.fetchone()
    cursor.close()
    # no row means the article hasn't been stored yet
    return row[0] if row else None
示例5: upsert
# 需要导入模块: import DB [as 别名]
# 或者: from DB import conn [as 别名]
def upsert(article_id, commentlink):
    """Insert or replace the commentlink row for (article_id, source)."""
    cursor = DB.conn().cursor()
    link = commentlink
    # optional fields default to NULL (note: fills them in on the caller's dict,
    # matching the original in-place behaviour)
    link.setdefault('score', None)
    link.setdefault('num_comments', None)
    # delete-then-insert acts as an upsert keyed on (article_id, source)
    cursor.execute( """DELETE FROM article_commentlink WHERE article_id=%s and source=%s""", (article_id, link['source']) )
    cursor.execute( """INSERT INTO article_commentlink (article_id,source,comment_url,num_comments,score ) VALUES (%s,%s,%s,%s,%s)""",
        ( article_id, link['source'], link['comment_url'], link['num_comments'], link['score'] ) )
示例6: GetOrgID
# 需要导入模块: import DB [as 别名]
# 或者: from DB import conn [as 别名]
def GetOrgID(shortname):
    """Look up an organisation id from its shortname.

    The whole id map is loaded from the DB on first call and cached in
    the module-level cached_orgidmap, so the database is only hit once
    per process. Raises KeyError for an unknown shortname.
    """
    global cached_orgidmap
    if cached_orgidmap is None:
        # first call - load the complete map in one query
        cached_orgidmap = {}
        c = DB.conn().cursor()
        c.execute( "SELECT id,shortname FROM organisation" )
        # rows are dict-style (accessed by column name)
        for row in c.fetchall():
            cached_orgidmap[ row['shortname'] ] = row['id']
        c.close()
    return cached_orgidmap[ shortname ]
示例7: generate
# 需要导入模块: import DB [as 别名]
# 或者: from DB import conn [as 别名]
def generate(article_id, article_content):
    """Extract tags from an article's content and store them in the DB."""
    plain_text = ukmedia.StripHTML(article_content)
    tags = ExtractFromText(plain_text)
    cursor = DB.conn().cursor()
    # replace any previously-stored tags for this article
    cursor.execute("DELETE FROM article_tag WHERE article_id=%s", (article_id,))
    # tags maps (name, kind) -> frequency
    for (name, kind), freq in tags.items():
        cursor.execute(
            "INSERT INTO article_tag (article_id, tag, kind, freq) VALUES (%s,%s,%s,%s)",
            (article_id, name.encode("utf-8"), kind, freq),
        )
    cursor.close()
示例8: GetBlacklist
# 需要导入模块: import DB [as 别名]
# 或者: from DB import conn [as 别名]
def GetBlacklist():
    """Return the list of banned tags (unicode strings).

    Loaded from the DB on first call and cached in the module-level
    blacklist_cached so the database is only queried once per process.
    """
    global blacklist_cached
    # don't hit db more often than necessary
    if blacklist_cached is not None:
        return blacklist_cached
    c = DB.conn().cursor()
    c.execute("SELECT bannedtag FROM tag_blacklist")
    # stored as utf-8 bytes in the DB - decode to unicode here
    blacklist_cached = [row[0].decode("utf-8") for row in c.fetchall()]
    c.close()
    return blacklist_cached
示例9: scrape_articles
# 需要导入模块: import DB [as 别名]
# 或者: from DB import conn [as 别名]
#.........这里部分代码省略.........
context['permalink'] = canonical_url
# strip off "?rss=yes" etc from permalink
tidied_url = tidy_url(context['permalink'])
if tidied_url != context['permalink']:
context['permalink'] = tidied_url
known_urls.add(tidied_url)
context['urls'] = known_urls
# check that all urls are OK (eg express.co.uk have a habit of publishing borked ones for blogs)
for url in known_urls:
url.encode('utf-8') # will raise an exception if dud
# repeat url-based existence check with the urls we now have
# TODO: if so, add any new urls... maybe rescrape and update article?
article_id = None
got = store.find_article(known_urls)
if len(got) > 0:
if extralogging:
for article_id in got:
ukmedia.DBUG( u"already got %s [a%s] (attributed to: %s)\n" % (context['srcurl'], article_id,GetAttrLogStr(article_id)))
if not opts.force_rescrape:
had_count += 1
continue; # skip it - we've already got it
else:
assert(len(got) == 1)
article_id = got[0]
# some extra, last minute context :-)
context[ 'lastscraped' ] = datetime.now()
art = extract(html, context, **kwargs)
if art:
# set the srcorg id for the article
if 'srcorgname' in art and art['srcorgname'] is not None:
srcorg = Misc.GetOrgID( art[ 'srcorgname' ] )
else:
# no publication specified - look up using domain name
o = urlparse.urlparse(art['permalink'])
domain = o[1].lower()
srcorg = Publication.find_or_create(domain)
art['srcorg'] = srcorg
# resolve bylined authors to journo ids
authors = Byline.CrackByline(art['byline'])
attributed = []
for author in authors:
attributed.append(Journo.find_or_create(author, art, expected_journo))
art['journos'] = attributed
if opts.test:
ukmedia.PrettyDump( art )
if article_id:
# rescraping existing article
art['id'] = article_id
article_id = store.upsert( art )
rescrape_count += 1
else:
#
article_id = store.upsert( art )
newcount += 1
if opts.test:
DB.conn().rollback()
else:
DB.conn().commit()
except Exception, err:
DB.conn().rollback()
# always just bail out upon ctrl-c
if isinstance( err, KeyboardInterrupt ):
raise
failcount = failcount+1
# TODO: phase out NonFatal! just get scraper to print out a warning message instead
if isinstance( err, ukmedia.NonFatal ):
continue
report = traceback.format_exc()
if 'title' in context:
msg = u"FAILED (%s): '%s' (%s)" % (err, context['title'], context['srcurl'])
else:
msg = u"FAILED (%s): (%s)" % (err,context['srcurl'])
ukmedia.DBUG( msg + "\n" )
ukmedia.DBUG2( report + "\n" )
ukmedia.DBUG2( '-'*60 + "\n" )
abortcount = abortcount + 1
if abortcount > max_errors:
print >>sys.stderr, "Too many errors - ABORTING"
raise
示例10: upsert
# 需要导入模块: import DB [as 别名]
# 或者: from DB import conn [as 别名]
def upsert( self, art ):
"""Insert or update an article"""
# if no separate 'urls' set, create it
if not 'urls' in art:
art['urls'] = set((art['permalink'], art['srcurl']))
# fill in some defaults if missing
if 'lastscraped' not in art:
art['lastscraped'] = datetime.now()
if 'lastseen' not in art:
art['lastseen'] = datetime.now()
if 'description' not in art:
art['description'] = ukmedia.FirstPara(art['content'])
CheckArticle( art )
# send text to the DB as utf-8
title = art['title'].encode( 'utf-8' )
byline = art[ 'byline' ].encode( 'utf-8' )
description = art['description'].encode( 'utf-8' )
pubdate = "%s" %(art['pubdate'])
lastscraped = "%s" % (art['lastscraped'])
lastseen = "%s" % (art['lastseen'])
firstseen = lastseen # it's a new entry
srcurl = art['srcurl']
permalink = art['permalink']
srcorg = art['srcorg']
# phasing out srcid...
if 'srcid' in art:
srcid = art['srcid']
else:
srcid = art['permalink']
wordcount = None
content = None
# does article include content?
if 'content' in art:
content = art['content'].encode( 'utf-8' )
# noddy wordcount
txt = ukmedia.StripHTML( art['content'] )
wordcount = len( txt.split() );
# send to db!
cursor = DB.conn().cursor()
updating = False
if 'id' in art:
updating = True
if updating:
# update existing
article_id = art['id']
q = 'UPDATE article SET (title, byline, description, lastscraped, pubdate, lastseen, permalink, srcurl, srcorg, srcid, wordcount, last_comment_check) = (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) WHERE id=%s'
cursor.execute(q, (title, byline, description, lastscraped, pubdate, lastseen, permalink, srcurl, srcorg, srcid, wordcount, lastscraped, article_id))
else:
# insert new
q = 'INSERT INTO article (title, byline, description, lastscraped, pubdate, firstseen, lastseen, permalink, srcurl, srcorg, srcid, wordcount, last_comment_check) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
cursor.execute( q, ( title, byline, description, lastscraped, pubdate, firstseen, lastseen, permalink, srcurl, srcorg, srcid, wordcount, lastscraped ) )
# get the newly-allocated id
cursor.execute( "select currval('article_id_seq')" )
article_id = cursor.fetchone()[0]
# add the known urls for the article
if updating:
cursor.execute( "DELETE FROM article_url WHERE article_id=%s", (article_id,))
for url in set(art['urls']):
cursor.execute( "INSERT INTO article_url (url,article_id) VALUES (%s,%s)", (url,article_id))
# update content, if included
if content is None:
insert_content = False
else:
insert_content = True
if updating:
# TODO: keep multiple revisions to track changes
# has the content actually changed?
cursor.execute("SELECT id FROM article_content WHERE article_id=%s AND content=%s", (article_id,content))
foo = cursor.fetchall() # gah... couldn't get cursor.rowcount to work...
if len(foo)>=1:
# no change, so just leave it as is
insert_content = False
if insert_content:
cursor.execute("DELETE FROM article_content WHERE article_id=%s", (article_id,))
q = 'INSERT INTO article_content (article_id, content,scraped) VALUES ( %s,%s,%s )'
cursor.execute(q, (article_id, content, lastscraped))
# queue it for xapian indexing
cursor.execute("DELETE FROM article_needs_indexing WHERE article_id=%s", (article_id,))
cursor.execute("INSERT INTO article_needs_indexing (article_id) VALUES (%s)", (article_id,))
# if there was a scraper error entry for this article, delete it now
cursor.execute( "DELETE FROM error_articlescrape WHERE srcid=%s", (srcid,) )
#.........这里部分代码省略.........
示例11: find_article
# 需要导入模块: import DB [as 别名]
# 或者: from DB import conn [as 别名]
def find_article(self,known_urls):
    """Return the ids of stored articles matching any of the given urls."""
    # one %s placeholder per url; values are passed separately so the
    # query stays parameterized
    placeholders = ','.join(['%s'] * len(known_urls))
    query = "SELECT article_id FROM article_url WHERE url IN (" + placeholders + ")"
    cursor = DB.conn().cursor()
    cursor.execute(query, list(known_urls))
    return [row['article_id'] for row in cursor]
示例12: create
# 需要导入模块: import DB [as 别名]
# 或者: from DB import conn [as 别名]
def create(rawname ):
    """Create a new journo record from a raw byline name.

    Returns a dict with the new journo's id, ref, prettyname,
    firstname/lastname and their metaphone codes.
    Raises Exception if the name is empty or belongs to a person who
    has opted out of the database.
    """
    conn = DB.conn()
    prettyname = GetPrettyNameFromRawName( conn, rawname )
    # HACK: hardwired opt-out until there is a proper mechanism for
    # journalists who don't want to be in the database
    if prettyname==u'Jini Reddy':
        raise Exception("Not creating New Journo who has opted out")
    parts = prettyname.lower().split()
    if len(parts) == 0:
        # was `raise "Empty journo name!"` - string exceptions are
        # invalid (TypeError since Python 2.6), so raise a real one
        raise Exception("Empty journo name!")
    elif len(parts) == 1:
        # single-word name: use it as both first and last name
        firstname = parts[0]
        lastname = parts[0]
    else:
        firstname = parts[0]
        lastname = parts[-1]
    # get metaphone versions of names (as calculated by php metaphone())
    # 4 chars seems like the magic length for fuzzy matching.
    # (utf-8 encoding a little silly, but consistent assumptions on the web side of things)
    firstname_metaphone = metaphone.php_metaphone( firstname.encode('utf-8') )[:4]
    lastname_metaphone = metaphone.php_metaphone( lastname.encode('utf-8') )[:4]
    ref = GenerateUniqueRef( conn, prettyname )
    # TODO: maybe need to filter out some chars from ref?
    q = conn.cursor()
    # allocate the id up front so it can be returned without re-querying
    q.execute( "select nextval('journo_id_seq')" )
    (journo_id,) = q.fetchone()
    q.execute( "INSERT INTO journo (id,ref,prettyname,firstname,lastname,firstname_metaphone,lastname_metaphone,"
        "created) VALUES (%s,%s,%s,%s,%s,%s,%s,now())",
        ( journo_id,
            ref.encode('utf-8'),
            prettyname.encode('utf-8'),
            firstname.encode('utf-8'),
            lastname.encode('utf-8'),
            firstname_metaphone,
            lastname_metaphone ) )
    q.close()
    journo = {
        'id':journo_id,
        'ref':ref,
        'prettyname':prettyname,
        'firstname':firstname,
        'lastname': lastname,
        'firstname_metaphone': firstname_metaphone,
        'lastname_metaphone': lastname_metaphone,
    }
    return journo