本文整理汇总了 Python 中 mediawords.db.DatabaseHandler.find_or_create 方法的典型用法代码示例。如果您正苦于以下问题:Python DatabaseHandler.find_or_create 方法的具体用法?Python DatabaseHandler.find_or_create 怎么用?Python DatabaseHandler.find_or_create 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 mediawords.db.DatabaseHandler 的用法示例。
在下文中一共展示了DatabaseHandler.find_or_create方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: add_story
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import find_or_create [as 别名]
def add_story(db: DatabaseHandler, story: dict, feeds_id: int, skip_checking_if_new: bool = False) -> Optional[dict]:
    """If the story is new, add it to the database and map it to the given feed.

    The function opens its own transaction (and commits or rolls it back itself), so it refuses to run when the
    handle is already inside one.

    Arguments:
    db - db handle
    story - story dict to insert; 'media_id' is always read, 'url' and 'guid' are read for log / error messages.
            If 'full_text_rss' is missing or None, it is filled in on this dict before the insert.
    feeds_id - ID of the feed to map the created story to (bytes are decoded and cast to int)
    skip_checking_if_new - if true, don't call is_new() before inserting (bytes are decoded and cast to bool)

    Returns:
    created story row, or None if the story was not created (it is not new, or its GUID conflicts with an
    existing story)

    Raises:
    McAddStoryException - if called from within a transaction, or if the insert fails for any reason other than
                          a "stories_guid" unique constraint violation
    """
    story = decode_object_from_bytes_if_needed(story)

    if isinstance(feeds_id, bytes):
        feeds_id = decode_object_from_bytes_if_needed(feeds_id)
    feeds_id = int(feeds_id)

    if isinstance(skip_checking_if_new, bytes):
        skip_checking_if_new = decode_object_from_bytes_if_needed(skip_checking_if_new)
    skip_checking_if_new = bool(int(skip_checking_if_new))

    if db.in_transaction():
        raise McAddStoryException("add_story() can't be run from within transaction.")

    db.begin()

    db.query("LOCK TABLE stories IN ROW EXCLUSIVE MODE")

    if not skip_checking_if_new and not is_new(db=db, story=story):
        log.debug("Story '{}' is not new.".format(story['url']))
        # Nothing was written, but the transaction opened above still has to be closed.
        db.commit()
        return None

    # NOTE(review): the result is dereferenced below without a None check -- confirm that find_by_id() cannot
    # return None for a story's media_id.
    medium = db.find_by_id(table='media', object_id=story['media_id'])

    if story.get('full_text_rss') is None:
        story['full_text_rss'] = medium.get('full_text_rss', False) or False

        # A story without a description is never marked as full text RSS. Truthiness is used instead of len()
        # so that an explicit None description is handled the same way as a missing or empty one rather than
        # raising TypeError.
        if not story.get('description'):
            story['full_text_rss'] = False

    try:
        story = db.create(table='stories', insert_hash=story)
    except Exception as ex:
        db.rollback()

        # FIXME get rid of this, replace with native upsert on "stories_guid" unique constraint
        if 'unique constraint \"stories_guid' in str(ex):
            log.warning(
                "Failed to add story for '{}' to GUID conflict (guid = '{}')".format(story['url'], story['guid'])
            )
            return None

        # Chain the original exception so that its traceback is not lost.
        raise McAddStoryException("Error adding story: {}\nStory: {}".format(str(ex), str(story))) from ex

    db.find_or_create(
        table='feeds_stories_map',
        insert_hash={
            'stories_id': story['stories_id'],
            'feeds_id': feeds_id,
        }
    )

    db.commit()

    return story
示例2: update_extractor_version_tag
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import find_or_create [as 别名]
def update_extractor_version_tag(db: DatabaseHandler, story: dict) -> None:
    """Replace the story's tag in the extractor version tag set with the tag named by extractor_name().

    Arguments:
    db - db handle
    story - story dict; only 'stories_id' is read

    Returns:
    None
    """
    # FIXME no caching because unit tests run in the same process so a cached tag set / tag will not be recreated.
    # Purging such a cache manually is very error-prone.
    story = decode_object_from_bytes_if_needed(story)

    version_tag_set = db.find_or_create(
        table='tag_sets',
        insert_hash={'name': extractor_version_tag_sets_name()},
    )

    # Drop whatever tags from the extractor version tag set the story currently has.
    delete_params = {
        'tag_sets_id': version_tag_set['tag_sets_id'],
        'stories_id': story['stories_id'],
    }
    db.query("""
DELETE FROM stories_tags_map AS stm
USING tags AS t
JOIN tag_sets AS ts
ON ts.tag_sets_id = t.tag_sets_id
WHERE t.tags_id = stm.tags_id
AND ts.tag_sets_id = %(tag_sets_id)s
AND stm.stories_id = %(stories_id)s
""", delete_params)

    # Find (or create) the tag for the extractor name and attach it to the story.
    version_tag = db.find_or_create(
        table='tags',
        insert_hash={'tag': extractor_name(), 'tag_sets_id': version_tag_set['tag_sets_id']},
    )

    insert_params = {'stories_id': story['stories_id'], 'tags_id': version_tag['tags_id']}
    db.query("""
INSERT INTO stories_tags_map (stories_id, tags_id)
VALUES (%(stories_id)s, %(tags_id)s)
""", insert_params)
示例3: guess_medium
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import find_or_create [as 别名]
def guess_medium(db: DatabaseHandler, story_url: str) -> dict:
    """Return the media source guessed for a story URL, creating a new one if none matches.

    A medium URL and name are derived from the story URL and looked up with lookup_medium(); a match is returned
    as is. Otherwise a new medium is created under a unique name and URL, tagged with the spidered tag, and
    returned.

    Arguments:
    db - db handle
    story_url - URL of the story to guess the medium for

    Returns:
    medium dict

    Raises:
    McTopicMediaUniqueException - if no medium could be found or created within _GUESS_MEDIUM_RETRIES attempts
    """
    (medium_url, medium_name) = generate_medium_url_and_name_from_url(story_url)

    existing_medium = lookup_medium(db, medium_url, medium_name)
    if existing_medium is not None:
        return existing_medium

    normalized_medium_url = _normalize_url(medium_url)
    candidate_urls = [normalized_medium_url, medium_url, _normalize_url(story_url), story_url]

    # avoid conflicts with existing media names and urls that are missed
    # by the lookup above b/c of dups feeds or foreign_rss_links
    medium_name = get_unique_medium_name(db, [medium_name] + candidate_urls)
    medium_url = get_unique_medium_url(db, candidate_urls)

    # a race condition with another thread can cause this to fail sometimes, but after the medium in the
    # other process has been created, all should be fine
    for _ in range(_GUESS_MEDIUM_RETRIES):
        medium = db.find_or_create(
            'media',
            {'name': medium_name, 'url': medium_url, 'normalized_url': normalized_medium_url},
        )
        if medium is not None:
            break
        time.sleep(1)
    else:
        # Only reached when no attempt produced a medium (including the zero-attempts case).
        raise McTopicMediaUniqueException(
            "Unable to find or create medium for %s / %s" % (medium_name, medium_url))

    log.info("add medium: %s / %s / %d" % (medium_name, medium_url, medium['media_id']))

    spidered_tag = get_spidered_tag(db)
    db.find_or_create('media_tags_map', {'media_id': medium['media_id'], 'tags_id': spidered_tag['tags_id']})

    return medium
示例4: assign_date_guess_tag
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import find_or_create [as 别名]
def assign_date_guess_tag(
        db: DatabaseHandler,
        story: dict,
        date_guess: GuessDateResult,
        fallback_date: typing.Optional[str]) -> None:
    """Tag the story according to how (or whether) its date was guessed.

    If date_guess found a result, the tag is guess_by_url, guess_by_tag_<html tag> or guess_by_unknown in the
    guess method tag set. Otherwise, if there is a fallback_date, the tag is fallback_date in the same tag set.
    Else the tag is the invalid date tag in the invalid date tag set.

    Arguments:
    db - db handle
    story - story dict from db; only 'stories_id' is read
    date_guess - GuessDateResult from guess_date() call
    fallback_date - fallback date, or None if there is none; only checked for being None

    Returns:
    None
    """
    if date_guess.found:
        tag_set_name = mediawords.tm.guess_date.GUESS_METHOD_TAG_SET
        method = date_guess.guess_method

        if method.startswith('Extracted from url'):
            tag_name = 'guess_by_url'
        elif method.startswith('Extracted from tag'):
            # Name the tag after the HTML tag mentioned in the guess method, if one can be parsed out of it.
            html_tag_match = re2.search(r'\<(\w+)', method)
            if html_tag_match is None:
                html_tag = 'unknown'
            else:
                html_tag = html_tag_match.group(1)
            tag_name = 'guess_by_tag_' + str(html_tag)
        else:
            tag_name = 'guess_by_unknown'
    elif fallback_date is None:
        tag_set_name = mediawords.tm.guess_date.INVALID_TAG_SET
        tag_name = mediawords.tm.guess_date.INVALID_TAG
    else:
        tag_set_name = mediawords.tm.guess_date.GUESS_METHOD_TAG_SET
        tag_name = 'fallback_date'

    tag_set_row = db.find_or_create('tag_sets', {'name': tag_set_name})
    tag_row = db.find_or_create('tags', {'tag': tag_name, 'tag_sets_id': tag_set_row['tag_sets_id']})

    stories_id = story['stories_id']

    # NOTE(review): this removes every tag mapping of the story, not only date guess tags -- confirm that this
    # is intended.
    db.query("delete from stories_tags_map where stories_id = %(a)s", {'a': stories_id})

    db.query(
        "insert into stories_tags_map (stories_id, tags_id) values (%(a)s, %(b)s)",
        {'a': stories_id, 'b': tag_row['tags_id']})
示例5: get_spidered_tag
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import find_or_create [as 别名]
def get_spidered_tag(db: DatabaseHandler) -> dict:
    """Return the tag dict for tag SPIDERED_TAG_TAG in tag set SPIDERED_TAG_SET, creating both if needed.

    Arguments:
    db - db handle

    Returns:
    tag dict
    """
    lookup_sql = """
select t.*
from tags t
join tag_sets ts using ( tag_sets_id )
where
t.tag = %(a)s and
ts.name = %(b)s
"""
    existing_tag = db.query(lookup_sql, {'a': SPIDERED_TAG_TAG, 'b': SPIDERED_TAG_SET}).hash()
    if existing_tag is not None:
        return existing_tag

    # No such tag yet: make sure the tag set exists, then create the tag within it.
    spidered_tag_set = db.find_or_create('tag_sets', {'name': SPIDERED_TAG_SET})
    return db.find_or_create(
        'tags',
        {'tag': SPIDERED_TAG_TAG, 'tag_sets_id': spidered_tag_set['tag_sets_id']},
    )
示例6: get_spider_feed
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import find_or_create [as 别名]
def get_spider_feed(db: DatabaseHandler, medium: dict) -> dict:
    """Return the feed named SPIDER_FEED_NAME for the media source, creating it if it does not exist.

    Arguments:
    db - db handle
    medium - medium dict; 'media_id' is read for the lookup, and 'url' only when the feed has to be created

    Returns:
    feed dict
    """
    media_id = medium['media_id']

    existing_feed = db.query(
        "select * from feeds where media_id = %(a)s and name = %(b)s",
        {'a': media_id, 'b': SPIDER_FEED_NAME}).hash()

    if existing_feed is None:
        # The new feed is created inactive, with a URL derived from the medium's URL.
        new_feed = {
            'media_id': medium['media_id'],
            'url': medium['url'] + '#spiderfeed',
            'name': SPIDER_FEED_NAME,
            'active': False,
        }
        return db.find_or_create('feeds', new_feed)

    return existing_feed