当前位置: 首页>>代码示例>>Python>>正文


Python DatabaseHandler.find_or_create方法代码示例

本文整理汇总了Python中mediawords.db.DatabaseHandler.find_or_create方法的典型用法代码示例。如果您正苦于以下问题:Python DatabaseHandler.find_or_create方法的具体用法?Python DatabaseHandler.find_or_create怎么用?Python DatabaseHandler.find_or_create使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在mediawords.db.DatabaseHandler的用法示例。


在下文中一共展示了DatabaseHandler.find_or_create方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: add_story

# Required import: from mediawords.db import DatabaseHandler
# Note: find_or_create is a method of DatabaseHandler and cannot be imported by itself; call it on a handler instance, e.g. db.find_or_create(...)
def add_story(db: DatabaseHandler, story: dict, feeds_id: int, skip_checking_if_new: bool = False) -> Optional[dict]:
    """Add a new story to the database and map it to the given feed.

    The work is done inside a transaction that this function begins itself, so it refuses to run when the
    database handle is already in a transaction.

    Arguments:
    db - database handle (must not be in a transaction)
    story - story dict to insert into "stories"; 'media_id' is read here, 'url' / 'guid' are used in messages
    feeds_id - ID of the feed to map the created story to in "feeds_stories_map"
    skip_checking_if_new - if true, don't call is_new() before inserting

    Returns:
    Created story dict, or None if the story is not new or the insert hit the "stories_guid" unique constraint.

    Raises:
    McAddStoryException - when called from within a transaction, when the story's medium is missing while
    deriving 'full_text_rss', or when the insert fails for any reason other than a GUID conflict.
    """

    story = decode_object_from_bytes_if_needed(story)
    if isinstance(feeds_id, bytes):
        feeds_id = decode_object_from_bytes_if_needed(feeds_id)
    feeds_id = int(feeds_id)
    if isinstance(skip_checking_if_new, bytes):
        skip_checking_if_new = decode_object_from_bytes_if_needed(skip_checking_if_new)
    skip_checking_if_new = bool(int(skip_checking_if_new))

    if db.in_transaction():
        raise McAddStoryException("add_story() can't be run from within transaction.")

    db.begin()

    db.query("LOCK TABLE stories IN ROW EXCLUSIVE MODE")

    if not skip_checking_if_new:
        if not is_new(db=db, story=story):
            log.debug("Story '{}' is not new.".format(story['url']))
            # Nothing was written, but the transaction opened above still has to be closed
            db.commit()
            return None

    medium = db.find_by_id(table='media', object_id=story['media_id'])

    # Only derive 'full_text_rss' when the caller didn't set it explicitly
    if story.get('full_text_rss', None) is None:
        if medium is None:
            # NOTE(review): presumably find_by_id() returns None for an unknown media_id -- confirm. Without
            # this guard the .get() below would raise AttributeError and leave the transaction open.
            db.rollback()
            raise McAddStoryException("Medium {} was not found while adding story.".format(story['media_id']))

        story['full_text_rss'] = medium.get('full_text_rss', False) or False

        # A story without a description can't be a full text RSS story; the truthiness test also covers
        # a 'description' key that is present but set to None (len(None) would raise TypeError)
        if not story.get('description'):
            story['full_text_rss'] = False

    try:
        story = db.create(table='stories', insert_hash=story)
    except Exception as ex:
        db.rollback()

        # FIXME get rid of this, replace with native upsert on "stories_guid" unique constraint
        if 'unique constraint \"stories_guid' in str(ex):
            log.warning(
                "Failed to add story for '{}' to GUID conflict (guid = '{}')".format(story['url'], story['guid'])
            )
            return None

        else:
            # Chain the original exception so that its traceback is not lost
            raise McAddStoryException("Error adding story: {}\nStory: {}".format(str(ex), str(story))) from ex

    db.find_or_create(
        table='feeds_stories_map',
        insert_hash={
            'stories_id': story['stories_id'],
            'feeds_id': feeds_id,
        }
    )

    db.commit()

    return story
开发者ID:berkmancenter,项目名称:mediacloud,代码行数:62,代码来源:stories.py

示例2: update_extractor_version_tag

# Required import: from mediawords.db import DatabaseHandler
# Note: find_or_create is a method of DatabaseHandler and cannot be imported by itself; call it on a handler instance, e.g. db.find_or_create(...)
def update_extractor_version_tag(db: DatabaseHandler, story: dict) -> None:
    """Replace the story's extractor version tag with the tag named after the current extractor.

    Tags from the extractor version tag set that are already mapped to the story are unmapped first; after that
    the tag named by extractor_name() is found or created in that tag set and mapped to the story.
    """
    # FIXME no caching because unit tests run in the same process so a cached tag set / tag will not be recreated.
    # Purging such a cache manually is very error-prone.

    story = decode_object_from_bytes_if_needed(story)

    version_tag_set = db.find_or_create(table='tag_sets', insert_hash={'name': extractor_version_tag_sets_name()})
    tag_sets_id = version_tag_set['tag_sets_id']
    stories_id = story['stories_id']

    # Unmap whatever tag from the version tag set is currently attached to the story
    unmap_params = {
        'tag_sets_id': tag_sets_id,
        'stories_id': stories_id,
    }
    db.query("""
        DELETE FROM stories_tags_map AS stm
            USING tags AS t
                JOIN tag_sets AS ts
                    ON ts.tag_sets_id = t.tag_sets_id
        WHERE t.tags_id = stm.tags_id
          AND ts.tag_sets_id = %(tag_sets_id)s
          AND stm.stories_id = %(stories_id)s
    """, unmap_params)

    version_tag = db.find_or_create(
        table='tags',
        insert_hash={'tag': extractor_name(), 'tag_sets_id': tag_sets_id},
    )

    # Map the current extractor's tag to the story
    db.query("""
        INSERT INTO stories_tags_map (stories_id, tags_id)
        VALUES (%(stories_id)s, %(tags_id)s)
    """, {'stories_id': stories_id, 'tags_id': version_tag['tags_id']})
开发者ID:berkmancenter,项目名称:mediacloud,代码行数:32,代码来源:extractor_version.py

示例3: guess_medium

# Required import: from mediawords.db import DatabaseHandler
# Note: find_or_create is a method of DatabaseHandler and cannot be imported by itself; call it on a handler instance, e.g. db.find_or_create(...)
def guess_medium(db: DatabaseHandler, story_url: str) -> dict:
    """Return the media source that matches the given story URL, creating one if none is found.

    A medium URL and name are derived from the story URL and looked up with lookup_medium() (according to the
    original notes, that lookup works on a normalized host and honors the media.dup_media_id duplicate
    relationships).  When nothing is found, a new medium with a unique name and URL is created, tagged with the
    spidered tag and returned.

    Raises McTopicMediaUniqueException if the medium could not be found or created after all retries.
    """
    (medium_url, medium_name) = generate_medium_url_and_name_from_url(story_url)

    existing_medium = lookup_medium(db, medium_url, medium_name)
    if existing_medium is not None:
        return existing_medium

    normalized_medium_url = _normalize_url(medium_url)
    candidate_urls = [normalized_medium_url, medium_url, _normalize_url(story_url), story_url]

    # avoid conflicts with existing media names and urls that are missed
    # by the above query b/c of dups feeds or foreign_rss_links
    medium_name = get_unique_medium_name(db, [medium_name, *candidate_urls])
    medium_url = get_unique_medium_url(db, candidate_urls)

    # a race condition with another thread can cause this to fail sometimes, but after the medium in the
    # other process has been created, all should be fine
    for _ in range(_GUESS_MEDIUM_RETRIES):
        created_medium = db.find_or_create(
            'media',
            {'name': medium_name, 'url': medium_url, 'normalized_url': normalized_medium_url},
        )
        if created_medium is not None:
            break

        time.sleep(1)
    else:
        # Every attempt (if any) came back empty-handed
        raise McTopicMediaUniqueException(
            "Unable to find or create medium for %s / %s" % (medium_name, medium_url))

    log.info("add medium: %s / %s / %d" % (medium_name, medium_url, created_medium['media_id']))

    spidered_tag = get_spidered_tag(db)
    tag_mapping = {'media_id': created_medium['media_id'], 'tags_id': spidered_tag['tags_id']}
    db.find_or_create('media_tags_map', tag_mapping)

    return created_medium
开发者ID:berkmancenter,项目名称:mediacloud,代码行数:48,代码来源:media.py

示例4: assign_date_guess_tag

# Required import: from mediawords.db import DatabaseHandler
# Note: find_or_create is a method of DatabaseHandler and cannot be imported by itself; call it on a handler instance, e.g. db.find_or_create(...)
def assign_date_guess_tag(
        db: DatabaseHandler,
        story: dict,
        date_guess: GuessDateResult,
        fallback_date: typing.Optional[str]) -> None:
    """Tag the story with the method that was used to determine its date.

    If date_guess found a result, the tag is date_guess_method:guess_by_url, guess_by_tag_* or guess_by_unknown,
    depending on the prefix of the guess method.  Otherwise, if there is a fallback_date, the tag is
    date_guess_method:fallback_date.  Failing that, the tag is date_invalid:date_invalid.

    All existing rows for the story in stories_tags_map are deleted before the new tag is inserted.

    Arguments:
    db - db handle
    story - story dict from db
    date_guess - GuessDateResult from guess_date() call
    fallback_date - fallback date, or None if there is none (only tested for being None here)

    Returns:
    None

    """
    if not date_guess.found:
        if fallback_date is None:
            tag_set_name = mediawords.tm.guess_date.INVALID_TAG_SET
            tag_name = mediawords.tm.guess_date.INVALID_TAG
        else:
            tag_set_name = mediawords.tm.guess_date.GUESS_METHOD_TAG_SET
            tag_name = 'fallback_date'
    else:
        tag_set_name = mediawords.tm.guess_date.GUESS_METHOD_TAG_SET
        method = date_guess.guess_method
        if method.startswith('Extracted from url'):
            tag_name = 'guess_by_url'
        elif method.startswith('Extracted from tag'):
            # The HTML tag name is whatever word follows the first "<" in the guess method description
            tag_match = re2.search(r'\<(\w+)', method)
            if tag_match is None:
                html_tag = 'unknown'
            else:
                html_tag = tag_match.group(1)
            tag_name = 'guess_by_tag_' + str(html_tag)
        else:
            tag_name = 'guess_by_unknown'

    tag_set_row = db.find_or_create('tag_sets', {'name': tag_set_name})
    tag_row = db.find_or_create('tags', {'tag': tag_name, 'tag_sets_id': tag_set_row['tag_sets_id']})

    stories_id = story['stories_id']

    db.query("delete from stories_tags_map where stories_id = %(a)s", {'a': stories_id})
    db.query(
        "insert into stories_tags_map (stories_id, tags_id) values (%(a)s, %(b)s)",
        {'a': stories_id, 'b': tag_row['tags_id']})
开发者ID:berkmancenter,项目名称:mediacloud,代码行数:47,代码来源:stories.py

示例5: get_spidered_tag

# Required import: from mediawords.db import DatabaseHandler
# Note: find_or_create is a method of DatabaseHandler and cannot be imported by itself; call it on a handler instance, e.g. db.find_or_create(...)
def get_spidered_tag(db: DatabaseHandler) -> dict:
    """Fetch the tag named SPIDERED_TAG_TAG in the tag set named SPIDERED_TAG_SET, creating both if needed."""
    lookup_sql = """
        select t.*
            from tags t
                join tag_sets ts using ( tag_sets_id )
            where
                t.tag = %(a)s and
                ts.name = %(b)s
        """
    existing_tag = db.query(lookup_sql, {'a': SPIDERED_TAG_TAG, 'b': SPIDERED_TAG_SET}).hash()

    if existing_tag is not None:
        return existing_tag

    # Tag is not there yet: make sure the tag set exists first, then the tag within it
    spidered_tag_set = db.find_or_create('tag_sets', {'name': SPIDERED_TAG_SET})
    return db.find_or_create(
        'tags',
        {'tag': SPIDERED_TAG_TAG, 'tag_sets_id': spidered_tag_set['tag_sets_id']},
    )
开发者ID:berkmancenter,项目名称:mediacloud,代码行数:20,代码来源:media.py

示例6: get_spider_feed

# Required import: from mediawords.db import DatabaseHandler
# Note: find_or_create is a method of DatabaseHandler and cannot be imported by itself; call it on a handler instance, e.g. db.find_or_create(...)
def get_spider_feed(db: DatabaseHandler, medium: dict) -> dict:
    """Return the feed named SPIDER_FEED_NAME that belongs to the medium, creating it if it doesn't exist.

    A newly created feed is inactive and uses the medium's URL with '#spiderfeed' appended as its URL.
    """
    media_id = medium['media_id']

    spider_feed = db.query(
        "select * from feeds where media_id = %(a)s and name = %(b)s",
        {'a': media_id, 'b': SPIDER_FEED_NAME}).hash()

    if spider_feed is None:
        new_feed = {
            'media_id': media_id,
            'url': medium['url'] + '#spiderfeed',
            'name': SPIDER_FEED_NAME,
            'active': False,
        }
        spider_feed = db.find_or_create('feeds', new_feed)

    return spider_feed
开发者ID:berkmancenter,项目名称:mediacloud,代码行数:18,代码来源:stories.py


注:本文中的mediawords.db.DatabaseHandler.find_or_create方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。