本文整理汇总了 Python 中 mediawords.db.DatabaseHandler.query 方法的典型用法代码示例。如果您正苦于以下问题：Python DatabaseHandler.query 方法的具体用法？Python DatabaseHandler.query 怎么用？Python DatabaseHandler.query 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 mediawords.db.DatabaseHandler 的用法示例。
在下文中一共展示了DatabaseHandler.query方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_links_from_story_text
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import query [as 别名]
def get_links_from_story_text(db: DatabaseHandler, story: dict) -> typing.List[str]:
    """Collect every http(s) URL found in the story's download texts, title and description.

    The texts of all downloads that belong to the story are joined with spaces (ordered by download_texts_id),
    followed by the title and the description when they are not None. URLs are matched with a simple regex and
    trailing non-word characters are stripped from every match.

    Arguments:
    db - db handle
    story - story dict; must have 'stories_id', 'title' and 'description' keys

    Returns:
    list of URLs in order of appearance (duplicates are kept)
    """
    downloads_ids_sql = """
        SELECT downloads_id
        FROM downloads
        WHERE stories_id = %(stories_id)s
    """
    download_ids = db.query(downloads_ids_sql, {'stories_id': story['stories_id']}).flat()

    download_texts_sql = """
        SELECT *
        FROM download_texts
        WHERE downloads_id = ANY(%(download_ids)s)
        ORDER BY download_texts_id
    """
    download_texts = db.query(download_texts_sql, {'download_ids': download_ids}).hashes()

    # First part is the combined download text; title and description follow, each separated by a single space
    text_parts = [' '.join([row['download_text'] for row in download_texts])]
    for field in ('title', 'description'):
        if story[field] is not None:
            text_parts.append(str(story[field]))
    story_text = ' '.join(text_parts)

    # Strip trailing punctuation (or any other non-word characters) that the greedy match might have picked up
    return [re.sub(r'\W+$', '', url) for url in re.findall(r'https?://[^\s\")]+', story_text)]
示例2: create
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import query [as 别名]
def create(db: DatabaseHandler, download: dict, extract: dict) -> dict:
    """Replace the download_texts row of a download with newly extracted text.

    Deletes any download_texts row that already exists for the download, inserts a new row with
    extract['extracted_text'] (and its character length), and marks the download as extracted.

    Arguments:
    db - db handle
    download - download dict; must have 'downloads_id'
    extract - extraction result dict; must have 'extracted_text'

    Returns:
    the row returned by the INSERT ... RETURNING * statement
    """
    # FIXME don't pass freeform "extract" dict, we need just the "extracted_text"
    download = decode_object_from_bytes_if_needed(download)
    extract = decode_object_from_bytes_if_needed(extract)

    downloads_id = download['downloads_id']

    delete_sql = """
        DELETE FROM download_texts
        WHERE downloads_id = %(downloads_id)s
    """
    db.query(delete_sql, {'downloads_id': downloads_id})

    insert_sql = """
        INSERT INTO download_texts (downloads_id, download_text, download_text_length)
        VALUES (%(downloads_id)s, %(download_text)s, CHAR_LENGTH(%(download_text)s))
        RETURNING *
    """
    insert_params = {
        'downloads_id': downloads_id,
        'download_text': extract['extracted_text'],
    }
    download_text = db.query(insert_sql, insert_params).hash()

    mark_extracted_sql = """
        UPDATE downloads
        SET extracted = 't'
        WHERE downloads_id = %(downloads_id)s
    """
    db.query(mark_extracted_sql, {'downloads_id': downloads_id})

    return download_text
示例3: add_story
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import query [as 别名]
def add_story(db: DatabaseHandler, story: dict, feeds_id: int, skip_checking_if_new: bool = False) -> Optional[dict]:
    """If the story is new, add story to the database with the feed of the download as story feed.

    Runs in its own transaction, so it must not be called while the handle is already inside one.

    Arguments:
    db - db handle
    story - story dict to insert; must have 'media_id', 'url' and 'guid'
    feeds_id - ID of the feed to map the created story to
    skip_checking_if_new - if true, don't call is_new() before inserting

    Returns:
    created story, or None if the story wasn't created (not new, or GUID conflict)

    Raises:
    McAddStoryException - when called from within a transaction, or when the insert fails for a reason other than
    a "stories_guid" unique constraint conflict
    """
    story = decode_object_from_bytes_if_needed(story)
    if isinstance(feeds_id, bytes):
        feeds_id = decode_object_from_bytes_if_needed(feeds_id)
    feeds_id = int(feeds_id)
    if isinstance(skip_checking_if_new, bytes):
        skip_checking_if_new = decode_object_from_bytes_if_needed(skip_checking_if_new)
    skip_checking_if_new = bool(int(skip_checking_if_new))

    if db.in_transaction():
        raise McAddStoryException("add_story() can't be run from within transaction.")

    db.begin()

    db.query("LOCK TABLE stories IN ROW EXCLUSIVE MODE")

    if not skip_checking_if_new:
        if not is_new(db=db, story=story):
            log.debug("Story '{}' is not new.".format(story['url']))
            db.commit()
            return None

    medium = db.find_by_id(table='media', object_id=story['media_id'])

    if story.get('full_text_rss', None) is None:
        story['full_text_rss'] = medium.get('full_text_rss', False) or False
        # A description that is missing, None or empty can't be a full text, whatever the medium says; testing
        # truthiness (instead of len()) keeps an explicit None description from raising TypeError
        if not story.get('description'):
            story['full_text_rss'] = False

    try:
        story = db.create(table='stories', insert_hash=story)
    except Exception as ex:
        db.rollback()

        # FIXME get rid of this, replace with native upsert on "stories_guid" unique constraint
        if 'unique constraint \"stories_guid' in str(ex):
            log.warning(
                "Failed to add story for '{}' to GUID conflict (guid = '{}')".format(story['url'], story['guid'])
            )
            return None

        # Keep the original exception attached as the cause so that the traceback is not lost
        raise McAddStoryException("Error adding story: {}\nStory: {}".format(str(ex), str(story))) from ex

    db.find_or_create(
        table='feeds_stories_map',
        insert_hash={
            'stories_id': story['stories_id'],
            'feeds_id': feeds_id,
        }
    )

    db.commit()

    return story
示例4: update_extractor_version_tag
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import query [as 别名]
def update_extractor_version_tag(db: DatabaseHandler, story: dict) -> None:
    """Tag the story with the current extractor version.

    Any tag from the extractor version tag set that is already mapped to the story gets removed first, so the story
    ends up with the tag named by extractor_name() as its only tag from that set.

    Arguments:
    db - db handle
    story - story dict; must have 'stories_id'
    """
    # FIXME no caching because unit tests run in the same process so a cached tag set / tag will not be recreated.
    # Purging such a cache manually is very error-prone.
    story = decode_object_from_bytes_if_needed(story)

    tag_set = db.find_or_create(table='tag_sets', insert_hash={'name': extractor_version_tag_sets_name()})
    tag_sets_id = tag_set['tag_sets_id']
    stories_id = story['stories_id']

    remove_old_tags_sql = """
        DELETE FROM stories_tags_map AS stm
            USING tags AS t
                JOIN tag_sets AS ts
                    ON ts.tag_sets_id = t.tag_sets_id
        WHERE t.tags_id = stm.tags_id
          AND ts.tag_sets_id = %(tag_sets_id)s
          AND stm.stories_id = %(stories_id)s
    """
    db.query(remove_old_tags_sql, {'tag_sets_id': tag_sets_id, 'stories_id': stories_id})

    tag = db.find_or_create(table='tags', insert_hash={'tag': extractor_name(), 'tag_sets_id': tag_sets_id})

    add_tag_sql = """
        INSERT INTO stories_tags_map (stories_id, tags_id)
        VALUES (%(stories_id)s, %(tags_id)s)
    """
    db.query(add_tag_sql, {'stories_id': stories_id, 'tags_id': tag['tags_id']})
示例5: _fetch_tweets_for_day
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import query [as 别名]
def _fetch_tweets_for_day(
        db: DatabaseHandler,
        twitter_class: typing.Type[AbstractTwitter],
        topic: dict,
        topic_tweet_day: dict,
        max_tweets: typing.Optional[int] = None) -> None:
    """
    Fetch tweets for a single day.

    If tweets_fetched is false for the given topic_tweet_days row, fetch the tweets for the given day by querying
    the list of tweets from CH and then fetching each tweet from twitter.

    The matching posts are stored and the topic_tweet_days row is updated within a single transaction; if anything
    fails while storing, the transaction is rolled back and the exception is re-raised.

    Arguments:
    db - db handle
    twitter_class - AbstractTwitter class
    topic - topic dict
    topic_tweet_day - topic_tweet_day dict; its 'num_ch_tweets' key is set to the number of stored posts
    max_tweets - max tweets to fetch for a single day

    Return:
    None
    """
    if topic_tweet_day['tweets_fetched']:
        return

    ch_posts_data = topic_tweet_day['ch_posts']

    ch_posts = ch_posts_data['posts']

    if max_tweets is not None:
        ch_posts = ch_posts[0:max_tweets]

    log.info("adding %d tweets for topic %s, day %s" % (len(ch_posts), topic['topics_id'], topic_tweet_day['day']))

    # we can only get 100 posts at a time from twitter
    for i in range(0, len(ch_posts), 100):
        _add_tweets_to_ch_posts(twitter_class, ch_posts[i:i + 100])

    ch_posts = [p for p in ch_posts if _post_matches_pattern(topic, p)]

    log.info("%d tweets remaining after match" % (len(ch_posts)))

    db.begin()

    try:
        log.debug("inserting into topic_tweets ...")

        # Plain loop: the calls are made for their side effects only, there is no result worth collecting
        for ch_post in ch_posts:
            _store_tweet_and_urls(db, topic_tweet_day, ch_post)

        topic_tweet_day['num_ch_tweets'] = len(ch_posts)

        db.query(
            "update topic_tweet_days set tweets_fetched = true, num_ch_tweets = %(a)s where topic_tweet_days_id = %(b)s",
            {'a': topic_tweet_day['num_ch_tweets'], 'b': topic_tweet_day['topic_tweet_days_id']})
    except Exception:
        # Don't leave the handle stuck inside a half-done transaction
        db.rollback()
        raise

    db.commit()

    log.debug("done inserting into topic_tweets")
示例6: lookup_medium
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import query [as 别名]
def lookup_medium(db: DatabaseHandler, url: str, name: str) -> typing.Optional[dict]:
    """Lookup a media source by normalized url and then name.

    The url is normalized with _normalize_url() and matched against media.normalized_url; if nothing matches, the
    name is matched case-insensitively. Returns the parent media for duplicate media sources and returns no media
    that are marked foreign_rss_links.

    Because the normalization function is in python, the media.normalized_url field has to be kept current from
    within python; _update_media_normalized_urls() is called before the lookup for that purpose.

    Arguments:
    db - db handle
    url - url to lookup
    name - name to lookup

    Returns:
    a media source dict or None

    Raises:
    McTopicMediaException - if the chain of dup_media_id references contains a cycle, points to a media source that
    does not exist, or ends at a media source that has foreign_rss_links set
    """
    _update_media_normalized_urls(db)

    nu = _normalize_url(url)

    lookup_query = \
        """
        select m.*
            from media m
            where
                m.normalized_url = %(a)s and
                foreign_rss_links = 'f'
            order by dup_media_id asc nulls last, media_id asc
        """

    medium = db.query(lookup_query, {'a': nu}).hash()

    if medium is None:
        medium = db.query(
            "select m.* from media m where lower(m.name) = lower(%(a)s) and m.foreign_rss_links = false",
            {'a': name}).hash()

    if medium is None:
        return None

    # Follow the dup_media_id chain up to the topmost parent, remembering visited media to detect cycles
    media_cycle_lookup = dict()  # type: dict
    while medium['dup_media_id'] is not None:
        if medium['media_id'] in media_cycle_lookup:
            raise McTopicMediaException('Cycle found in duplicate media path: ' + str(media_cycle_lookup.keys()))
        media_cycle_lookup[medium['media_id']] = True

        parent_media_id = medium['dup_media_id']
        medium = db.query("select * from media where media_id = %(a)s", {'a': parent_media_id}).hash()

        # hash() returns None when no row matches; fail with a meaningful error instead of a TypeError
        if medium is None:
            raise McTopicMediaException('Parent duplicate media source %d does not exist' % parent_media_id)

    # Both lookup queries above exclude foreign_rss_links media, so this can only trigger for a parent
    if medium['foreign_rss_links']:
        raise McTopicMediaException('Parent duplicate media source %d has foreign_rss_links' % medium['media_id'])

    return medium
示例7: _insert_tweet_urls
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import query [as 别名]
def _insert_tweet_urls(db: DatabaseHandler, topic_tweet: dict, urls: typing.List) -> None:
    """Insert list of urls into topic_tweet_urls.

    One INSERT is issued per url; rows that conflict with existing ones are silently skipped
    ("on conflict do nothing").

    Arguments:
    db - db handle
    topic_tweet - topic_tweet dict; must have 'topic_tweets_id'
    urls - urls to insert for the tweet

    Return:
    None (the function never returned a list, so the annotation states that now)
    """
    for url in urls:
        db.query(
            """
            insert into topic_tweet_urls( topic_tweets_id, url )
                values( %(a)s, %(b)s )
                on conflict do nothing
            """,
            {'a': topic_tweet['topic_tweets_id'], 'b': url})
示例8: change_password
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import query [as 别名]
def change_password(db: DatabaseHandler,
                    email: str,
                    new_password: str,
                    new_password_repeat: str,
                    do_not_inform_via_email: bool = False) -> None:
    """Change user's password.

    Validates and hashes the new password, stores the hash for the user (which also sets the user active) and,
    unless told otherwise, sends an email notifying the user about the change.

    Arguments:
    db - db handle
    email - email address of the user whose password is to be changed
    new_password - new password
    new_password_repeat - new password, repeated
    do_not_inform_via_email - if true, don't send the notification email

    Raises:
    McAuthChangePasswordException - if the user lookup fails, the new password doesn't validate or can't be hashed,
    or the notification email can't be sent (the password has already been changed at that point)
    """
    email = decode_object_from_bytes_if_needed(email)
    new_password = decode_object_from_bytes_if_needed(new_password)
    new_password_repeat = decode_object_from_bytes_if_needed(new_password_repeat)

    if isinstance(do_not_inform_via_email, bytes):
        do_not_inform_via_email = decode_object_from_bytes_if_needed(do_not_inform_via_email)

    do_not_inform_via_email = bool(int(do_not_inform_via_email))

    # Check if user exists
    try:
        user = user_info(db=db, email=email)
    except Exception as ex:
        # Chain the cause: any failure of user_info() is reported as "does not exist", so keep the real reason
        raise McAuthChangePasswordException('User with email address "%s" does not exist.' % email) from ex

    password_validation_message = validate_new_password(email=email,
                                                        password=new_password,
                                                        password_repeat=new_password_repeat)
    if password_validation_message:
        raise McAuthChangePasswordException("Unable to change password: %s" % password_validation_message)

    # Hash + validate the password
    try:
        password_new_hash = generate_secure_hash(password=new_password)
    except Exception as ex:
        raise McAuthChangePasswordException("Unable to hash a new password: %s" % str(ex)) from ex

    if not password_new_hash:
        raise McAuthChangePasswordException("Generated password hash is empty.")

    # Set the password hash
    db.query("""
        UPDATE auth_users
        SET password_hash = %(password_hash)s,
            active = TRUE
        WHERE email = %(email)s
    """, {
        'email': email,
        'password_hash': password_new_hash,
    })

    if not do_not_inform_via_email:
        message = AuthPasswordChangedMessage(to=email, full_name=user.full_name())
        if not send_email(message):
            raise McAuthChangePasswordException(
                'The password has been changed, but I was unable to send an email notifying you about the change.'
            )
示例9: __get_topic_url_variants
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import query [as 别名]
def __get_topic_url_variants(db: DatabaseHandler, urls: List[str]) -> List[str]:
    """Return the alternative urls known for the stories that the given urls belong to.

    The urls are resolved to stories, the stories are expanded with __get_merged_stories_ids(), and every distinct
    url / redirect url of topic links referencing those stories, plus the stories' own urls, is returned. If no
    stories are found, the input urls are returned as they are.
    """
    urls = decode_object_from_bytes_if_needed(urls)

    # MC_REWRITE_TO_PYTHON: change to tuple parameter because Perl database handler proxy can't handle tuples
    matching_stories_ids = db.query("SELECT stories_id FROM stories WHERE url = ANY(?)", urls).flat()

    # MC_REWRITE_TO_PYTHON: Perl database handler proxy (the dreaded "wantarray" part) returns None on empty result
    # sets, a scalar on a single item and arrayref on many items
    if matching_stories_ids is None:
        matching_stories_ids = []
    elif isinstance(matching_stories_ids, int):
        matching_stories_ids = [matching_stories_ids]

    stories_ids = list(map(int, matching_stories_ids))

    all_stories_ids = __get_merged_stories_ids(db=db, stories_ids=stories_ids)
    if len(all_stories_ids) == 0:
        return urls

    # The same list of story IDs is bound to each of the three positional placeholders
    variants_sql = """
        SELECT DISTINCT url
        FROM (
            SELECT redirect_url AS url
            FROM topic_links
            WHERE ref_stories_id = ANY(?)

            UNION

            SELECT url
            FROM topic_links
            WHERE ref_stories_id = ANY(?)

            UNION

            SELECT url
            FROM stories
            WHERE stories_id = ANY(?)
        ) AS q
        WHERE q IS NOT NULL
    """
    all_urls = db.query(variants_sql, all_stories_ids, all_stories_ids, all_stories_ids).flat()

    # MC_REWRITE_TO_PYTHON: Perl database handler proxy (the dreaded "wantarray" part) returns None on empty result
    # sets, a scalar on a single item and arrayref on many items
    if all_urls is None:
        return []
    if isinstance(all_urls, str):
        return [all_urls]
    return all_urls
示例10: __remove_object_from_cache
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import query [as 别名]
def __remove_object_from_cache(self, db: DatabaseHandler, object_id: int) -> None:
    """Delete the row of the given object from the cache table.

    Errors raised by the query are not caught here, so a failed removal propagates to the caller; after removal
    we'd expect the object to be gone for good.
    """
    prepared_object_id = self._prepare_object_id(object_id)

    # Table name is filled in by Python; the "%(object_id)s" placeholder is left for psycopg2 to interpolate
    delete_sql = "DELETE FROM {} WHERE object_id = %(object_id)s".format(self.__cache_table)

    db.query(delete_sql, {'object_id': prepared_object_id})
示例11: create_password_reset_token
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import query [as 别名]
def create_password_reset_token(db: DatabaseHandler, email: str) -> Optional[str]:
    """Generate password reset token used for both activating newly registered users and resetting passwords.

    A token is generated, hashed and returned whether or not the user exists; only the database update is a no-op
    for unknown users. That way the adversary would not be able to find out which email addresses are active users.

    (Possible improvement: make the script work for the exact same amount of time in both cases to avoid timing
    attacks)

    Returns the non-hashed password reset token.

    Raises McAuthProfileException if the email is empty or the token can't be generated / hashed.
    """
    email = decode_object_from_bytes_if_needed(email)

    if not email:
        raise McAuthProfileException('Email address is empty.')

    find_user_sql = """
        SELECT auth_users_id,
               email
        FROM auth_users
        WHERE email = %(email)s
        LIMIT 1
    """
    user_row = db.query(find_user_sql, {'email': email}).hash()

    user_found = user_row is not None and len(user_row) > 0
    if not user_found:
        # Don't return just yet: blank out the email address and carry on with the rather slowish token generation
        # (in order to reduce the risk of timing attacks)
        email = ''

    # Generate the activation token
    password_reset_token = random_string(length=64)
    if len(password_reset_token) == 0:
        raise McAuthProfileException('Unable to generate an activation token.')

    # Hash + validate the activation token
    password_reset_token_hash = generate_secure_hash(password=password_reset_token)
    if not password_reset_token_hash:
        raise McAuthProfileException("Unable to hash an activation token.")

    # Store the token hash (with the email address blanked out above, this query will do nothing)
    store_hash_sql = """
        UPDATE auth_users
        SET password_reset_token_hash = %(password_reset_token_hash)s
        WHERE email = %(email)s
          AND email != ''
    """
    store_hash_params = {
        'email': email,
        'password_reset_token_hash': password_reset_token_hash,
    }
    db.query(store_hash_sql, store_hash_params)

    return password_reset_token
示例12: get_consistent_color
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import query [as 别名]
def get_consistent_color(db: DatabaseHandler, item_set: str, item_id: str) -> str:
    """Return the same hex color (e.g. "ff0000") for the same set / ID combination every time it is called.

    A color already stored in color_sets for the combination is reused. Otherwise the first color of the hard coded
    palette that is not yet used within the set is picked (or a random color, once the palette is exhausted) and
    stored for subsequent calls.
    """
    item_set = decode_object_from_bytes_if_needed(item_set)
    item_id = decode_object_from_bytes_if_needed(item_id)

    # Always return grey for null or not typed values
    if item_id.lower() in {'null', 'not typed'}:
        return '999999'

    stored_color = db.query("""SELECT color FROM color_sets WHERE color_set = %(item_set)s AND id = %(item_id)s""", {
        'item_set': item_set,
        'item_id': item_id,
    }).flat()
    if stored_color is not None and len(stored_color):
        return stored_color[0] if isinstance(stored_color, list) else stored_color

    set_colors = db.query("""SELECT color FROM color_sets WHERE color_set = %(item_set)s""", {
        'item_set': item_set,
    }).flat()

    # The result might be None, a single value or a list of values
    if set_colors is None:
        taken_colors = set()
    elif isinstance(set_colors, list):
        taken_colors = set(set_colors)
    else:
        taken_colors = {set_colors}

    # Use the hard coded palette of 25 colors if possible
    new_color = next((candidate for candidate in __MC_COLORS if candidate not in taken_colors), None)

    # Otherwise, just generate a random color
    if new_color is None:
        new_color = random.choice(analogous_color(color='0000ff', return_slices=256, split_slices=255))

    db.create(table='color_sets', insert_hash={
        'color_set': item_set,
        'id': item_id,
        'color': new_color,
    })

    return new_color
示例13: regenerate_api_key
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import query [as 别名]
def regenerate_api_key(db: DatabaseHandler, email: str) -> None:
    """Regenerate API key -- creates new non-IP limited API key, removes all IP-limited API keys.

    Both changes and the notification email happen inside a single transaction: if a query fails or the email
    can't be sent, the transaction is rolled back and the API keys stay as they were.

    Arguments:
    db - db handle
    email - email address of the user whose API key is to be regenerated

    Raises:
    McAuthProfileException - if the email is empty, the user lookup fails, or the notification email can't be sent
    """
    email = decode_object_from_bytes_if_needed(email)

    if not email:
        raise McAuthProfileException('Email address is empty.')

    # Check if user exists
    try:
        user = user_info(db=db, email=email)
    except Exception as ex:
        # Chain the cause: any failure of user_info() is reported as "does not exist", so keep the real reason
        raise McAuthProfileException("User with email address '%s' does not exist." % email) from ex

    db.begin()

    try:
        # Purge all IP-limited API keys
        db.query("""
            DELETE FROM auth_user_api_keys
            WHERE ip_address IS NOT NULL
              AND auth_users_id = (
                SELECT auth_users_id
                FROM auth_users
                WHERE email = %(email)s
              )
        """, {'email': email})

        # Regenerate non-IP limited API key
        db.query("""
            UPDATE auth_user_api_keys

            -- DEFAULT points to a generation function
            SET api_key = DEFAULT

            WHERE ip_address IS NULL
              AND auth_users_id = (
                SELECT auth_users_id
                FROM auth_users
                WHERE email = %(email)s
              )
        """, {'email': email})

        message = AuthAPIKeyResetMessage(to=email, full_name=user.full_name())
        email_sent = send_email(message)
    except Exception:
        # Don't leave the handle stuck inside a half-done transaction
        db.rollback()
        raise

    if not email_sent:
        db.rollback()
        raise McAuthProfileException("Unable to send email about reset API key.")

    db.commit()
示例14: process_download_for_extractor
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import query [as 别名]
def process_download_for_extractor(db: DatabaseHandler,
                                   download: dict,
                                   extractor_args: PyExtractorArguments = PyExtractorArguments()) -> None:
    """Extract the download, store its download_text and, once the story is fully extracted, process the story.

    After extraction the story is checked for content downloads that are still not extracted; only if there are
    none left, process_extracted_story() gets called on the parent story.
    """
    download = decode_object_from_bytes_if_needed(download)

    stories_id = download['stories_id']

    log.debug("extract: {} {} {}".format(download['downloads_id'], stories_id, download['url']))

    extract_and_create_download_text(db=db, download=download, extractor_args=extractor_args)

    remaining_download_sql = """
        SELECT downloads_id
        FROM downloads
        WHERE stories_id = %(stories_id)s
          AND extracted = 'f'
          AND type = 'content'
    """
    remaining_download = db.query(remaining_download_sql, {'stories_id': stories_id}).hash()

    # MC_REWRITE_TO_PYTHON: Perlism (the result might be None instead of an empty dict)
    if remaining_download is not None and len(remaining_download) > 0:
        log.info("Pending more downloads...")
        return

    story = db.find_by_id(table='stories', object_id=stories_id)
    process_extracted_story(db=db, story=story, extractor_args=extractor_args)
示例15: password_reset_token_is_valid
# 需要导入模块: from mediawords.db import DatabaseHandler [as 别名]
# 或者: from mediawords.db.DatabaseHandler import query [as 别名]
def password_reset_token_is_valid(db: DatabaseHandler, email: str, password_reset_token: str) -> bool:
    """Check a password reset token (used for both user activation and password reset) against the stored hash.

    Returns False (after logging an error) if the email or the token is empty or the user can't be found;
    otherwise returns whether the token matches the user's stored password reset token hash.
    """
    email = decode_object_from_bytes_if_needed(email)
    password_reset_token = decode_object_from_bytes_if_needed(password_reset_token)

    if not email or not password_reset_token:
        log.error("Email and / or password reset token is empty.")
        return False

    # Fetch readonly information about the user
    user_sql = """
        SELECT auth_users_id,
               email,
               password_reset_token_hash
        FROM auth_users
        WHERE email = %(email)s
        LIMIT 1
    """
    user_row = db.query(user_sql, {'email': email}).hash()

    if user_row is None or 'auth_users_id' not in user_row:
        log.error("Unable to find user %s in the database." % email)
        return False

    stored_token_hash = user_row['password_reset_token_hash']

    return bool(password_hash_is_valid(password_hash=stored_token_hash, password=password_reset_token))